mandoc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +7 -0
  3. data/COPYING +674 -0
  4. data/README.md +117 -0
  5. data/ext/mandoc/extconf.rb +59 -0
  6. data/ext/mandoc/rb_mandoc.c +548 -0
  7. data/ext/mandoc/rb_mandoc.h +22 -0
  8. data/lib/mandoc/version.rb +19 -0
  9. data/lib/mandoc.rb +26 -0
  10. data/mandoc-1.14.6/LICENSE +55 -0
  11. data/mandoc-1.14.6/arch.c +54 -0
  12. data/mandoc-1.14.6/att.c +49 -0
  13. data/mandoc-1.14.6/catman.c +260 -0
  14. data/mandoc-1.14.6/cgi.c +1279 -0
  15. data/mandoc-1.14.6/chars.c +507 -0
  16. data/mandoc-1.14.6/compat_err.c +103 -0
  17. data/mandoc-1.14.6/compat_fts.c +696 -0
  18. data/mandoc-1.14.6/compat_fts.h +106 -0
  19. data/mandoc-1.14.6/compat_getline.c +59 -0
  20. data/mandoc-1.14.6/compat_getsubopt.c +87 -0
  21. data/mandoc-1.14.6/compat_isblank.c +23 -0
  22. data/mandoc-1.14.6/compat_mkdtemp.c +50 -0
  23. data/mandoc-1.14.6/compat_mkstemps.c +63 -0
  24. data/mandoc-1.14.6/compat_ohash.c +330 -0
  25. data/mandoc-1.14.6/compat_ohash.h +72 -0
  26. data/mandoc-1.14.6/compat_progname.c +31 -0
  27. data/mandoc-1.14.6/compat_reallocarray.c +40 -0
  28. data/mandoc-1.14.6/compat_recallocarray.c +99 -0
  29. data/mandoc-1.14.6/compat_strcasestr.c +64 -0
  30. data/mandoc-1.14.6/compat_stringlist.c +135 -0
  31. data/mandoc-1.14.6/compat_stringlist.h +48 -0
  32. data/mandoc-1.14.6/compat_strlcat.c +57 -0
  33. data/mandoc-1.14.6/compat_strlcpy.c +52 -0
  34. data/mandoc-1.14.6/compat_strndup.c +42 -0
  35. data/mandoc-1.14.6/compat_strsep.c +70 -0
  36. data/mandoc-1.14.6/compat_strtonum.c +67 -0
  37. data/mandoc-1.14.6/compat_vasprintf.c +47 -0
  38. data/mandoc-1.14.6/config.h +52 -0
  39. data/mandoc-1.14.6/dba.c +508 -0
  40. data/mandoc-1.14.6/dba.h +50 -0
  41. data/mandoc-1.14.6/dba_array.c +190 -0
  42. data/mandoc-1.14.6/dba_array.h +47 -0
  43. data/mandoc-1.14.6/dba_read.c +74 -0
  44. data/mandoc-1.14.6/dba_write.c +127 -0
  45. data/mandoc-1.14.6/dba_write.h +30 -0
  46. data/mandoc-1.14.6/dbm.c +480 -0
  47. data/mandoc-1.14.6/dbm.h +68 -0
  48. data/mandoc-1.14.6/dbm_map.c +194 -0
  49. data/mandoc-1.14.6/dbm_map.h +29 -0
  50. data/mandoc-1.14.6/demandoc.c +260 -0
  51. data/mandoc-1.14.6/eqn.c +1132 -0
  52. data/mandoc-1.14.6/eqn.h +72 -0
  53. data/mandoc-1.14.6/eqn_html.c +246 -0
  54. data/mandoc-1.14.6/eqn_parse.h +48 -0
  55. data/mandoc-1.14.6/eqn_term.c +174 -0
  56. data/mandoc-1.14.6/html.c +1102 -0
  57. data/mandoc-1.14.6/html.h +142 -0
  58. data/mandoc-1.14.6/lib.c +35 -0
  59. data/mandoc-1.14.6/libman.h +42 -0
  60. data/mandoc-1.14.6/libmandoc.h +85 -0
  61. data/mandoc-1.14.6/libmdoc.h +87 -0
  62. data/mandoc-1.14.6/main.c +1375 -0
  63. data/mandoc-1.14.6/main.h +53 -0
  64. data/mandoc-1.14.6/man.c +345 -0
  65. data/mandoc-1.14.6/man.h +21 -0
  66. data/mandoc-1.14.6/man_html.c +640 -0
  67. data/mandoc-1.14.6/man_macro.c +470 -0
  68. data/mandoc-1.14.6/man_term.c +1143 -0
  69. data/mandoc-1.14.6/man_validate.c +660 -0
  70. data/mandoc-1.14.6/manconf.h +58 -0
  71. data/mandoc-1.14.6/mandoc.c +669 -0
  72. data/mandoc-1.14.6/mandoc.h +329 -0
  73. data/mandoc-1.14.6/mandoc_aux.c +118 -0
  74. data/mandoc-1.14.6/mandoc_aux.h +27 -0
  75. data/mandoc-1.14.6/mandoc_msg.c +375 -0
  76. data/mandoc-1.14.6/mandoc_ohash.c +65 -0
  77. data/mandoc-1.14.6/mandoc_ohash.h +23 -0
  78. data/mandoc-1.14.6/mandoc_parse.h +44 -0
  79. data/mandoc-1.14.6/mandoc_xr.c +123 -0
  80. data/mandoc-1.14.6/mandoc_xr.h +31 -0
  81. data/mandoc-1.14.6/mandocd.c +282 -0
  82. data/mandoc-1.14.6/mandocdb.c +2448 -0
  83. data/mandoc-1.14.6/manpath.c +363 -0
  84. data/mandoc-1.14.6/mansearch.c +851 -0
  85. data/mandoc-1.14.6/mansearch.h +118 -0
  86. data/mandoc-1.14.6/mdoc.c +433 -0
  87. data/mandoc-1.14.6/mdoc.h +158 -0
  88. data/mandoc-1.14.6/mdoc_argv.c +682 -0
  89. data/mandoc-1.14.6/mdoc_html.c +1762 -0
  90. data/mandoc-1.14.6/mdoc_macro.c +1600 -0
  91. data/mandoc-1.14.6/mdoc_man.c +1850 -0
  92. data/mandoc-1.14.6/mdoc_markdown.c +1610 -0
  93. data/mandoc-1.14.6/mdoc_state.c +256 -0
  94. data/mandoc-1.14.6/mdoc_term.c +1964 -0
  95. data/mandoc-1.14.6/mdoc_validate.c +3062 -0
  96. data/mandoc-1.14.6/msec.c +37 -0
  97. data/mandoc-1.14.6/out.c +544 -0
  98. data/mandoc-1.14.6/out.h +70 -0
  99. data/mandoc-1.14.6/preconv.c +179 -0
  100. data/mandoc-1.14.6/read.c +732 -0
  101. data/mandoc-1.14.6/roff.c +4390 -0
  102. data/mandoc-1.14.6/roff.h +561 -0
  103. data/mandoc-1.14.6/roff_html.c +119 -0
  104. data/mandoc-1.14.6/roff_int.h +94 -0
  105. data/mandoc-1.14.6/roff_term.c +266 -0
  106. data/mandoc-1.14.6/roff_validate.c +151 -0
  107. data/mandoc-1.14.6/soelim.c +182 -0
  108. data/mandoc-1.14.6/st.c +82 -0
  109. data/mandoc-1.14.6/tag.c +327 -0
  110. data/mandoc-1.14.6/tag.h +35 -0
  111. data/mandoc-1.14.6/tbl.c +183 -0
  112. data/mandoc-1.14.6/tbl.h +121 -0
  113. data/mandoc-1.14.6/tbl_data.c +323 -0
  114. data/mandoc-1.14.6/tbl_html.c +293 -0
  115. data/mandoc-1.14.6/tbl_int.h +47 -0
  116. data/mandoc-1.14.6/tbl_layout.c +376 -0
  117. data/mandoc-1.14.6/tbl_opts.c +173 -0
  118. data/mandoc-1.14.6/tbl_parse.h +30 -0
  119. data/mandoc-1.14.6/tbl_term.c +948 -0
  120. data/mandoc-1.14.6/term.c +1113 -0
  121. data/mandoc-1.14.6/term.h +158 -0
  122. data/mandoc-1.14.6/term_ascii.c +424 -0
  123. data/mandoc-1.14.6/term_ps.c +1362 -0
  124. data/mandoc-1.14.6/term_tab.c +130 -0
  125. data/mandoc-1.14.6/term_tag.c +227 -0
  126. data/mandoc-1.14.6/term_tag.h +34 -0
  127. data/mandoc-1.14.6/tree.c +536 -0
  128. metadata +170 -0
@@ -0,0 +1,2448 @@
1
+ /* $Id: mandocdb.c,v 1.269 2021/08/19 16:55:31 schwarze Exp $ */
2
+ /*
3
+ * Copyright (c) 2011-2020 Ingo Schwarze <schwarze@openbsd.org>
4
+ * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
5
+ * Copyright (c) 2016 Ed Maste <emaste@freebsd.org>
6
+ *
7
+ * Permission to use, copy, modify, and distribute this software for any
8
+ * purpose with or without fee is hereby granted, provided that the above
9
+ * copyright notice and this permission notice appear in all copies.
10
+ *
11
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
12
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
13
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
14
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
15
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
16
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
17
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
18
+ *
19
+ * Implementation of the makewhatis(8) program.
20
+ */
21
+ #include "config.h"
22
+
23
+ #include <sys/types.h>
24
+ #include <sys/mman.h>
25
+ #include <sys/stat.h>
26
+
27
+ #include <assert.h>
28
+ #include <ctype.h>
29
+ #if HAVE_ERR
30
+ #include <err.h>
31
+ #endif
32
+ #include <errno.h>
33
+ #include <fcntl.h>
34
+ #if HAVE_FTS
35
+ #include <fts.h>
36
+ #else
37
+ #include "compat_fts.h"
38
+ #endif
39
+ #include <limits.h>
40
+ #if HAVE_SANDBOX_INIT
41
+ #include <sandbox.h>
42
+ #endif
43
+ #include <stdarg.h>
44
+ #include <stddef.h>
45
+ #include <stdio.h>
46
+ #include <stdint.h>
47
+ #include <stdlib.h>
48
+ #include <string.h>
49
+ #include <unistd.h>
50
+
51
+ #include "mandoc_aux.h"
52
+ #include "mandoc_ohash.h"
53
+ #include "mandoc.h"
54
+ #include "roff.h"
55
+ #include "mdoc.h"
56
+ #include "man.h"
57
+ #include "mandoc_parse.h"
58
+ #include "manconf.h"
59
+ #include "mansearch.h"
60
+ #include "dba_array.h"
61
+ #include "dba.h"
62
+
63
+ extern const char *const mansearch_keynames[];
64
+
65
/*
 * Operational mode, selected by the command line options.
 */
enum op {
	/* new dbs from dir list or default config */
	OP_DEFAULT = 0,
	/* new databases from custom config file */
	OP_CONFFILE,
	/* delete/add entries in existing database */
	OP_UPDATE,
	/* delete entries from existing database */
	OP_DELETE,
	/* change no databases, report potential problems */
	OP_TEST
};
72
+
73
/*
 * A keyed string with a trailing, variable-length text buffer.
 */
struct str {
	/* if set, the owning parse */
	const struct mpage	*mpage;
	/* bitmask in sequence */
	uint64_t		 mask;
	/* rendered text */
	char			 key[];
};
78
+
79
/*
 * Inode number and device number pair; embedded in struct mpage,
 * where it is used for the hashing routine.
 */
struct inodev {
	ino_t	 st_ino;
	dev_t	 st_dev;
};
83
+
84
+ struct mpage {
85
+ struct inodev inodev; /* used for hashing routine */
86
+ struct dba_array *dba;
87
+ char *sec; /* section from file content */
88
+ char *arch; /* architecture from file content */
89
+ char *title; /* title from file content */
90
+ char *desc; /* description from file content */
91
+ struct mpage *next; /* singly linked list */
92
+ struct mlink *mlinks; /* singly linked list */
93
+ int name_head_done;
94
+ enum form form; /* format from file content */
95
+ };
96
+
97
+ struct mlink {
98
+ char file[PATH_MAX]; /* filename rel. to manpath */
99
+ char *dsec; /* section from directory */
100
+ char *arch; /* architecture from directory */
101
+ char *name; /* name from file name (not empty) */
102
+ char *fsec; /* section from file name suffix */
103
+ struct mlink *next; /* singly linked list */
104
+ struct mpage *mpage; /* parent */
105
+ int gzip; /* filename has a .gz suffix */
106
+ enum form dform; /* format from directory */
107
+ enum form fform; /* format from file name suffix */
108
+ };
109
+
110
/*
 * Signature shared by all per-macro handler functions.
 */
typedef	int (*mdoc_fp)(struct mpage *, const struct roff_meta *,
			const struct roff_node *);

/*
 * Per-macro entry of the mdoc_handlers[] table.
 */
struct mdoc_handler {
	/* optional handler */
	mdoc_fp		 fp;
	/* set unless handler returns 0 */
	uint64_t	 mask;
	/* node flags that must not be set */
	int		 taboo;
};
118
+
119
+
120
+ int mandocdb(int, char *[]);
121
+
122
+ static void dbadd(struct dba *, struct mpage *);
123
+ static void dbadd_mlink(const struct mlink *);
124
+ static void dbprune(struct dba *);
125
+ static void dbwrite(struct dba *);
126
+ static void filescan(const char *);
127
+ #if HAVE_FTS_COMPARE_CONST
128
+ static int fts_compare(const FTSENT *const *, const FTSENT *const *);
129
+ #else
130
+ static int fts_compare(const FTSENT **, const FTSENT **);
131
+ #endif
132
+ static void mlink_add(struct mlink *, const struct stat *);
133
+ static void mlink_check(struct mpage *, struct mlink *);
134
+ static void mlink_free(struct mlink *);
135
+ static void mlinks_undupe(struct mpage *);
136
+ static void mpages_free(void);
137
+ static void mpages_merge(struct dba *, struct mparse *);
138
+ static void parse_cat(struct mpage *, int);
139
+ static void parse_man(struct mpage *, const struct roff_meta *,
140
+ const struct roff_node *);
141
+ static void parse_mdoc(struct mpage *, const struct roff_meta *,
142
+ const struct roff_node *);
143
+ static int parse_mdoc_head(struct mpage *, const struct roff_meta *,
144
+ const struct roff_node *);
145
+ static int parse_mdoc_Fa(struct mpage *, const struct roff_meta *,
146
+ const struct roff_node *);
147
+ static int parse_mdoc_Fd(struct mpage *, const struct roff_meta *,
148
+ const struct roff_node *);
149
+ static void parse_mdoc_fname(struct mpage *, const struct roff_node *);
150
+ static int parse_mdoc_Fn(struct mpage *, const struct roff_meta *,
151
+ const struct roff_node *);
152
+ static int parse_mdoc_Fo(struct mpage *, const struct roff_meta *,
153
+ const struct roff_node *);
154
+ static int parse_mdoc_Nd(struct mpage *, const struct roff_meta *,
155
+ const struct roff_node *);
156
+ static int parse_mdoc_Nm(struct mpage *, const struct roff_meta *,
157
+ const struct roff_node *);
158
+ static int parse_mdoc_Sh(struct mpage *, const struct roff_meta *,
159
+ const struct roff_node *);
160
+ static int parse_mdoc_Va(struct mpage *, const struct roff_meta *,
161
+ const struct roff_node *);
162
+ static int parse_mdoc_Xr(struct mpage *, const struct roff_meta *,
163
+ const struct roff_node *);
164
+ static void putkey(const struct mpage *, char *, uint64_t);
165
+ static void putkeys(const struct mpage *, char *, size_t, uint64_t);
166
+ static void putmdockey(const struct mpage *,
167
+ const struct roff_node *, uint64_t, int);
168
+ #ifdef READ_ALLOWED_PATH
169
+ static int read_allowed(const char *);
170
+ #endif
171
+ static int render_string(char **, size_t *);
172
+ static void say(const char *, const char *, ...)
173
+ __attribute__((__format__ (__printf__, 2, 3)));
174
+ static int set_basedir(const char *, int);
175
+ static int treescan(void);
176
+ static size_t utf8(unsigned int, char [7]);
177
+
178
+ static int nodb; /* no database changes */
179
+ static int mparse_options; /* abort the parse early */
180
+ static int use_all; /* use all found files */
181
+ static int debug; /* print what we're doing */
182
+ static int warnings; /* warn about crap */
183
+ static int write_utf8; /* write UTF-8 output; else ASCII */
184
+ static int exitcode; /* to be returned by main */
185
+ static enum op op; /* operational mode */
186
+ static char basedir[PATH_MAX]; /* current base directory */
187
+ static size_t basedir_len; /* strlen(basedir) */
188
+ static struct mpage *mpage_head; /* list of distinct manual pages */
189
+ static struct ohash mpages; /* table of distinct manual pages */
190
+ static struct ohash mlinks; /* table of directory entries */
191
+ static struct ohash names; /* table of all names */
192
+ static struct ohash strings; /* table of all strings */
193
+ static uint64_t name_mask;
194
+
195
+ static const struct mdoc_handler mdoc_handlers[MDOC_MAX - MDOC_Dd] = {
196
+ { NULL, 0, NODE_NOPRT }, /* Dd */
197
+ { NULL, 0, NODE_NOPRT }, /* Dt */
198
+ { NULL, 0, NODE_NOPRT }, /* Os */
199
+ { parse_mdoc_Sh, TYPE_Sh, 0 }, /* Sh */
200
+ { parse_mdoc_head, TYPE_Ss, 0 }, /* Ss */
201
+ { NULL, 0, 0 }, /* Pp */
202
+ { NULL, 0, 0 }, /* D1 */
203
+ { NULL, 0, 0 }, /* Dl */
204
+ { NULL, 0, 0 }, /* Bd */
205
+ { NULL, 0, 0 }, /* Ed */
206
+ { NULL, 0, 0 }, /* Bl */
207
+ { NULL, 0, 0 }, /* El */
208
+ { NULL, 0, 0 }, /* It */
209
+ { NULL, 0, 0 }, /* Ad */
210
+ { NULL, TYPE_An, 0 }, /* An */
211
+ { NULL, 0, 0 }, /* Ap */
212
+ { NULL, TYPE_Ar, 0 }, /* Ar */
213
+ { NULL, TYPE_Cd, 0 }, /* Cd */
214
+ { NULL, TYPE_Cm, 0 }, /* Cm */
215
+ { NULL, TYPE_Dv, 0 }, /* Dv */
216
+ { NULL, TYPE_Er, 0 }, /* Er */
217
+ { NULL, TYPE_Ev, 0 }, /* Ev */
218
+ { NULL, 0, 0 }, /* Ex */
219
+ { parse_mdoc_Fa, 0, 0 }, /* Fa */
220
+ { parse_mdoc_Fd, 0, 0 }, /* Fd */
221
+ { NULL, TYPE_Fl, 0 }, /* Fl */
222
+ { parse_mdoc_Fn, 0, 0 }, /* Fn */
223
+ { NULL, TYPE_Ft | TYPE_Vt, 0 }, /* Ft */
224
+ { NULL, TYPE_Ic, 0 }, /* Ic */
225
+ { NULL, TYPE_In, 0 }, /* In */
226
+ { NULL, TYPE_Li, 0 }, /* Li */
227
+ { parse_mdoc_Nd, 0, 0 }, /* Nd */
228
+ { parse_mdoc_Nm, 0, 0 }, /* Nm */
229
+ { NULL, 0, 0 }, /* Op */
230
+ { NULL, 0, 0 }, /* Ot */
231
+ { NULL, TYPE_Pa, NODE_NOSRC }, /* Pa */
232
+ { NULL, 0, 0 }, /* Rv */
233
+ { NULL, TYPE_St, 0 }, /* St */
234
+ { parse_mdoc_Va, TYPE_Va, 0 }, /* Va */
235
+ { parse_mdoc_Va, TYPE_Vt, 0 }, /* Vt */
236
+ { parse_mdoc_Xr, 0, 0 }, /* Xr */
237
+ { NULL, 0, 0 }, /* %A */
238
+ { NULL, 0, 0 }, /* %B */
239
+ { NULL, 0, 0 }, /* %D */
240
+ { NULL, 0, 0 }, /* %I */
241
+ { NULL, 0, 0 }, /* %J */
242
+ { NULL, 0, 0 }, /* %N */
243
+ { NULL, 0, 0 }, /* %O */
244
+ { NULL, 0, 0 }, /* %P */
245
+ { NULL, 0, 0 }, /* %R */
246
+ { NULL, 0, 0 }, /* %T */
247
+ { NULL, 0, 0 }, /* %V */
248
+ { NULL, 0, 0 }, /* Ac */
249
+ { NULL, 0, 0 }, /* Ao */
250
+ { NULL, 0, 0 }, /* Aq */
251
+ { NULL, TYPE_At, 0 }, /* At */
252
+ { NULL, 0, 0 }, /* Bc */
253
+ { NULL, 0, 0 }, /* Bf */
254
+ { NULL, 0, 0 }, /* Bo */
255
+ { NULL, 0, 0 }, /* Bq */
256
+ { NULL, TYPE_Bsx, NODE_NOSRC }, /* Bsx */
257
+ { NULL, TYPE_Bx, NODE_NOSRC }, /* Bx */
258
+ { NULL, 0, 0 }, /* Db */
259
+ { NULL, 0, 0 }, /* Dc */
260
+ { NULL, 0, 0 }, /* Do */
261
+ { NULL, 0, 0 }, /* Dq */
262
+ { NULL, 0, 0 }, /* Ec */
263
+ { NULL, 0, 0 }, /* Ef */
264
+ { NULL, TYPE_Em, 0 }, /* Em */
265
+ { NULL, 0, 0 }, /* Eo */
266
+ { NULL, TYPE_Fx, NODE_NOSRC }, /* Fx */
267
+ { NULL, TYPE_Ms, 0 }, /* Ms */
268
+ { NULL, 0, 0 }, /* No */
269
+ { NULL, 0, 0 }, /* Ns */
270
+ { NULL, TYPE_Nx, NODE_NOSRC }, /* Nx */
271
+ { NULL, TYPE_Ox, NODE_NOSRC }, /* Ox */
272
+ { NULL, 0, 0 }, /* Pc */
273
+ { NULL, 0, 0 }, /* Pf */
274
+ { NULL, 0, 0 }, /* Po */
275
+ { NULL, 0, 0 }, /* Pq */
276
+ { NULL, 0, 0 }, /* Qc */
277
+ { NULL, 0, 0 }, /* Ql */
278
+ { NULL, 0, 0 }, /* Qo */
279
+ { NULL, 0, 0 }, /* Qq */
280
+ { NULL, 0, 0 }, /* Re */
281
+ { NULL, 0, 0 }, /* Rs */
282
+ { NULL, 0, 0 }, /* Sc */
283
+ { NULL, 0, 0 }, /* So */
284
+ { NULL, 0, 0 }, /* Sq */
285
+ { NULL, 0, 0 }, /* Sm */
286
+ { NULL, 0, 0 }, /* Sx */
287
+ { NULL, TYPE_Sy, 0 }, /* Sy */
288
+ { NULL, TYPE_Tn, 0 }, /* Tn */
289
+ { NULL, 0, NODE_NOSRC }, /* Ux */
290
+ { NULL, 0, 0 }, /* Xc */
291
+ { NULL, 0, 0 }, /* Xo */
292
+ { parse_mdoc_Fo, 0, 0 }, /* Fo */
293
+ { NULL, 0, 0 }, /* Fc */
294
+ { NULL, 0, 0 }, /* Oo */
295
+ { NULL, 0, 0 }, /* Oc */
296
+ { NULL, 0, 0 }, /* Bk */
297
+ { NULL, 0, 0 }, /* Ek */
298
+ { NULL, 0, 0 }, /* Bt */
299
+ { NULL, 0, 0 }, /* Hf */
300
+ { NULL, 0, 0 }, /* Fr */
301
+ { NULL, 0, 0 }, /* Ud */
302
+ { NULL, TYPE_Lb, NODE_NOSRC }, /* Lb */
303
+ { NULL, 0, 0 }, /* Lp */
304
+ { NULL, TYPE_Lk, 0 }, /* Lk */
305
+ { NULL, TYPE_Mt, NODE_NOSRC }, /* Mt */
306
+ { NULL, 0, 0 }, /* Brq */
307
+ { NULL, 0, 0 }, /* Bro */
308
+ { NULL, 0, 0 }, /* Brc */
309
+ { NULL, 0, 0 }, /* %C */
310
+ { NULL, 0, 0 }, /* Es */
311
+ { NULL, 0, 0 }, /* En */
312
+ { NULL, TYPE_Dx, NODE_NOSRC }, /* Dx */
313
+ { NULL, 0, 0 }, /* %Q */
314
+ { NULL, 0, 0 }, /* %U */
315
+ { NULL, 0, 0 }, /* Ta */
316
+ };
317
+
318
+
319
+ int
320
+ mandocdb(int argc, char *argv[])
321
+ {
322
+ struct manconf conf;
323
+ struct mparse *mp;
324
+ struct dba *dba;
325
+ const char *path_arg, *progname;
326
+ size_t j, sz;
327
+ int ch, i;
328
+
329
+ #if HAVE_PLEDGE
330
+ if (pledge("stdio rpath wpath cpath", NULL) == -1) {
331
+ warn("pledge");
332
+ return (int)MANDOCLEVEL_SYSERR;
333
+ }
334
+ #endif
335
+
336
+ #if HAVE_SANDBOX_INIT
337
+ if (sandbox_init(kSBXProfileNoInternet, SANDBOX_NAMED, NULL) == -1) {
338
+ warnx("sandbox_init");
339
+ return (int)MANDOCLEVEL_SYSERR;
340
+ }
341
+ #endif
342
+
343
+ memset(&conf, 0, sizeof(conf));
344
+
345
+ /*
346
+ * We accept a few different invocations.
347
+ * The CHECKOP macro makes sure that invocation styles don't
348
+ * clobber each other.
349
+ */
350
+ #define CHECKOP(_op, _ch) do \
351
+ if ((_op) != OP_DEFAULT) { \
352
+ warnx("-%c: Conflicting option", (_ch)); \
353
+ goto usage; \
354
+ } while (/*CONSTCOND*/0)
355
+
356
+ mparse_options = MPARSE_VALIDATE;
357
+ path_arg = NULL;
358
+ op = OP_DEFAULT;
359
+
360
+ while ((ch = getopt(argc, argv, "aC:Dd:npQT:tu:v")) != -1)
361
+ switch (ch) {
362
+ case 'a':
363
+ use_all = 1;
364
+ break;
365
+ case 'C':
366
+ CHECKOP(op, ch);
367
+ path_arg = optarg;
368
+ op = OP_CONFFILE;
369
+ break;
370
+ case 'D':
371
+ debug++;
372
+ break;
373
+ case 'd':
374
+ CHECKOP(op, ch);
375
+ path_arg = optarg;
376
+ op = OP_UPDATE;
377
+ break;
378
+ case 'n':
379
+ nodb = 1;
380
+ break;
381
+ case 'p':
382
+ warnings = 1;
383
+ break;
384
+ case 'Q':
385
+ mparse_options |= MPARSE_QUICK;
386
+ break;
387
+ case 'T':
388
+ if (strcmp(optarg, "utf8") != 0) {
389
+ warnx("-T%s: Unsupported output format",
390
+ optarg);
391
+ goto usage;
392
+ }
393
+ write_utf8 = 1;
394
+ break;
395
+ case 't':
396
+ CHECKOP(op, ch);
397
+ dup2(STDOUT_FILENO, STDERR_FILENO);
398
+ op = OP_TEST;
399
+ nodb = warnings = 1;
400
+ break;
401
+ case 'u':
402
+ CHECKOP(op, ch);
403
+ path_arg = optarg;
404
+ op = OP_DELETE;
405
+ break;
406
+ case 'v':
407
+ /* Compatibility with espie@'s makewhatis. */
408
+ break;
409
+ default:
410
+ goto usage;
411
+ }
412
+
413
+ argc -= optind;
414
+ argv += optind;
415
+
416
+ #if HAVE_PLEDGE
417
+ if (nodb) {
418
+ if (pledge("stdio rpath", NULL) == -1) {
419
+ warn("pledge");
420
+ return (int)MANDOCLEVEL_SYSERR;
421
+ }
422
+ }
423
+ #endif
424
+
425
+ if (op == OP_CONFFILE && argc > 0) {
426
+ warnx("-C: Too many arguments");
427
+ goto usage;
428
+ }
429
+
430
+ exitcode = (int)MANDOCLEVEL_OK;
431
+ mchars_alloc();
432
+ mp = mparse_alloc(mparse_options, MANDOC_OS_OTHER, NULL);
433
+ mandoc_ohash_init(&mpages, 6, offsetof(struct mpage, inodev));
434
+ mandoc_ohash_init(&mlinks, 6, offsetof(struct mlink, file));
435
+
436
+ if (op == OP_UPDATE || op == OP_DELETE || op == OP_TEST) {
437
+
438
+ /*
439
+ * Most of these deal with a specific directory.
440
+ * Jump into that directory first.
441
+ */
442
+ if (op != OP_TEST && set_basedir(path_arg, 1) == 0)
443
+ goto out;
444
+
445
+ dba = nodb ? dba_new(128) : dba_read(MANDOC_DB);
446
+ if (dba != NULL) {
447
+ /*
448
+ * The existing database is usable. Process
449
+ * all files specified on the command-line.
450
+ */
451
+ use_all = 1;
452
+ for (i = 0; i < argc; i++)
453
+ filescan(argv[i]);
454
+ if (nodb == 0)
455
+ dbprune(dba);
456
+ } else {
457
+ /* Database missing or corrupt. */
458
+ if (op != OP_UPDATE || errno != ENOENT)
459
+ say(MANDOC_DB, "%s: Automatically recreating"
460
+ " from scratch", strerror(errno));
461
+ exitcode = (int)MANDOCLEVEL_OK;
462
+ op = OP_DEFAULT;
463
+ if (treescan() == 0)
464
+ goto out;
465
+ dba = dba_new(128);
466
+ }
467
+ if (op != OP_DELETE)
468
+ mpages_merge(dba, mp);
469
+ if (nodb == 0)
470
+ dbwrite(dba);
471
+ dba_free(dba);
472
+ } else {
473
+ /*
474
+ * If we have arguments, use them as our manpaths.
475
+ * If we don't, use man.conf(5).
476
+ */
477
+ if (argc > 0) {
478
+ conf.manpath.paths = mandoc_reallocarray(NULL,
479
+ argc, sizeof(char *));
480
+ conf.manpath.sz = (size_t)argc;
481
+ for (i = 0; i < argc; i++)
482
+ conf.manpath.paths[i] = mandoc_strdup(argv[i]);
483
+ } else
484
+ manconf_parse(&conf, path_arg, NULL, NULL);
485
+
486
+ if (conf.manpath.sz == 0) {
487
+ exitcode = (int)MANDOCLEVEL_BADARG;
488
+ say("", "Empty manpath");
489
+ }
490
+
491
+ /*
492
+ * First scan the tree rooted at a base directory, then
493
+ * build a new database and finally move it into place.
494
+ * Ignore zero-length directories and strip trailing
495
+ * slashes.
496
+ */
497
+ for (j = 0; j < conf.manpath.sz; j++) {
498
+ sz = strlen(conf.manpath.paths[j]);
499
+ if (sz && conf.manpath.paths[j][sz - 1] == '/')
500
+ conf.manpath.paths[j][--sz] = '\0';
501
+ if (sz == 0)
502
+ continue;
503
+
504
+ if (j) {
505
+ mandoc_ohash_init(&mpages, 6,
506
+ offsetof(struct mpage, inodev));
507
+ mandoc_ohash_init(&mlinks, 6,
508
+ offsetof(struct mlink, file));
509
+ }
510
+
511
+ if (set_basedir(conf.manpath.paths[j], argc > 0) == 0)
512
+ continue;
513
+ if (treescan() == 0)
514
+ continue;
515
+ dba = dba_new(128);
516
+ mpages_merge(dba, mp);
517
+ if (nodb == 0)
518
+ dbwrite(dba);
519
+ dba_free(dba);
520
+
521
+ if (j + 1 < conf.manpath.sz) {
522
+ mpages_free();
523
+ ohash_delete(&mpages);
524
+ ohash_delete(&mlinks);
525
+ }
526
+ }
527
+ }
528
+ out:
529
+ manconf_free(&conf);
530
+ mparse_free(mp);
531
+ mchars_free();
532
+ mpages_free();
533
+ ohash_delete(&mpages);
534
+ ohash_delete(&mlinks);
535
+ return exitcode;
536
+ usage:
537
+ progname = getprogname();
538
+ fprintf(stderr, "usage: %s [-aDnpQ] [-C file] [-Tutf8]\n"
539
+ " %s [-aDnpQ] [-Tutf8] dir ...\n"
540
+ " %s [-DnpQ] [-Tutf8] -d dir [file ...]\n"
541
+ " %s [-Dnp] -u dir [file ...]\n"
542
+ " %s [-Q] -t file ...\n",
543
+ progname, progname, progname, progname, progname);
544
+
545
+ return (int)MANDOCLEVEL_BADARG;
546
+ }
547
+
548
+ /*
549
+ * To get a singly linked list in alpha order while inserting entries
550
+ * at the beginning, process directory entries in reverse alpha order.
551
+ */
552
+ static int
553
+ #if HAVE_FTS_COMPARE_CONST
554
+ fts_compare(const FTSENT *const *a, const FTSENT *const *b)
555
+ #else
556
+ fts_compare(const FTSENT **a, const FTSENT **b)
557
+ #endif
558
+ {
559
+ return -strcmp((*a)->fts_name, (*b)->fts_name);
560
+ }
561
+
562
+ /*
563
+ * Scan a directory tree rooted at "basedir" for manpages.
564
+ * We use fts(), scanning directory parts along the way for clues to our
565
+ * section and architecture.
566
+ *
567
+ * If use_all has been specified, grok all files.
568
+ * If not, sanitise paths to the following:
569
+ *
570
+ * [./]man*[/<arch>]/<name>.<section>
571
+ * or
572
+ * [./]cat<section>[/<arch>]/<name>.0
573
+ *
574
+ * TODO: accommodate for multi-language directories.
575
+ */
576
+ static int
577
+ treescan(void)
578
+ {
579
+ char buf[PATH_MAX];
580
+ FTS *f;
581
+ FTSENT *ff;
582
+ struct mlink *mlink;
583
+ int gzip;
584
+ enum form dform;
585
+ char *dsec, *arch, *fsec, *cp;
586
+ const char *path;
587
+ const char *argv[2];
588
+
589
+ argv[0] = ".";
590
+ argv[1] = NULL;
591
+
592
+ f = fts_open((char * const *)argv, FTS_PHYSICAL | FTS_NOCHDIR,
593
+ fts_compare);
594
+ if (f == NULL) {
595
+ exitcode = (int)MANDOCLEVEL_SYSERR;
596
+ say("", "&fts_open");
597
+ return 0;
598
+ }
599
+
600
+ dsec = arch = NULL;
601
+ dform = FORM_NONE;
602
+
603
+ while ((ff = fts_read(f)) != NULL) {
604
+ path = ff->fts_path + 2;
605
+ switch (ff->fts_info) {
606
+
607
+ /*
608
+ * Symbolic links require various sanity checks,
609
+ * then get handled just like regular files.
610
+ */
611
+ case FTS_SL:
612
+ if (realpath(path, buf) == NULL) {
613
+ if (warnings)
614
+ say(path, "&realpath");
615
+ continue;
616
+ }
617
+ if (strncmp(buf, basedir, basedir_len) != 0
618
+ #ifdef READ_ALLOWED_PATH
619
+ && !read_allowed(buf)
620
+ #endif
621
+ ) {
622
+ if (warnings) say("",
623
+ "%s: outside base directory", buf);
624
+ continue;
625
+ }
626
+ /* Use logical inode to avoid mpages dupe. */
627
+ if (stat(path, ff->fts_statp) == -1) {
628
+ if (warnings)
629
+ say(path, "&stat");
630
+ continue;
631
+ }
632
+ if ((ff->fts_statp->st_mode & S_IFMT) != S_IFREG)
633
+ continue;
634
+ /* FALLTHROUGH */
635
+
636
+ /*
637
+ * If we're a regular file, add an mlink by using the
638
+ * stored directory data and handling the filename.
639
+ */
640
+ case FTS_F:
641
+ if ( ! strcmp(path, MANDOC_DB))
642
+ continue;
643
+ if ( ! use_all && ff->fts_level < 2) {
644
+ if (warnings)
645
+ say(path, "Extraneous file");
646
+ continue;
647
+ }
648
+ gzip = 0;
649
+ fsec = NULL;
650
+ while (fsec == NULL) {
651
+ fsec = strrchr(ff->fts_name, '.');
652
+ if (fsec == NULL || strcmp(fsec+1, "gz"))
653
+ break;
654
+ gzip = 1;
655
+ *fsec = '\0';
656
+ fsec = NULL;
657
+ }
658
+ if (fsec == NULL) {
659
+ if ( ! use_all) {
660
+ if (warnings)
661
+ say(path,
662
+ "No filename suffix");
663
+ continue;
664
+ }
665
+ } else if ( ! strcmp(++fsec, "html")) {
666
+ if (warnings)
667
+ say(path, "Skip html");
668
+ continue;
669
+ } else if ( ! strcmp(fsec, "ps")) {
670
+ if (warnings)
671
+ say(path, "Skip ps");
672
+ continue;
673
+ } else if ( ! strcmp(fsec, "pdf")) {
674
+ if (warnings)
675
+ say(path, "Skip pdf");
676
+ continue;
677
+ } else if ( ! use_all &&
678
+ ((dform == FORM_SRC &&
679
+ strncmp(fsec, dsec, strlen(dsec))) ||
680
+ (dform == FORM_CAT && strcmp(fsec, "0")))) {
681
+ if (warnings)
682
+ say(path, "Wrong filename suffix");
683
+ continue;
684
+ } else
685
+ fsec[-1] = '\0';
686
+
687
+ mlink = mandoc_calloc(1, sizeof(struct mlink));
688
+ if (strlcpy(mlink->file, path,
689
+ sizeof(mlink->file)) >=
690
+ sizeof(mlink->file)) {
691
+ say(path, "Filename too long");
692
+ free(mlink);
693
+ continue;
694
+ }
695
+ mlink->dform = dform;
696
+ mlink->dsec = dsec;
697
+ mlink->arch = arch;
698
+ mlink->name = ff->fts_name;
699
+ mlink->fsec = fsec;
700
+ mlink->gzip = gzip;
701
+ mlink_add(mlink, ff->fts_statp);
702
+ continue;
703
+
704
+ case FTS_D:
705
+ case FTS_DP:
706
+ break;
707
+
708
+ default:
709
+ if (warnings)
710
+ say(path, "Not a regular file");
711
+ continue;
712
+ }
713
+
714
+ switch (ff->fts_level) {
715
+ case 0:
716
+ /* Ignore the root directory. */
717
+ break;
718
+ case 1:
719
+ /*
720
+ * This might contain manX/ or catX/.
721
+ * Try to infer this from the name.
722
+ * If we're not in use_all, enforce it.
723
+ */
724
+ cp = ff->fts_name;
725
+ if (ff->fts_info == FTS_DP) {
726
+ dform = FORM_NONE;
727
+ dsec = NULL;
728
+ break;
729
+ }
730
+
731
+ if ( ! strncmp(cp, "man", 3)) {
732
+ dform = FORM_SRC;
733
+ dsec = cp + 3;
734
+ } else if ( ! strncmp(cp, "cat", 3)) {
735
+ dform = FORM_CAT;
736
+ dsec = cp + 3;
737
+ } else {
738
+ dform = FORM_NONE;
739
+ dsec = NULL;
740
+ }
741
+
742
+ if (dsec != NULL || use_all)
743
+ break;
744
+
745
+ if (warnings)
746
+ say(path, "Unknown directory part");
747
+ fts_set(f, ff, FTS_SKIP);
748
+ break;
749
+ case 2:
750
+ /*
751
+ * Possibly our architecture.
752
+ * If we're descending, keep tabs on it.
753
+ */
754
+ if (ff->fts_info != FTS_DP && dsec != NULL)
755
+ arch = ff->fts_name;
756
+ else
757
+ arch = NULL;
758
+ break;
759
+ default:
760
+ if (ff->fts_info == FTS_DP || use_all)
761
+ break;
762
+ if (warnings)
763
+ say(path, "Extraneous directory part");
764
+ fts_set(f, ff, FTS_SKIP);
765
+ break;
766
+ }
767
+ }
768
+
769
+ fts_close(f);
770
+ return 1;
771
+ }
772
+
773
+ /*
774
+ * Add a file to the mlinks table.
775
+ * Do not verify that it's a "valid" looking manpage (we'll do that
776
+ * later).
777
+ *
778
+ * Try to infer the manual section, architecture, and page name from the
779
+ * path, assuming it looks like
780
+ *
781
+ * [./]man*[/<arch>]/<name>.<section>
782
+ * or
783
+ * [./]cat<section>[/<arch>]/<name>.0
784
+ *
785
+ * See treescan() for the fts(3) version of this.
786
+ */
787
+ static void
788
+ filescan(const char *infile)
789
+ {
790
+ struct stat st;
791
+ struct mlink *mlink;
792
+ char *linkfile, *p, *realdir, *start, *usefile;
793
+ size_t realdir_len;
794
+
795
+ assert(use_all);
796
+
797
+ if (strncmp(infile, "./", 2) == 0)
798
+ infile += 2;
799
+
800
+ /*
801
+ * We have to do lstat(2) before realpath(3) loses
802
+ * the information whether this is a symbolic link.
803
+ * We need to know that because for symbolic links,
804
+ * we want to use the original file name, while for
805
+ * regular files, we want to use the real path.
806
+ */
807
+ if (lstat(infile, &st) == -1) {
808
+ exitcode = (int)MANDOCLEVEL_BADARG;
809
+ say(infile, "&lstat");
810
+ return;
811
+ } else if (S_ISREG(st.st_mode) == 0 && S_ISLNK(st.st_mode) == 0) {
812
+ exitcode = (int)MANDOCLEVEL_BADARG;
813
+ say(infile, "Not a regular file");
814
+ return;
815
+ }
816
+
817
+ /*
818
+ * We have to resolve the file name to the real path
819
+ * in any case for the base directory check.
820
+ */
821
+ if ((usefile = realpath(infile, NULL)) == NULL) {
822
+ exitcode = (int)MANDOCLEVEL_BADARG;
823
+ say(infile, "&realpath");
824
+ return;
825
+ }
826
+
827
+ if (op == OP_TEST)
828
+ start = usefile;
829
+ else if (strncmp(usefile, basedir, basedir_len) == 0)
830
+ start = usefile + basedir_len;
831
+ #ifdef READ_ALLOWED_PATH
832
+ else if (read_allowed(usefile))
833
+ start = usefile;
834
+ #endif
835
+ else {
836
+ exitcode = (int)MANDOCLEVEL_BADARG;
837
+ say("", "%s: outside base directory", infile);
838
+ free(usefile);
839
+ return;
840
+ }
841
+
842
+ /*
843
+ * Now we are sure the file is inside our tree.
844
+ * If it is a symbolic link, ignore the real path
845
+ * and use the original name.
846
+ */
847
+ do {
848
+ if (S_ISLNK(st.st_mode) == 0)
849
+ break;
850
+
851
+ /*
852
+ * Some implementations of realpath(3) may succeed
853
+ * even if the target of the link does not exist,
854
+ * so check again for extra safety.
855
+ */
856
+ if (stat(usefile, &st) == -1) {
857
+ exitcode = (int)MANDOCLEVEL_BADARG;
858
+ say(infile, "&stat");
859
+ free(usefile);
860
+ return;
861
+ }
862
+ linkfile = mandoc_strdup(infile);
863
+ if (op == OP_TEST) {
864
+ free(usefile);
865
+ start = usefile = linkfile;
866
+ break;
867
+ }
868
+ if (strncmp(infile, basedir, basedir_len) == 0) {
869
+ free(usefile);
870
+ usefile = linkfile;
871
+ start = usefile + basedir_len;
872
+ break;
873
+ }
874
+
875
+ /*
876
+ * This symbolic link points into the basedir
877
+ * from the outside. Let's see whether any of
878
+ * the parent directories resolve to the basedir.
879
+ */
880
+ p = strchr(linkfile, '\0');
881
+ do {
882
+ while (*--p != '/')
883
+ continue;
884
+ *p = '\0';
885
+ if ((realdir = realpath(linkfile, NULL)) == NULL) {
886
+ exitcode = (int)MANDOCLEVEL_BADARG;
887
+ say(infile, "&realpath");
888
+ free(linkfile);
889
+ free(usefile);
890
+ return;
891
+ }
892
+ realdir_len = strlen(realdir) + 1;
893
+ free(realdir);
894
+ *p = '/';
895
+ } while (realdir_len > basedir_len);
896
+
897
+ /*
898
+ * If one of the directories resolves to the basedir,
899
+ * use the rest of the original name.
900
+ * Otherwise, the best we can do
901
+ * is to use the filename pointed to.
902
+ */
903
+ if (realdir_len == basedir_len) {
904
+ free(usefile);
905
+ usefile = linkfile;
906
+ start = p + 1;
907
+ } else {
908
+ free(linkfile);
909
+ start = usefile + basedir_len;
910
+ }
911
+ } while (/* CONSTCOND */ 0);
912
+
913
+ mlink = mandoc_calloc(1, sizeof(struct mlink));
914
+ mlink->dform = FORM_NONE;
915
+ if (strlcpy(mlink->file, start, sizeof(mlink->file)) >=
916
+ sizeof(mlink->file)) {
917
+ say(start, "Filename too long");
918
+ free(mlink);
919
+ free(usefile);
920
+ return;
921
+ }
922
+
923
+ /*
924
+ * In test mode or when the original name is absolute
925
+ * but outside our tree, guess the base directory.
926
+ */
927
+
928
+ if (op == OP_TEST || (start == usefile && *start == '/')) {
929
+ if (strncmp(usefile, "man/", 4) == 0)
930
+ start = usefile + 4;
931
+ else if ((start = strstr(usefile, "/man/")) != NULL)
932
+ start += 5;
933
+ else
934
+ start = usefile;
935
+ }
936
+
937
+ /*
938
+ * First try to guess our directory structure.
939
+ * If we find a separator, try to look for man* or cat*.
940
+ * If we find one of these and what's underneath is a directory,
941
+ * assume it's an architecture.
942
+ */
943
+ if ((p = strchr(start, '/')) != NULL) {
944
+ *p++ = '\0';
945
+ if (strncmp(start, "man", 3) == 0) {
946
+ mlink->dform = FORM_SRC;
947
+ mlink->dsec = start + 3;
948
+ } else if (strncmp(start, "cat", 3) == 0) {
949
+ mlink->dform = FORM_CAT;
950
+ mlink->dsec = start + 3;
951
+ }
952
+
953
+ start = p;
954
+ if (mlink->dsec != NULL && (p = strchr(start, '/')) != NULL) {
955
+ *p++ = '\0';
956
+ mlink->arch = start;
957
+ start = p;
958
+ }
959
+ }
960
+
961
+ /*
962
+ * Now check the file suffix.
963
+ * Suffix of `.0' indicates a catpage, `.1-9' is a manpage.
964
+ */
965
+ p = strrchr(start, '\0');
966
+ while (p-- > start && *p != '/' && *p != '.')
967
+ continue;
968
+
969
+ if (*p == '.') {
970
+ *p++ = '\0';
971
+ mlink->fsec = p;
972
+ }
973
+
974
+ /*
975
+ * Now try to parse the name.
976
+ * Use the filename portion of the path.
977
+ */
978
+ mlink->name = start;
979
+ if ((p = strrchr(start, '/')) != NULL) {
980
+ mlink->name = p + 1;
981
+ *p = '\0';
982
+ }
983
+ mlink_add(mlink, &st);
984
+ free(usefile);
985
+ }
986
+
987
+ static void
988
+ mlink_add(struct mlink *mlink, const struct stat *st)
989
+ {
990
+ struct inodev inodev;
991
+ struct mpage *mpage;
992
+ unsigned int slot;
993
+
994
+ assert(NULL != mlink->file);
995
+
996
+ mlink->dsec = mandoc_strdup(mlink->dsec ? mlink->dsec : "");
997
+ mlink->arch = mandoc_strdup(mlink->arch ? mlink->arch : "");
998
+ mlink->name = mandoc_strdup(mlink->name ? mlink->name : "");
999
+ mlink->fsec = mandoc_strdup(mlink->fsec ? mlink->fsec : "");
1000
+
1001
+ if ('0' == *mlink->fsec) {
1002
+ free(mlink->fsec);
1003
+ mlink->fsec = mandoc_strdup(mlink->dsec);
1004
+ mlink->fform = FORM_CAT;
1005
+ } else if ('1' <= *mlink->fsec && '9' >= *mlink->fsec)
1006
+ mlink->fform = FORM_SRC;
1007
+ else
1008
+ mlink->fform = FORM_NONE;
1009
+
1010
+ slot = ohash_qlookup(&mlinks, mlink->file);
1011
+ assert(NULL == ohash_find(&mlinks, slot));
1012
+ ohash_insert(&mlinks, slot, mlink);
1013
+
1014
+ memset(&inodev, 0, sizeof(inodev)); /* Clear padding. */
1015
+ inodev.st_ino = st->st_ino;
1016
+ inodev.st_dev = st->st_dev;
1017
+ slot = ohash_lookup_memory(&mpages, (char *)&inodev,
1018
+ sizeof(struct inodev), inodev.st_ino);
1019
+ mpage = ohash_find(&mpages, slot);
1020
+ if (NULL == mpage) {
1021
+ mpage = mandoc_calloc(1, sizeof(struct mpage));
1022
+ mpage->inodev.st_ino = inodev.st_ino;
1023
+ mpage->inodev.st_dev = inodev.st_dev;
1024
+ mpage->form = FORM_NONE;
1025
+ mpage->next = mpage_head;
1026
+ mpage_head = mpage;
1027
+ ohash_insert(&mpages, slot, mpage);
1028
+ } else
1029
+ mlink->next = mpage->mlinks;
1030
+ mpage->mlinks = mlink;
1031
+ mlink->mpage = mpage;
1032
+ }
1033
+
1034
+ static void
1035
+ mlink_free(struct mlink *mlink)
1036
+ {
1037
+
1038
+ free(mlink->dsec);
1039
+ free(mlink->arch);
1040
+ free(mlink->name);
1041
+ free(mlink->fsec);
1042
+ free(mlink);
1043
+ }
1044
+
1045
+ static void
1046
+ mpages_free(void)
1047
+ {
1048
+ struct mpage *mpage;
1049
+ struct mlink *mlink;
1050
+
1051
+ while ((mpage = mpage_head) != NULL) {
1052
+ while ((mlink = mpage->mlinks) != NULL) {
1053
+ mpage->mlinks = mlink->next;
1054
+ mlink_free(mlink);
1055
+ }
1056
+ mpage_head = mpage->next;
1057
+ free(mpage->sec);
1058
+ free(mpage->arch);
1059
+ free(mpage->title);
1060
+ free(mpage->desc);
1061
+ free(mpage);
1062
+ }
1063
+ }
1064
+
1065
+ /*
1066
+ * For each mlink to the mpage, check whether the path looks like
1067
+ * it is formatted, and if it does, check whether a source manual
1068
+ * exists by the same name, ignoring the suffix.
1069
+ * If both conditions hold, drop the mlink.
1070
+ */
1071
+ static void
1072
+ mlinks_undupe(struct mpage *mpage)
1073
+ {
1074
+ char buf[PATH_MAX];
1075
+ struct mlink **prev;
1076
+ struct mlink *mlink;
1077
+ char *bufp;
1078
+
1079
+ mpage->form = FORM_CAT;
1080
+ prev = &mpage->mlinks;
1081
+ while (NULL != (mlink = *prev)) {
1082
+ if (FORM_CAT != mlink->dform) {
1083
+ mpage->form = FORM_NONE;
1084
+ goto nextlink;
1085
+ }
1086
+ (void)strlcpy(buf, mlink->file, sizeof(buf));
1087
+ bufp = strstr(buf, "cat");
1088
+ assert(NULL != bufp);
1089
+ memcpy(bufp, "man", 3);
1090
+ if (NULL != (bufp = strrchr(buf, '.')))
1091
+ *++bufp = '\0';
1092
+ (void)strlcat(buf, mlink->dsec, sizeof(buf));
1093
+ if (NULL == ohash_find(&mlinks,
1094
+ ohash_qlookup(&mlinks, buf)))
1095
+ goto nextlink;
1096
+ if (warnings)
1097
+ say(mlink->file, "Man source exists: %s", buf);
1098
+ if (use_all)
1099
+ goto nextlink;
1100
+ *prev = mlink->next;
1101
+ mlink_free(mlink);
1102
+ continue;
1103
+ nextlink:
1104
+ prev = &(*prev)->next;
1105
+ }
1106
+ }
1107
+
1108
+ static void
1109
+ mlink_check(struct mpage *mpage, struct mlink *mlink)
1110
+ {
1111
+ struct str *str;
1112
+ unsigned int slot;
1113
+
1114
+ /*
1115
+ * Check whether the manual section given in a file
1116
+ * agrees with the directory where the file is located.
1117
+ * Some manuals have suffixes like (3p) on their
1118
+ * section number either inside the file or in the
1119
+ * directory name, some are linked into more than one
1120
+ * section, like encrypt(1) = makekey(8).
1121
+ */
1122
+
1123
+ if (FORM_SRC == mpage->form &&
1124
+ strcasecmp(mpage->sec, mlink->dsec))
1125
+ say(mlink->file, "Section \"%s\" manual in %s directory",
1126
+ mpage->sec, mlink->dsec);
1127
+
1128
+ /*
1129
+ * Manual page directories exist for each kernel
1130
+ * architecture as returned by machine(1).
1131
+ * However, many manuals only depend on the
1132
+ * application architecture as returned by arch(1).
1133
+ * For example, some (2/ARM) manuals are shared
1134
+ * across the "armish" and "zaurus" kernel
1135
+ * architectures.
1136
+ * A few manuals are even shared across completely
1137
+ * different architectures, for example fdformat(1)
1138
+ * on amd64, i386, and sparc64.
1139
+ */
1140
+
1141
+ if (strcasecmp(mpage->arch, mlink->arch))
1142
+ say(mlink->file, "Architecture \"%s\" manual in "
1143
+ "\"%s\" directory", mpage->arch, mlink->arch);
1144
+
1145
+ /*
1146
+ * XXX
1147
+ * parse_cat() doesn't set NAME_TITLE yet.
1148
+ */
1149
+
1150
+ if (FORM_CAT == mpage->form)
1151
+ return;
1152
+
1153
+ /*
1154
+ * Check whether this mlink
1155
+ * appears as a name in the NAME section.
1156
+ */
1157
+
1158
+ slot = ohash_qlookup(&names, mlink->name);
1159
+ str = ohash_find(&names, slot);
1160
+ assert(NULL != str);
1161
+ if ( ! (NAME_TITLE & str->mask))
1162
+ say(mlink->file, "Name missing in NAME section");
1163
+ }
1164
+
1165
/*
 * Run through the list of pages starting at "mpage_head"
 * and add them to the database "dba".
 *
 * This handles the parsing scheme itself, using the cues of directory
 * and filename to determine whether the file is parsable or not.
 *
 * For each page, the per-page hash tables "names" and "strings"
 * are initialized before parsing and deleted again at "nextpage".
 * "mp" is the parser handle; it is reset before each page.
 */
static void
mpages_merge(struct dba *dba, struct mparse *mp)
{
	struct mpage		*mpage, *mpage_dest;
	struct mlink		*mlink, *mlink_dest;
	struct roff_meta	*meta;
	char			*cp;
	int			 fd;

	for (mpage = mpage_head; mpage != NULL; mpage = mpage->next) {
		/* Pages that lose all their links are skipped. */
		mlinks_undupe(mpage);
		if ((mlink = mpage->mlinks) == NULL)
			continue;

		name_mask = NAME_MASK;
		mandoc_ohash_init(&names, 4, offsetof(struct str, key));
		mandoc_ohash_init(&strings, 6, offsetof(struct str, key));
		mparse_reset(mp);
		meta = NULL;

		/* Only the first link of the page is opened and parsed. */
		if ((fd = mparse_open(mp, mlink->file)) == -1) {
			say(mlink->file, "&open");
			goto nextpage;
		}

		/*
		 * Interpret the file as mdoc(7) or man(7) source
		 * code, unless it is known to be formatted.
		 * After parsing, fd is closed and set to -1; if the
		 * parser is skipped, fd stays open for parse_cat().
		 */
		if (mlink->dform != FORM_CAT || mlink->fform != FORM_CAT) {
			mparse_readfd(mp, fd, mlink->file);
			close(fd);
			fd = -1;
			meta = mparse_result(mp);
		}

		if (meta != NULL && meta->sodest != NULL) {
			/*
			 * The parse result names a link target (sodest).
			 * Look it up by name, then with ".gz" appended.
			 */
			mlink_dest = ohash_find(&mlinks,
			    ohash_qlookup(&mlinks, meta->sodest));
			if (mlink_dest == NULL) {
				mandoc_asprintf(&cp, "%s.gz", meta->sodest);
				mlink_dest = ohash_find(&mlinks,
				    ohash_qlookup(&mlinks, cp));
				free(cp);
			}
			if (mlink_dest != NULL) {

				/* The .so target exists. */

				mpage_dest = mlink_dest->mpage;
				while (1) {
					mlink->mpage = mpage_dest;

					/*
					 * If the target was already
					 * processed, add the links
					 * to the database now.
					 * Otherwise, this will
					 * happen when we come
					 * to the target.
					 */

					if (mpage_dest->dba != NULL)
						dbadd_mlink(mlink);

					/* Stop on the last link. */
					if (mlink->next == NULL)
						break;
					mlink = mlink->next;
				}

				/*
				 * Move all links to the target:
				 * splice this page's whole list in
				 * right after mlink_dest; mlink is
				 * the last element of that list here.
				 */

				mlink->next = mlink_dest->next;
				mlink_dest->next = mpage->mlinks;
				mpage->mlinks = NULL;
				goto nextpage;
			}
			/*
			 * Target not found: clear the macro set so
			 * that neither source branch below is taken.
			 */
			meta->macroset = MACROSET_NONE;
		}
		if (meta != NULL && meta->macroset == MACROSET_MDOC) {
			/* mdoc(7): missing section or arch become "". */
			mpage->form = FORM_SRC;
			mpage->sec = meta->msec;
			mpage->sec = mandoc_strdup(
			    mpage->sec == NULL ? "" : mpage->sec);
			mpage->arch = meta->arch;
			mpage->arch = mandoc_strdup(
			    mpage->arch == NULL ? "" : mpage->arch);
			mpage->title = mandoc_strdup(meta->title);
		} else if (meta != NULL && meta->macroset == MACROSET_MAN) {
			/*
			 * man(7): only accepted as source if it has a
			 * section or a title; otherwise drop the parse
			 * result and treat the file as formatted below.
			 */
			if (*meta->msec != '\0' || *meta->title != '\0') {
				mpage->form = FORM_SRC;
				mpage->sec = mandoc_strdup(meta->msec);
				mpage->arch = mandoc_strdup(mlink->arch);
				mpage->title = mandoc_strdup(meta->title);
			} else
				meta = NULL;
		}

		assert(mpage->desc == NULL);
		if (meta == NULL || meta->sodest != NULL) {
			/*
			 * No usable parse result, or a link whose target
			 * is unknown: take the metadata from the path.
			 */
			mpage->sec = mandoc_strdup(mlink->dsec);
			mpage->arch = mandoc_strdup(mlink->arch);
			mpage->title = mandoc_strdup(mlink->name);
			if (meta == NULL) {
				mpage->form = FORM_CAT;
				/* fd is -1 here if the parser ran. */
				parse_cat(mpage, fd);
			} else
				mpage->form = FORM_SRC;
		} else if (meta->macroset == MACROSET_MDOC)
			parse_mdoc(mpage, meta, meta->first);
		else
			parse_man(mpage, meta, meta->first);
		/* Fall back to the file name as the description. */
		if (mpage->desc == NULL) {
			mpage->desc = mandoc_strdup(mlink->name);
			if (warnings)
				say(mlink->file, "No one-line description, "
				    "using filename \"%s\"", mlink->name);
		}

		/* Record every file name of the page as a name key. */
		for (mlink = mpage->mlinks;
		     mlink != NULL;
		     mlink = mlink->next) {
			putkey(mpage, mlink->name, NAME_FILE);
			if (warnings && !use_all)
				mlink_check(mpage, mlink);
		}

		dbadd(dba, mpage);

nextpage:
		/* Discard the per-page tables on every path. */
		ohash_delete(&strings);
		ohash_delete(&names);
	}
}
1306
+
1307
/*
 * Heuristically extract the section and the one-line description
 * from a preformatted page.
 * The text is read from fd if that is not -1, otherwise from the
 * file named by the first mlink of the page.  The stream (and with
 * it fd) is closed before returning.
 * On success, mpage->desc is set; mpage->sec is replaced if the
 * header line contains a parenthesized section.
 */
static void
parse_cat(struct mpage *mpage, int fd)
{
	FILE		*stream;
	struct mlink	*mlink;
	char		*line, *p, *title, *sec;
	size_t		 linesz, plen, titlesz;
	ssize_t		 len;
	int		 offs;

	mlink = mpage->mlinks;
	stream = fd == -1 ? fopen(mlink->file, "r") : fdopen(fd, "r");
	if (stream == NULL) {
		/* fdopen() failed: the descriptor is still ours. */
		if (fd != -1)
			close(fd);
		if (warnings)
			say(mlink->file, "&fopen");
		return;
	}

	line = NULL;
	linesz = 0;

	/*
	 * Parse the section number from the header line,
	 * that is, from the first line that is not empty.
	 */

	while (getline(&line, &linesz, stream) != -1) {
		if (*line == '\n')
			continue;
		if ((sec = strchr(line, '(')) == NULL)
			break;
		if ((p = strchr(++sec, ')')) == NULL)
			break;
		free(mpage->sec);
		mpage->sec = mandoc_strndup(sec, p - sec);
		if (warnings && *mlink->dsec != '\0' &&
		    strcasecmp(mpage->sec, mlink->dsec))
			say(mlink->file,
			    "Section \"%s\" manual in %s directory",
			    mpage->sec, mlink->dsec);
		break;
	}

	/* Skip to first blank line. */

	while (line == NULL || *line != '\n')
		if (getline(&line, &linesz, stream) == -1)
			break;

	/*
	 * Assume the first line that is not indented
	 * is the first section header.  Skip to it.
	 */

	while (getline(&line, &linesz, stream) != -1)
		if (*line != '\n' && *line != ' ')
			break;

	/*
	 * Read up until the next section into a buffer.
	 * Strip the leading whitespace from each read line and
	 * replace its final byte (the newline, if there is one)
	 * with a space.
	 * Ignore empty (whitespace-only) lines.
	 * The buffer is not NUL-terminated while it is being built.
	 */

	titlesz = 0;
	title = NULL;

	while ((len = getline(&line, &linesz, stream)) != -1) {
		/* A line that is not indented ends the section. */
		if (*line != ' ')
			break;
		offs = 0;
		while (isspace((unsigned char)line[offs]))
			offs++;
		if (line[offs] == '\0')
			continue;
		title = mandoc_realloc(title, titlesz + len - offs);
		memcpy(title + titlesz, line + offs, len - offs);
		titlesz += len - offs;
		title[titlesz - 1] = ' ';
	}
	free(line);

	/*
	 * If no page content can be found, or the input line
	 * is already the next section header, give up and leave
	 * mpage->desc unset.
	 */

	if (NULL == title || '\0' == *title) {
		if (warnings)
			say(mlink->file, "Cannot find NAME section");
		fclose(stream);
		free(title);
		return;
	}

	/* Turn the final separator into the string terminator. */
	title[titlesz - 1] = '\0';

	/*
	 * Skip to the first dash.
	 * Use the remaining line as the description (no more than 150
	 * bytes).
	 */

	if (NULL != (p = strstr(title, "- "))) {
		for (p += 2; ' ' == *p || '\b' == *p; p++)
			/* Skip to next word. */ ;
	} else {
		if (warnings)
			say(mlink->file, "No dash in title line, "
			    "reusing \"%s\" as one-line description", title);
		p = title;
	}

	plen = strlen(p);

	/*
	 * Strip backspace-encoding from line.
	 * A backspace at the very beginning is deleted alone;
	 * any other backspace is deleted together with the byte
	 * preceding it.  Each move includes the terminating NUL.
	 * The variable "line" is reused as a cursor into "title".
	 */

	while (NULL != (line = memchr(p, '\b', plen))) {
		len = line - p;
		if (0 == len) {
			memmove(line, line + 1, plen--);
			continue;
		}
		memmove(line - 1, line + 1, plen - len);
		plen -= 2;
	}

	/*
	 * Cut off excessive one-line descriptions.
	 * Bad pages are not worth better heuristics.
	 */

	mpage->desc = mandoc_strndup(p, 150);
	fclose(stream);
	free(title);
}
1445
+
1446
/*
 * Convenience front end to putkeys() for NUL-terminated strings:
 * record the whole string "value" with the bits in "type"
 * for this particular page.
 */
static void
putkey(const struct mpage *mpage, char *value, uint64_t type)
{
	const size_t	 len = strlen(value);

	putkeys(mpage, value, len, type);
}
1454
+
1455
+ /*
1456
+ * Grok all nodes at or below a certain mdoc node into putkey().
1457
+ */
1458
+ static void
1459
+ putmdockey(const struct mpage *mpage,
1460
+ const struct roff_node *n, uint64_t m, int taboo)
1461
+ {
1462
+
1463
+ for ( ; NULL != n; n = n->next) {
1464
+ if (n->flags & taboo)
1465
+ continue;
1466
+ if (NULL != n->child)
1467
+ putmdockey(mpage, n->child, m, taboo);
1468
+ if (n->type == ROFFT_TEXT)
1469
+ putkey(mpage, n->string, m);
1470
+ }
1471
+ }
1472
+
1473
/*
 * Search the man(7) syntax tree rooted at n, depth first, for the
 * body of a section whose head is the single text node "NAME".
 * From that body, record the listed names as NAME_TITLE keys and
 * store the text following the dash in mpage->desc.
 * The recursion stops as soon as mpage->desc is set.
 */
static void
parse_man(struct mpage *mpage, const struct roff_meta *meta,
	const struct roff_node *n)
{
	const struct roff_node *head, *body;
	char		*start, *title;
	char		 byte;
	size_t		 sz;

	if (n == NULL)
		return;

	/*
	 * We're only searching for one thing: the first text child in
	 * the BODY of a NAME section.  Since we don't keep track of
	 * sections in -man, run some hoops to find out whether we're in
	 * the correct section or not.
	 */

	if (n->type == ROFFT_BODY && n->tok == MAN_SH) {
		body = n;
		if ((head = body->parent->head) != NULL &&
		    (head = head->child) != NULL &&
		    head->next == NULL &&
		    head->type == ROFFT_TEXT &&
		    strcmp(head->string, "NAME") == 0 &&
		    body->child != NULL) {

			/*
			 * Suck the entire NAME section into memory.
			 * Yes, we might run away.
			 * But too many manuals have big, spread-out
			 * NAME sections over many lines.
			 */

			title = NULL;
			deroff(&title, body);
			if (NULL == title)
				return;

			/*
			 * Go through a special heuristic dance here.
			 * Conventionally, one or more manual names are
			 * comma-specified prior to a whitespace, then a
			 * dash, then a description.  Try to puzzle out
			 * the name parts here.
			 * The buffer is tokenized in place: the
			 * separator after each name is overwritten
			 * with NUL and remembered in "byte".
			 */

			start = title;
			for ( ;; ) {
				sz = strcspn(start, " ,");
				/* No further separator: stop here. */
				if ('\0' == start[sz])
					break;

				byte = start[sz];
				start[sz] = '\0';

				/*
				 * Assume a stray trailing comma in the
				 * name list if a name begins with a dash.
				 *
				 * NOTE(review): the separator is not
				 * restored before leaving the loop, so
				 * the string seen by the code below
				 * ends right after this token - confirm
				 * that this is intended.
				 */

				if ('-' == start[0] ||
				    ('\\' == start[0] && '-' == start[1]))
					break;

				putkey(mpage, start, NAME_TITLE);
				/*
				 * The first name equal to the page title,
				 * ignoring case, also becomes NAME_HEAD.
				 */
				if ( ! (mpage->name_head_done ||
				    strcasecmp(start, meta->title))) {
					putkey(mpage, start, NAME_HEAD);
					mpage->name_head_done = 1;
				}

				/* A blank ends the list of names. */
				if (' ' == byte) {
					start += sz + 1;
					break;
				}

				/* A comma: continue with the next name. */
				assert(',' == byte);
				start += sz + 1;
				while (' ' == *start)
					start++;
			}

			/*
			 * The cursor never advanced: record what is
			 * left at the start of the buffer as the only
			 * name and leave mpage->desc unset.
			 */
			if (start == title) {
				putkey(mpage, start, NAME_TITLE);
				if ( ! (mpage->name_head_done ||
				    strcasecmp(start, meta->title))) {
					putkey(mpage, start, NAME_HEAD);
					mpage->name_head_done = 1;
				}
				free(title);
				return;
			}

			while (isspace((unsigned char)*start))
				start++;

			/* Skip one of the supported dash spellings. */
			if (0 == strncmp(start, "-", 1))
				start += 1;
			else if (0 == strncmp(start, "\\-\\-", 4))
				start += 4;
			else if (0 == strncmp(start, "\\-", 2))
				start += 2;
			else if (0 == strncmp(start, "\\(en", 4))
				start += 4;
			else if (0 == strncmp(start, "\\(em", 4))
				start += 4;

			while (' ' == *start)
				start++;

			/*
			 * Cut off excessive one-line descriptions.
			 * Bad pages are not worth better heuristics.
			 */

			mpage->desc = mandoc_strndup(start, 150);
			free(title);
			return;
		}
	}

	/* Not the NAME body: keep looking among the children. */
	for (n = n->child; n; n = n->next) {
		if (NULL != mpage->desc)
			break;
		parse_man(mpage, meta, n);
	}
}
1602
+
1603
+ static void
1604
+ parse_mdoc(struct mpage *mpage, const struct roff_meta *meta,
1605
+ const struct roff_node *n)
1606
+ {
1607
+ const struct mdoc_handler *handler;
1608
+
1609
+ for (n = n->child; n != NULL; n = n->next) {
1610
+ if (n->tok == TOKEN_NONE || n->tok < ROFF_MAX)
1611
+ continue;
1612
+ assert(n->tok >= MDOC_Dd && n->tok < MDOC_MAX);
1613
+ handler = mdoc_handlers + (n->tok - MDOC_Dd);
1614
+ if (n->flags & handler->taboo)
1615
+ continue;
1616
+
1617
+ switch (n->type) {
1618
+ case ROFFT_ELEM:
1619
+ case ROFFT_BLOCK:
1620
+ case ROFFT_HEAD:
1621
+ case ROFFT_BODY:
1622
+ case ROFFT_TAIL:
1623
+ if (handler->fp != NULL &&
1624
+ (*handler->fp)(mpage, meta, n) == 0)
1625
+ break;
1626
+ if (handler->mask)
1627
+ putmdockey(mpage, n->child,
1628
+ handler->mask, handler->taboo);
1629
+ break;
1630
+ default:
1631
+ continue;
1632
+ }
1633
+ if (NULL != n->child)
1634
+ parse_mdoc(mpage, meta, n);
1635
+ }
1636
+ }
1637
+
1638
+ static int
1639
+ parse_mdoc_Fa(struct mpage *mpage, const struct roff_meta *meta,
1640
+ const struct roff_node *n)
1641
+ {
1642
+ uint64_t mask;
1643
+
1644
+ mask = TYPE_Fa;
1645
+ if (n->sec == SEC_SYNOPSIS)
1646
+ mask |= TYPE_Vt;
1647
+
1648
+ putmdockey(mpage, n->child, mask, 0);
1649
+ return 0;
1650
+ }
1651
+
1652
+ static int
1653
+ parse_mdoc_Fd(struct mpage *mpage, const struct roff_meta *meta,
1654
+ const struct roff_node *n)
1655
+ {
1656
+ char *start, *end;
1657
+ size_t sz;
1658
+
1659
+ if (SEC_SYNOPSIS != n->sec ||
1660
+ NULL == (n = n->child) ||
1661
+ n->type != ROFFT_TEXT)
1662
+ return 0;
1663
+
1664
+ /*
1665
+ * Only consider those `Fd' macro fields that begin with an
1666
+ * "inclusion" token (versus, e.g., #define).
1667
+ */
1668
+
1669
+ if (strcmp("#include", n->string))
1670
+ return 0;
1671
+
1672
+ if ((n = n->next) == NULL || n->type != ROFFT_TEXT)
1673
+ return 0;
1674
+
1675
+ /*
1676
+ * Strip away the enclosing angle brackets and make sure we're
1677
+ * not zero-length.
1678
+ */
1679
+
1680
+ start = n->string;
1681
+ if ('<' == *start || '"' == *start)
1682
+ start++;
1683
+
1684
+ if (0 == (sz = strlen(start)))
1685
+ return 0;
1686
+
1687
+ end = &start[(int)sz - 1];
1688
+ if ('>' == *end || '"' == *end)
1689
+ end--;
1690
+
1691
+ if (end > start)
1692
+ putkeys(mpage, start, end - start + 1, TYPE_In);
1693
+ return 0;
1694
+ }
1695
+
1696
+ static void
1697
+ parse_mdoc_fname(struct mpage *mpage, const struct roff_node *n)
1698
+ {
1699
+ char *cp;
1700
+ size_t sz;
1701
+
1702
+ if (n->type != ROFFT_TEXT)
1703
+ return;
1704
+
1705
+ /* Skip function pointer punctuation. */
1706
+
1707
+ cp = n->string;
1708
+ while (*cp == '(' || *cp == '*')
1709
+ cp++;
1710
+ sz = strcspn(cp, "()");
1711
+
1712
+ putkeys(mpage, cp, sz, TYPE_Fn);
1713
+ if (n->sec == SEC_SYNOPSIS)
1714
+ putkeys(mpage, cp, sz, NAME_SYN);
1715
+ }
1716
+
1717
+ static int
1718
+ parse_mdoc_Fn(struct mpage *mpage, const struct roff_meta *meta,
1719
+ const struct roff_node *n)
1720
+ {
1721
+ uint64_t mask;
1722
+
1723
+ if (n->child == NULL)
1724
+ return 0;
1725
+
1726
+ parse_mdoc_fname(mpage, n->child);
1727
+
1728
+ n = n->child->next;
1729
+ if (n != NULL && n->type == ROFFT_TEXT) {
1730
+ mask = TYPE_Fa;
1731
+ if (n->sec == SEC_SYNOPSIS)
1732
+ mask |= TYPE_Vt;
1733
+ putmdockey(mpage, n, mask, 0);
1734
+ }
1735
+
1736
+ return 0;
1737
+ }
1738
+
1739
+ static int
1740
+ parse_mdoc_Fo(struct mpage *mpage, const struct roff_meta *meta,
1741
+ const struct roff_node *n)
1742
+ {
1743
+
1744
+ if (n->type != ROFFT_HEAD)
1745
+ return 1;
1746
+
1747
+ if (n->child != NULL)
1748
+ parse_mdoc_fname(mpage, n->child);
1749
+
1750
+ return 0;
1751
+ }
1752
+
1753
+ static int
1754
+ parse_mdoc_Va(struct mpage *mpage, const struct roff_meta *meta,
1755
+ const struct roff_node *n)
1756
+ {
1757
+ char *cp;
1758
+
1759
+ if (n->type != ROFFT_ELEM && n->type != ROFFT_BODY)
1760
+ return 0;
1761
+
1762
+ if (n->child != NULL &&
1763
+ n->child->next == NULL &&
1764
+ n->child->type == ROFFT_TEXT)
1765
+ return 1;
1766
+
1767
+ cp = NULL;
1768
+ deroff(&cp, n);
1769
+ if (cp != NULL) {
1770
+ putkey(mpage, cp, TYPE_Vt | (n->tok == MDOC_Va ||
1771
+ n->type == ROFFT_BODY ? TYPE_Va : 0));
1772
+ free(cp);
1773
+ }
1774
+
1775
+ return 0;
1776
+ }
1777
+
1778
+ static int
1779
+ parse_mdoc_Xr(struct mpage *mpage, const struct roff_meta *meta,
1780
+ const struct roff_node *n)
1781
+ {
1782
+ char *cp;
1783
+
1784
+ if (NULL == (n = n->child))
1785
+ return 0;
1786
+
1787
+ if (NULL == n->next) {
1788
+ putkey(mpage, n->string, TYPE_Xr);
1789
+ return 0;
1790
+ }
1791
+
1792
+ mandoc_asprintf(&cp, "%s(%s)", n->string, n->next->string);
1793
+ putkey(mpage, cp, TYPE_Xr);
1794
+ free(cp);
1795
+ return 0;
1796
+ }
1797
+
1798
+ static int
1799
+ parse_mdoc_Nd(struct mpage *mpage, const struct roff_meta *meta,
1800
+ const struct roff_node *n)
1801
+ {
1802
+
1803
+ if (n->type == ROFFT_BODY)
1804
+ deroff(&mpage->desc, n);
1805
+ return 0;
1806
+ }
1807
+
1808
+ static int
1809
+ parse_mdoc_Nm(struct mpage *mpage, const struct roff_meta *meta,
1810
+ const struct roff_node *n)
1811
+ {
1812
+
1813
+ if (SEC_NAME == n->sec)
1814
+ putmdockey(mpage, n->child, NAME_TITLE, 0);
1815
+ else if (n->sec == SEC_SYNOPSIS && n->type == ROFFT_HEAD) {
1816
+ if (n->child == NULL)
1817
+ putkey(mpage, meta->name, NAME_SYN);
1818
+ else
1819
+ putmdockey(mpage, n->child, NAME_SYN, 0);
1820
+ }
1821
+ if ( ! (mpage->name_head_done ||
1822
+ n->child == NULL || n->child->string == NULL ||
1823
+ strcasecmp(n->child->string, meta->title))) {
1824
+ putkey(mpage, n->child->string, NAME_HEAD);
1825
+ mpage->name_head_done = 1;
1826
+ }
1827
+ return 0;
1828
+ }
1829
+
1830
+ static int
1831
+ parse_mdoc_Sh(struct mpage *mpage, const struct roff_meta *meta,
1832
+ const struct roff_node *n)
1833
+ {
1834
+
1835
+ return n->sec == SEC_CUSTOM && n->type == ROFFT_HEAD;
1836
+ }
1837
+
1838
+ static int
1839
+ parse_mdoc_head(struct mpage *mpage, const struct roff_meta *meta,
1840
+ const struct roff_node *n)
1841
+ {
1842
+
1843
+ return n->type == ROFFT_HEAD;
1844
+ }
1845
+
1846
+ /*
1847
+ * Add a string to the hash table for the current manual.
1848
+ * Each string has a bitmask telling which macros it belongs to.
1849
+ * When we finish the manual, we'll dump the table.
1850
+ */
1851
+ static void
1852
+ putkeys(const struct mpage *mpage, char *cp, size_t sz, uint64_t v)
1853
+ {
1854
+ struct ohash *htab;
1855
+ struct str *s;
1856
+ const char *end;
1857
+ unsigned int slot;
1858
+ int i, mustfree;
1859
+
1860
+ if (0 == sz)
1861
+ return;
1862
+
1863
+ mustfree = render_string(&cp, &sz);
1864
+
1865
+ if (TYPE_Nm & v) {
1866
+ htab = &names;
1867
+ v &= name_mask;
1868
+ if (v & NAME_FIRST)
1869
+ name_mask &= ~NAME_FIRST;
1870
+ if (debug > 1)
1871
+ say(mpage->mlinks->file,
1872
+ "Adding name %*s, bits=0x%llx", (int)sz, cp,
1873
+ (unsigned long long)v);
1874
+ } else {
1875
+ htab = &strings;
1876
+ if (debug > 1)
1877
+ for (i = 0; i < KEY_MAX; i++)
1878
+ if ((uint64_t)1 << i & v)
1879
+ say(mpage->mlinks->file,
1880
+ "Adding key %s=%*s",
1881
+ mansearch_keynames[i], (int)sz, cp);
1882
+ }
1883
+
1884
+ end = cp + sz;
1885
+ slot = ohash_qlookupi(htab, cp, &end);
1886
+ s = ohash_find(htab, slot);
1887
+
1888
+ if (NULL != s && mpage == s->mpage) {
1889
+ s->mask |= v;
1890
+ return;
1891
+ } else if (NULL == s) {
1892
+ s = mandoc_calloc(1, sizeof(struct str) + sz + 1);
1893
+ memcpy(s->key, cp, sz);
1894
+ ohash_insert(htab, slot, s);
1895
+ }
1896
+ s->mpage = mpage;
1897
+ s->mask = v;
1898
+
1899
+ if (mustfree)
1900
+ free(cp);
1901
+ }
1902
+
1903
/*
 * Take a Unicode codepoint and produce its UTF-8 encoding.
 * The encoding is written to "out" and NUL-terminated.
 * The original variable-length scheme of up to six bytes is used,
 * covering all values up to 0x7FFFFFFF.
 * Returns the number of bytes written, not counting the NUL,
 * or 0 without touching "out" if cp is larger than that.
 */
static size_t
utf8(unsigned int cp, char out[7])
{
	/* Marker bits of the lead byte, indexed by sequence length. */
	static const unsigned char lead[7] =
	    { 0, 0, 192, 224, 240, 248, 252 };
	size_t	 len, pos;

	if (cp <= 0x0000007F) {
		out[0] = (char)cp;
		out[1] = '\0';
		return 1;
	}

	if (cp <= 0x000007FF)
		len = 2;
	else if (cp <= 0x0000FFFF)
		len = 3;
	else if (cp <= 0x001FFFFF)
		len = 4;
	else if (cp <= 0x03FFFFFF)
		len = 5;
	else if (cp <= 0x7FFFFFFF)
		len = 6;
	else
		return 0;

	/* Fill the continuation bytes back to front, six bits each. */
	for (pos = len - 1; pos > 0; pos--) {
		out[pos] = (char)((cp & 63) | 128);
		cp >>= 6;
	}

	/* What remains of cp fits into the lead byte. */
	out[0] = (char)(cp | lead[len]);
	out[len] = '\0';
	return len;
}
1954
+
1955
/*
 * If the string contains escape sequences,
 * replace it with an allocated rendering and return 1,
 * such that the caller can free it after use.
 * Otherwise, do nothing and return 0.
 *
 * On entry, *public points to the string and *psz is its length.
 * In both cases, *psz is reduced to exclude trailing blanks.
 * An allocated rendering is NUL-terminated at the new length;
 * the original string is never modified.
 */
static int
render_string(char **public, size_t *psz)
{
	const char	*src, *scp, *addcp, *seq;
	char		*dst;
	size_t		 ssz, dsz, addsz;
	char		 utfbuf[7], res[6];
	int		 seqlen, unicode;

	/* The set of bytes that require special treatment. */
	res[0] = '\\';
	res[1] = '\t';
	res[2] = ASCII_NBRSP;
	res[3] = ASCII_HYPH;
	res[4] = ASCII_BREAK;
	res[5] = '\0';

	/*
	 * scp is the read position in the source string.
	 * dst remains NULL until the first special byte is found;
	 * then it holds dsz bytes of output in a buffer of size
	 * ssz + 1, where ssz grows by the size of each rendered glyph.
	 */
	src = scp = *public;
	ssz = *psz;
	dst = NULL;
	dsz = 0;

	while (scp < src + *psz) {

		/* Leave normal characters unchanged. */

		/*
		 * NOTE(review): strchr() also matches the terminator
		 * of res, so a NUL byte within the first *psz bytes
		 * would end up in the abort() below - presumably
		 * callers never pass one; confirm.
		 */
		if (strchr(res, *scp) == NULL) {
			if (dst != NULL)
				dst[dsz++] = *scp;
			scp++;
			continue;
		}

		/*
		 * Found something that requires replacing,
		 * make sure we have a destination buffer.
		 * Everything read so far is copied over.
		 */

		if (dst == NULL) {
			dst = mandoc_malloc(ssz + 1);
			dsz = scp - src;
			memcpy(dst, src, dsz);
		}

		/* Handle single-char special characters. */

		switch (*scp) {
		case '\\':
			/* Escape sequence, handled below the switch. */
			break;
		case '\t':
		case ASCII_NBRSP:
			/* Both are rendered as a plain blank. */
			dst[dsz++] = ' ';
			scp++;
			continue;
		case ASCII_HYPH:
			dst[dsz++] = '-';
			/* FALLTHROUGH */
		case ASCII_BREAK:
			/* The break marker itself produces no output. */
			scp++;
			continue;
		default:
			abort();
		}

		/*
		 * Found an escape sequence.
		 * Read past the slash, then parse it.
		 * Ignore everything except characters.
		 */

		scp++;
		if (mandoc_escape(&scp, &seq, &seqlen) != ESCAPE_SPECIAL)
			continue;

		/*
		 * Render the special character
		 * as either UTF-8 or ASCII.
		 * Characters that cannot be rendered are dropped.
		 */

		if (write_utf8) {
			unicode = mchars_spec2cp(seq, seqlen);
			if (unicode <= 0)
				continue;
			addsz = utf8(unicode, utfbuf);
			if (addsz == 0)
				continue;
			addcp = utfbuf;
		} else {
			addcp = mchars_spec2str(seq, seqlen, &addsz);
			if (addcp == NULL)
				continue;
			if (*addcp == ASCII_NBRSP) {
				addcp = " ";
				addsz = 1;
			}
		}

		/* Copy the rendered glyph into the stream. */

		ssz += addsz;
		dst = mandoc_realloc(dst, ssz + 1);
		memcpy(dst + dsz, addcp, addsz);
		dsz += addsz;
	}
	/* Hand the rendering, if there is one, to the caller. */
	if (dst != NULL) {
		*public = dst;
		*psz = dsz;
	}

	/* Trim trailing whitespace and NUL-terminate. */

	while (*psz > 0 && (*public)[*psz - 1] == ' ')
		--*psz;
	if (dst != NULL) {
		(*public)[*psz] = '\0';
		return 1;
	} else
		return 0;
}
2079
+
2080
+ static void
2081
+ dbadd_mlink(const struct mlink *mlink)
2082
+ {
2083
+ dba_page_alias(mlink->mpage->dba, mlink->name, NAME_FILE);
2084
+ dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->dsec);
2085
+ dba_page_add(mlink->mpage->dba, DBP_SECT, mlink->fsec);
2086
+ dba_page_add(mlink->mpage->dba, DBP_ARCH, mlink->arch);
2087
+ dba_page_add(mlink->mpage->dba, DBP_FILE, mlink->file);
2088
+ }
2089
+
2090
+ /*
2091
+ * Flush the current page's terms (and their bits) into the database.
2092
+ * Also, handle escape sequences at the last possible moment.
2093
+ */
2094
+ static void
2095
+ dbadd(struct dba *dba, struct mpage *mpage)
2096
+ {
2097
+ struct mlink *mlink;
2098
+ struct str *key;
2099
+ char *cp;
2100
+ uint64_t mask;
2101
+ size_t i;
2102
+ unsigned int slot;
2103
+ int mustfree;
2104
+
2105
+ mlink = mpage->mlinks;
2106
+
2107
+ if (nodb) {
2108
+ for (key = ohash_first(&names, &slot); NULL != key;
2109
+ key = ohash_next(&names, &slot))
2110
+ free(key);
2111
+ for (key = ohash_first(&strings, &slot); NULL != key;
2112
+ key = ohash_next(&strings, &slot))
2113
+ free(key);
2114
+ if (0 == debug)
2115
+ return;
2116
+ while (NULL != mlink) {
2117
+ fputs(mlink->name, stdout);
2118
+ if (NULL == mlink->next ||
2119
+ strcmp(mlink->dsec, mlink->next->dsec) ||
2120
+ strcmp(mlink->fsec, mlink->next->fsec) ||
2121
+ strcmp(mlink->arch, mlink->next->arch)) {
2122
+ putchar('(');
2123
+ if ('\0' == *mlink->dsec)
2124
+ fputs(mlink->fsec, stdout);
2125
+ else
2126
+ fputs(mlink->dsec, stdout);
2127
+ if ('\0' != *mlink->arch)
2128
+ printf("/%s", mlink->arch);
2129
+ putchar(')');
2130
+ }
2131
+ mlink = mlink->next;
2132
+ if (NULL != mlink)
2133
+ fputs(", ", stdout);
2134
+ }
2135
+ printf(" - %s\n", mpage->desc);
2136
+ return;
2137
+ }
2138
+
2139
+ if (debug)
2140
+ say(mlink->file, "Adding to database");
2141
+
2142
+ cp = mpage->desc;
2143
+ i = strlen(cp);
2144
+ mustfree = render_string(&cp, &i);
2145
+ mpage->dba = dba_page_new(dba->pages,
2146
+ *mpage->arch == '\0' ? mlink->arch : mpage->arch,
2147
+ cp, mlink->file, mpage->form);
2148
+ if (mustfree)
2149
+ free(cp);
2150
+ dba_page_add(mpage->dba, DBP_SECT, mpage->sec);
2151
+
2152
+ while (mlink != NULL) {
2153
+ dbadd_mlink(mlink);
2154
+ mlink = mlink->next;
2155
+ }
2156
+
2157
+ for (key = ohash_first(&names, &slot); NULL != key;
2158
+ key = ohash_next(&names, &slot)) {
2159
+ assert(key->mpage == mpage);
2160
+ dba_page_alias(mpage->dba, key->key, key->mask);
2161
+ free(key);
2162
+ }
2163
+ for (key = ohash_first(&strings, &slot); NULL != key;
2164
+ key = ohash_next(&strings, &slot)) {
2165
+ assert(key->mpage == mpage);
2166
+ i = 0;
2167
+ for (mask = TYPE_Xr; mask <= TYPE_Lb; mask *= 2) {
2168
+ if (key->mask & mask)
2169
+ dba_macro_add(dba->macros, i,
2170
+ key->key, mpage->dba);
2171
+ i++;
2172
+ }
2173
+ free(key);
2174
+ }
2175
+ }
2176
+
2177
+ static void
2178
+ dbprune(struct dba *dba)
2179
+ {
2180
+ struct dba_array *page, *files;
2181
+ char *file;
2182
+
2183
+ dba_array_FOREACH(dba->pages, page) {
2184
+ files = dba_array_get(page, DBP_FILE);
2185
+ dba_array_FOREACH(files, file) {
2186
+ if (*file < ' ')
2187
+ file++;
2188
+ if (ohash_find(&mlinks, ohash_qlookup(&mlinks,
2189
+ file)) != NULL) {
2190
+ if (debug)
2191
+ say(file, "Deleting from database");
2192
+ dba_array_del(dba->pages);
2193
+ break;
2194
+ }
2195
+ }
2196
+ }
2197
+ }
2198
+
2199
+ /*
2200
+ * Write the database from memory to disk.
2201
+ */
2202
+ static void
2203
+ dbwrite(struct dba *dba)
2204
+ {
2205
+ struct stat sb1, sb2;
2206
+ char tfn[33], *cp1, *cp2;
2207
+ off_t i;
2208
+ int fd1, fd2;
2209
+
2210
+ /*
2211
+ * Do not write empty databases, and delete existing ones
2212
+ * when makewhatis -u causes them to become empty.
2213
+ */
2214
+
2215
+ dba_array_start(dba->pages);
2216
+ if (dba_array_next(dba->pages) == NULL) {
2217
+ if (unlink(MANDOC_DB) == -1 && errno != ENOENT)
2218
+ say(MANDOC_DB, "&unlink");
2219
+ return;
2220
+ }
2221
+
2222
+ /*
2223
+ * Build the database in a temporary file,
2224
+ * then atomically move it into place.
2225
+ */
2226
+
2227
+ if (dba_write(MANDOC_DB "~", dba) != -1) {
2228
+ if (rename(MANDOC_DB "~", MANDOC_DB) == -1) {
2229
+ exitcode = (int)MANDOCLEVEL_SYSERR;
2230
+ say(MANDOC_DB, "&rename");
2231
+ unlink(MANDOC_DB "~");
2232
+ }
2233
+ return;
2234
+ }
2235
+
2236
+ /*
2237
+ * We lack write permission and cannot replace the database
2238
+ * file, but let's at least check whether the data changed.
2239
+ */
2240
+
2241
+ (void)strlcpy(tfn, "/tmp/mandocdb.XXXXXXXX", sizeof(tfn));
2242
+ if (mkdtemp(tfn) == NULL) {
2243
+ exitcode = (int)MANDOCLEVEL_SYSERR;
2244
+ say("", "&%s", tfn);
2245
+ return;
2246
+ }
2247
+ cp1 = cp2 = MAP_FAILED;
2248
+ fd1 = fd2 = -1;
2249
+ (void)strlcat(tfn, "/" MANDOC_DB, sizeof(tfn));
2250
+ if (dba_write(tfn, dba) == -1) {
2251
+ say(tfn, "&dba_write");
2252
+ goto err;
2253
+ }
2254
+ if ((fd1 = open(MANDOC_DB, O_RDONLY, 0)) == -1) {
2255
+ say(MANDOC_DB, "&open");
2256
+ goto err;
2257
+ }
2258
+ if ((fd2 = open(tfn, O_RDONLY, 0)) == -1) {
2259
+ say(tfn, "&open");
2260
+ goto err;
2261
+ }
2262
+ if (fstat(fd1, &sb1) == -1) {
2263
+ say(MANDOC_DB, "&fstat");
2264
+ goto err;
2265
+ }
2266
+ if (fstat(fd2, &sb2) == -1) {
2267
+ say(tfn, "&fstat");
2268
+ goto err;
2269
+ }
2270
+ if (sb1.st_size != sb2.st_size)
2271
+ goto err;
2272
+ if ((cp1 = mmap(NULL, sb1.st_size, PROT_READ, MAP_PRIVATE,
2273
+ fd1, 0)) == MAP_FAILED) {
2274
+ say(MANDOC_DB, "&mmap");
2275
+ goto err;
2276
+ }
2277
+ if ((cp2 = mmap(NULL, sb2.st_size, PROT_READ, MAP_PRIVATE,
2278
+ fd2, 0)) == MAP_FAILED) {
2279
+ say(tfn, "&mmap");
2280
+ goto err;
2281
+ }
2282
+ for (i = 0; i < sb1.st_size; i++)
2283
+ if (cp1[i] != cp2[i])
2284
+ goto err;
2285
+ goto out;
2286
+
2287
+ err:
2288
+ exitcode = (int)MANDOCLEVEL_SYSERR;
2289
+ say(MANDOC_DB, "Data changed, but cannot replace database");
2290
+
2291
+ out:
2292
+ if (cp1 != MAP_FAILED)
2293
+ munmap(cp1, sb1.st_size);
2294
+ if (cp2 != MAP_FAILED)
2295
+ munmap(cp2, sb2.st_size);
2296
+ if (fd1 != -1)
2297
+ close(fd1);
2298
+ if (fd2 != -1)
2299
+ close(fd2);
2300
+ unlink(tfn);
2301
+ *strrchr(tfn, '/') = '\0';
2302
+ rmdir(tfn);
2303
+ }
2304
+
2305
+ static int
2306
+ set_basedir(const char *targetdir, int report_baddir)
2307
+ {
2308
+ static char startdir[PATH_MAX];
2309
+ static int getcwd_status; /* 1 = ok, 2 = failure */
2310
+ static int chdir_status; /* 1 = changed directory */
2311
+
2312
+ /*
2313
+ * Remember the original working directory, if possible.
2314
+ * This will be needed if the second or a later directory
2315
+ * on the command line is given as a relative path.
2316
+ * Do not error out if the current directory is not
2317
+ * searchable: Maybe it won't be needed after all.
2318
+ */
2319
+ if (getcwd_status == 0) {
2320
+ if (getcwd(startdir, sizeof(startdir)) == NULL) {
2321
+ getcwd_status = 2;
2322
+ (void)strlcpy(startdir, strerror(errno),
2323
+ sizeof(startdir));
2324
+ } else
2325
+ getcwd_status = 1;
2326
+ }
2327
+
2328
+ /*
2329
+ * We are leaving the old base directory.
2330
+ * Do not use it any longer, not even for messages.
2331
+ */
2332
+ *basedir = '\0';
2333
+ basedir_len = 0;
2334
+
2335
+ /*
2336
+ * If and only if the directory was changed earlier and
2337
+ * the next directory to process is given as a relative path,
2338
+ * first go back, or bail out if that is impossible.
2339
+ */
2340
+ if (chdir_status && *targetdir != '/') {
2341
+ if (getcwd_status == 2) {
2342
+ exitcode = (int)MANDOCLEVEL_SYSERR;
2343
+ say("", "getcwd: %s", startdir);
2344
+ return 0;
2345
+ }
2346
+ if (chdir(startdir) == -1) {
2347
+ exitcode = (int)MANDOCLEVEL_SYSERR;
2348
+ say("", "&chdir %s", startdir);
2349
+ return 0;
2350
+ }
2351
+ }
2352
+
2353
+ /*
2354
+ * Always resolve basedir to the canonicalized absolute
2355
+ * pathname and append a trailing slash, such that
2356
+ * we can reliably check whether files are inside.
2357
+ */
2358
+ if (realpath(targetdir, basedir) == NULL) {
2359
+ if (report_baddir || errno != ENOENT) {
2360
+ exitcode = (int)MANDOCLEVEL_BADARG;
2361
+ say("", "&%s: realpath", targetdir);
2362
+ }
2363
+ *basedir = '\0';
2364
+ return 0;
2365
+ } else if (chdir(basedir) == -1) {
2366
+ if (report_baddir || errno != ENOENT) {
2367
+ exitcode = (int)MANDOCLEVEL_BADARG;
2368
+ say("", "&chdir");
2369
+ }
2370
+ *basedir = '\0';
2371
+ return 0;
2372
+ }
2373
+ chdir_status = 1;
2374
+ basedir_len = strlen(basedir);
2375
+ if (basedir[basedir_len - 1] != '/') {
2376
+ if (basedir_len >= PATH_MAX - 1) {
2377
+ exitcode = (int)MANDOCLEVEL_SYSERR;
2378
+ say("", "Filename too long");
2379
+ *basedir = '\0';
2380
+ basedir_len = 0;
2381
+ return 0;
2382
+ }
2383
+ basedir[basedir_len++] = '/';
2384
+ basedir[basedir_len] = '\0';
2385
+ }
2386
+ return 1;
2387
+ }
2388
+
2389
#ifdef READ_ALLOWED_PATH
/*
 * Check whether the path "candidate" lies inside one of the
 * directory trees listed in the colon-separated compile-time
 * string READ_ALLOWED_PATH.  Empty list entries are skipped.
 * Returns 1 if some entry matches, 0 otherwise.
 *
 * An entry only matches on a path component boundary:
 * the entry "/a/b" covers "/a/b" and "/a/b/c" but not "/a/bc".
 * A plain prefix comparison would wrongly accept sibling
 * directories that merely share the leading characters.
 */
static int
read_allowed(const char *candidate)
{
	const char	*cp;
	size_t		 len;

	for (cp = READ_ALLOWED_PATH;; cp += len) {
		while (*cp == ':')
			cp++;
		if (*cp == '\0')
			return 0;

		/* Here *cp is neither ':' nor NUL, hence len > 0. */
		len = strcspn(cp, ":");
		if (strncmp(candidate, cp, len) != 0)
			continue;

		/*
		 * The first len bytes agree and contain no NUL, so
		 * candidate[len] is still inside the string.  Accept
		 * if the entry itself ends in a slash, or if the
		 * candidate ends or starts a new component here.
		 */
		if (cp[len - 1] == '/' ||
		    candidate[len] == '/' || candidate[len] == '\0')
			return 1;
	}
}
#endif
2407
+
2408
+ static void
2409
+ say(const char *file, const char *format, ...)
2410
+ {
2411
+ va_list ap;
2412
+ int use_errno;
2413
+
2414
+ if (*basedir != '\0')
2415
+ fprintf(stderr, "%s", basedir);
2416
+ if (*basedir != '\0' && *file != '\0')
2417
+ fputc('/', stderr);
2418
+ if (*file != '\0')
2419
+ fprintf(stderr, "%s", file);
2420
+
2421
+ use_errno = 1;
2422
+ if (format != NULL) {
2423
+ switch (*format) {
2424
+ case '&':
2425
+ format++;
2426
+ break;
2427
+ case '\0':
2428
+ format = NULL;
2429
+ break;
2430
+ default:
2431
+ use_errno = 0;
2432
+ break;
2433
+ }
2434
+ }
2435
+ if (format != NULL) {
2436
+ if (*basedir != '\0' || *file != '\0')
2437
+ fputs(": ", stderr);
2438
+ va_start(ap, format);
2439
+ vfprintf(stderr, format, ap);
2440
+ va_end(ap);
2441
+ }
2442
+ if (use_errno) {
2443
+ if (*basedir != '\0' || *file != '\0' || format != NULL)
2444
+ fputs(": ", stderr);
2445
+ perror(NULL);
2446
+ } else
2447
+ fputc('\n', stderr);
2448
+ }