libarchive-static 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (157) hide show
  1. data/ext/Makefile +6 -0
  2. data/ext/extconf.rb +61 -0
  3. data/ext/libarchive-0.1.1/COPYING.libarchive +60 -0
  4. data/ext/libarchive-0.1.1/LICENSE.libbzip2 +42 -0
  5. data/ext/libarchive-0.1.1/README.txt +143 -0
  6. data/ext/libarchive-0.1.1/ext/Makefile.in +0 -0
  7. data/ext/libarchive-0.1.1/ext/archive_read_support_compression.c +31 -0
  8. data/ext/libarchive-0.1.1/ext/archive_read_support_compression.h +6 -0
  9. data/ext/libarchive-0.1.1/ext/archive_read_support_format.c +32 -0
  10. data/ext/libarchive-0.1.1/ext/archive_read_support_format.h +6 -0
  11. data/ext/libarchive-0.1.1/ext/archive_write_open_rb_str.c +29 -0
  12. data/ext/libarchive-0.1.1/ext/archive_write_open_rb_str.h +6 -0
  13. data/ext/libarchive-0.1.1/ext/archive_write_set_compression.c +32 -0
  14. data/ext/libarchive-0.1.1/ext/archive_write_set_compression.h +6 -0
  15. data/ext/libarchive-0.1.1/ext/config.h.in +22 -0
  16. data/ext/libarchive-0.1.1/ext/configure +3904 -0
  17. data/ext/libarchive-0.1.1/ext/configure.in +11 -0
  18. data/ext/libarchive-0.1.1/ext/depend +19 -0
  19. data/ext/libarchive-0.1.1/ext/extconf.rb +6 -0
  20. data/ext/libarchive-0.1.1/ext/install-sh +250 -0
  21. data/ext/libarchive-0.1.1/ext/libarchive.c +89 -0
  22. data/ext/libarchive-0.1.1/ext/libarchive_archive.c +84 -0
  23. data/ext/libarchive-0.1.1/ext/libarchive_entry.c +1015 -0
  24. data/ext/libarchive-0.1.1/ext/libarchive_internal.h +155 -0
  25. data/ext/libarchive-0.1.1/ext/libarchive_reader.c +328 -0
  26. data/ext/libarchive-0.1.1/ext/libarchive_win32.h +52 -0
  27. data/ext/libarchive-0.1.1/ext/libarchive_writer.c +246 -0
  28. data/ext/libarchive-0.1.1/libarchive.c +1762 -0
  29. data/ext/libarchive-2.8.4/Makefile.in +7076 -0
  30. data/ext/libarchive-2.8.4/build/autoconf/check_stdcall_func.m4 +51 -0
  31. data/ext/libarchive-2.8.4/build/autoconf/compile +143 -0
  32. data/ext/libarchive-2.8.4/build/autoconf/config.guess +1502 -0
  33. data/ext/libarchive-2.8.4/build/autoconf/config.sub +1708 -0
  34. data/ext/libarchive-2.8.4/build/autoconf/depcomp +630 -0
  35. data/ext/libarchive-2.8.4/build/autoconf/install-sh +291 -0
  36. data/ext/libarchive-2.8.4/build/autoconf/la_uid_t.m4 +20 -0
  37. data/ext/libarchive-2.8.4/build/autoconf/ltmain.sh +8406 -0
  38. data/ext/libarchive-2.8.4/build/autoconf/missing +376 -0
  39. data/ext/libarchive-2.8.4/build/pkgconfig/libarchive.pc.in +10 -0
  40. data/ext/libarchive-2.8.4/config.h.in +772 -0
  41. data/ext/libarchive-2.8.4/configure +17916 -0
  42. data/ext/libarchive-2.8.4/libarchive/archive.h +741 -0
  43. data/ext/libarchive-2.8.4/libarchive/archive_check_magic.c +134 -0
  44. data/ext/libarchive-2.8.4/libarchive/archive_crc32.h +66 -0
  45. data/ext/libarchive-2.8.4/libarchive/archive_endian.h +162 -0
  46. data/ext/libarchive-2.8.4/libarchive/archive_entry.c +2202 -0
  47. data/ext/libarchive-2.8.4/libarchive/archive_entry.h +524 -0
  48. data/ext/libarchive-2.8.4/libarchive/archive_entry_copy_bhfi.c +74 -0
  49. data/ext/libarchive-2.8.4/libarchive/archive_entry_copy_stat.c +77 -0
  50. data/ext/libarchive-2.8.4/libarchive/archive_entry_link_resolver.c +405 -0
  51. data/ext/libarchive-2.8.4/libarchive/archive_entry_private.h +184 -0
  52. data/ext/libarchive-2.8.4/libarchive/archive_entry_stat.c +118 -0
  53. data/ext/libarchive-2.8.4/libarchive/archive_entry_strmode.c +87 -0
  54. data/ext/libarchive-2.8.4/libarchive/archive_entry_xattr.c +158 -0
  55. data/ext/libarchive-2.8.4/libarchive/archive_hash.h +281 -0
  56. data/ext/libarchive-2.8.4/libarchive/archive_platform.h +165 -0
  57. data/ext/libarchive-2.8.4/libarchive/archive_private.h +124 -0
  58. data/ext/libarchive-2.8.4/libarchive/archive_read.c +1249 -0
  59. data/ext/libarchive-2.8.4/libarchive/archive_read_data_into_fd.c +93 -0
  60. data/ext/libarchive-2.8.4/libarchive/archive_read_disk.c +198 -0
  61. data/ext/libarchive-2.8.4/libarchive/archive_read_disk_entry_from_file.c +570 -0
  62. data/ext/libarchive-2.8.4/libarchive/archive_read_disk_private.h +62 -0
  63. data/ext/libarchive-2.8.4/libarchive/archive_read_disk_set_standard_lookup.c +303 -0
  64. data/ext/libarchive-2.8.4/libarchive/archive_read_extract.c +182 -0
  65. data/ext/libarchive-2.8.4/libarchive/archive_read_open_fd.c +190 -0
  66. data/ext/libarchive-2.8.4/libarchive/archive_read_open_file.c +165 -0
  67. data/ext/libarchive-2.8.4/libarchive/archive_read_open_filename.c +272 -0
  68. data/ext/libarchive-2.8.4/libarchive/archive_read_open_memory.c +156 -0
  69. data/ext/libarchive-2.8.4/libarchive/archive_read_private.h +199 -0
  70. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_all.c +60 -0
  71. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_bzip2.c +353 -0
  72. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_compress.c +444 -0
  73. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_gzip.c +465 -0
  74. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_none.c +40 -0
  75. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_program.c +459 -0
  76. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_rpm.c +287 -0
  77. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_uu.c +627 -0
  78. data/ext/libarchive-2.8.4/libarchive/archive_read_support_compression_xz.c +708 -0
  79. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_all.c +43 -0
  80. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_ar.c +584 -0
  81. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_cpio.c +777 -0
  82. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_empty.c +93 -0
  83. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_iso9660.c +2830 -0
  84. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_mtree.c +1304 -0
  85. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_raw.c +185 -0
  86. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_tar.c +2418 -0
  87. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_xar.c +3151 -0
  88. data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_zip.c +903 -0
  89. data/ext/libarchive-2.8.4/libarchive/archive_string.c +453 -0
  90. data/ext/libarchive-2.8.4/libarchive/archive_string.h +148 -0
  91. data/ext/libarchive-2.8.4/libarchive/archive_string_sprintf.c +164 -0
  92. data/ext/libarchive-2.8.4/libarchive/archive_util.c +391 -0
  93. data/ext/libarchive-2.8.4/libarchive/archive_virtual.c +94 -0
  94. data/ext/libarchive-2.8.4/libarchive/archive_windows.c +1236 -0
  95. data/ext/libarchive-2.8.4/libarchive/archive_windows.h +347 -0
  96. data/ext/libarchive-2.8.4/libarchive/archive_write.c +466 -0
  97. data/ext/libarchive-2.8.4/libarchive/archive_write_disk.c +2628 -0
  98. data/ext/libarchive-2.8.4/libarchive/archive_write_disk_private.h +38 -0
  99. data/ext/libarchive-2.8.4/libarchive/archive_write_disk_set_standard_lookup.c +262 -0
  100. data/ext/libarchive-2.8.4/libarchive/archive_write_open_fd.c +141 -0
  101. data/ext/libarchive-2.8.4/libarchive/archive_write_open_file.c +105 -0
  102. data/ext/libarchive-2.8.4/libarchive/archive_write_open_filename.c +162 -0
  103. data/ext/libarchive-2.8.4/libarchive/archive_write_open_memory.c +126 -0
  104. data/ext/libarchive-2.8.4/libarchive/archive_write_private.h +122 -0
  105. data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_bzip2.c +408 -0
  106. data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_compress.c +492 -0
  107. data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_gzip.c +477 -0
  108. data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_none.c +257 -0
  109. data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_program.c +347 -0
  110. data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_xz.c +438 -0
  111. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format.c +72 -0
  112. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_ar.c +550 -0
  113. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_by_name.c +76 -0
  114. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_cpio.c +344 -0
  115. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_cpio_newc.c +295 -0
  116. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_mtree.c +1050 -0
  117. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_pax.c +1386 -0
  118. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_shar.c +626 -0
  119. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_ustar.c +587 -0
  120. data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_zip.c +667 -0
  121. data/ext/libarchive-2.8.4/libarchive/config_freebsd.h +154 -0
  122. data/ext/libarchive-2.8.4/libarchive/filter_fork.c +161 -0
  123. data/ext/libarchive-2.8.4/libarchive/filter_fork.h +41 -0
  124. data/ext/libarchive-2.8.4/libarchive/filter_fork_windows.c +113 -0
  125. data/ext/libarchive-static-makefile +80 -0
  126. data/ext/libarchive-static-wrapper-makefile +22 -0
  127. data/ext/zlib-1.2.5/Makefile.in +257 -0
  128. data/ext/zlib-1.2.5/adler32.c +169 -0
  129. data/ext/zlib-1.2.5/compress.c +80 -0
  130. data/ext/zlib-1.2.5/configure +596 -0
  131. data/ext/zlib-1.2.5/crc32.c +442 -0
  132. data/ext/zlib-1.2.5/crc32.h +441 -0
  133. data/ext/zlib-1.2.5/deflate.c +1834 -0
  134. data/ext/zlib-1.2.5/deflate.h +342 -0
  135. data/ext/zlib-1.2.5/example.c +565 -0
  136. data/ext/zlib-1.2.5/gzclose.c +25 -0
  137. data/ext/zlib-1.2.5/gzguts.h +132 -0
  138. data/ext/zlib-1.2.5/gzlib.c +537 -0
  139. data/ext/zlib-1.2.5/gzread.c +653 -0
  140. data/ext/zlib-1.2.5/gzwrite.c +531 -0
  141. data/ext/zlib-1.2.5/infback.c +632 -0
  142. data/ext/zlib-1.2.5/inffast.c +340 -0
  143. data/ext/zlib-1.2.5/inffast.h +11 -0
  144. data/ext/zlib-1.2.5/inffixed.h +94 -0
  145. data/ext/zlib-1.2.5/inflate.c +1480 -0
  146. data/ext/zlib-1.2.5/inflate.h +122 -0
  147. data/ext/zlib-1.2.5/inftrees.c +330 -0
  148. data/ext/zlib-1.2.5/inftrees.h +62 -0
  149. data/ext/zlib-1.2.5/minigzip.c +440 -0
  150. data/ext/zlib-1.2.5/trees.c +1244 -0
  151. data/ext/zlib-1.2.5/trees.h +128 -0
  152. data/ext/zlib-1.2.5/uncompr.c +59 -0
  153. data/ext/zlib-1.2.5/zconf.h +428 -0
  154. data/ext/zlib-1.2.5/zlib.h +1613 -0
  155. data/ext/zlib-1.2.5/zutil.c +318 -0
  156. data/ext/zlib-1.2.5/zutil.h +274 -0
  157. metadata +211 -0
@@ -0,0 +1,2418 @@
1
+ /*-
2
+ * Copyright (c) 2003-2007 Tim Kientzle
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions
7
+ * are met:
8
+ * 1. Redistributions of source code must retain the above copyright
9
+ * notice, this list of conditions and the following disclaimer.
10
+ * 2. Redistributions in binary form must reproduce the above copyright
11
+ * notice, this list of conditions and the following disclaimer in the
12
+ * documentation and/or other materials provided with the distribution.
13
+ *
14
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
15
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
16
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
17
+ * IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
18
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
19
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
20
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
21
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
23
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24
+ */
25
+
26
+ #include "archive_platform.h"
27
+ __FBSDID("$FreeBSD: head/lib/libarchive/archive_read_support_format_tar.c 201161 2009-12-29 05:44:39Z kientzle $");
28
+
29
+ #ifdef HAVE_ERRNO_H
30
+ #include <errno.h>
31
+ #endif
32
+ #include <stddef.h>
33
+ /* #include <stdint.h> */ /* See archive_platform.h */
34
+ #ifdef HAVE_STDLIB_H
35
+ #include <stdlib.h>
36
+ #endif
37
+ #ifdef HAVE_STRING_H
38
+ #include <string.h>
39
+ #endif
40
+
41
+ /* Obtain suitable wide-character manipulation functions. */
42
+ #ifdef HAVE_WCHAR_H
43
+ #include <wchar.h>
44
+ #else
45
/*
 * Fallback wcscmp() for platforms without <wchar.h>.
 *
 * Walks both strings in lock-step and returns the difference of the
 * first pair of characters that differ (0 when the strings are equal).
 * Only the zero / non-zero distinction is relied upon by this file.
 */
static int wcscmp(const wchar_t *s1, const wchar_t *s2)
{
	int delta;

	for (delta = *s1 - *s2; *s1 && delta == 0; ) {
		++s1;
		++s2;
		delta = (int)*s1 - (int)*s2;
	}
	return delta;
}
53
/*
 * Fallback wcsncmp() for platforms without <wchar.h>.
 *
 * Compares at most n wide characters of s1 and s2 and returns the
 * difference of the first differing pair, or 0 if the first n
 * characters (or both whole strings, if shorter) are equal.
 *
 * The previous version examined one character beyond the n-character
 * limit and returned the first-character difference when n was 0, so
 * prefix comparisons reported a mismatch for strings that actually
 * shared the requested prefix.
 */
static int wcsncmp(const wchar_t *s1, const wchar_t *s2, size_t n)
{
	for (; n > 0; n--, s1++, s2++) {
		int diff = (int)*s1 - (int)*s2;

		/* Stop at the first difference or at the end of both strings. */
		if (diff != 0 || *s1 == 0)
			return diff;
	}
	/* The first n characters matched (or n was zero). */
	return 0;
}
61
/*
 * Fallback wcslen() for platforms without <wchar.h>: counts the wide
 * characters that precede the terminating null.
 */
static size_t wcslen(const wchar_t *s)
{
	const wchar_t *end;

	for (end = s; *end; end++)
		continue;
	return end - s;
}
68
+ #endif
69
+
70
+ #include "archive.h"
71
+ #include "archive_entry.h"
72
+ #include "archive_private.h"
73
+ #include "archive_read_private.h"
74
+
75
/*
 * Smaller of two values.  Function-like macro: the selected argument is
 * evaluated twice, so do not pass expressions with side effects.
 */
#define	tar_min(a,b) ((b) > (a) ? (a) : (b))
76
+
77
/*
 * On-disk layout of a POSIX 'ustar' tar header.
 *
 * Every member is a plain char array, so the structure has no padding
 * and can be overlaid directly on a header block.  The byte offset of
 * each field is noted on the right; the fields total 500 bytes.
 */
struct archive_entry_header_ustar {
	char	name[100];	/* offset   0 */
	char	mode[8];	/* offset 100 */
	char	uid[8];		/* offset 108 */
	char	gid[8];		/* offset 116 */
	char	size[12];	/* offset 124 */
	char	mtime[12];	/* offset 136 */
	char	checksum[8];	/* offset 148 */
	char	typeflag[1];	/* offset 156 */
	char	linkname[100];	/* offset 157; "old format" header ends here */
	char	magic[6];	/* offset 257; for POSIX: "ustar\0" */
	char	version[2];	/* offset 263; for POSIX: "00" */
	char	uname[32];	/* offset 265 */
	char	gname[32];	/* offset 297 */
	char	rdevmajor[8];	/* offset 329 */
	char	rdevminor[8];	/* offset 337 */
	char	prefix[155];	/* offset 345 */
};
98
+
99
/*
 * On-disk layout of a GNU tar header.
 *
 * As with the ustar layout, every member is a char array, so there is
 * no padding.  Byte offsets are noted on the right.
 */

/* One entry of the sparse map carried inside a GNU header (24 bytes). */
struct gnu_sparse {
	char	offset[12];
	char	numbytes[12];
};

struct archive_entry_header_gnutar {
	char	name[100];	/* offset   0 */
	char	mode[8];	/* offset 100 */
	char	uid[8];		/* offset 108 */
	char	gid[8];		/* offset 116 */
	char	size[12];	/* offset 124 */
	char	mtime[12];	/* offset 136 */
	char	checksum[8];	/* offset 148 */
	char	typeflag[1];	/* offset 156 */
	char	linkname[100];	/* offset 157 */
	char	magic[8];	/* offset 257; "ustar  \0" (note blank/blank/null at end) */
	char	uname[32];	/* offset 265 */
	char	gname[32];	/* offset 297 */
	char	rdevmajor[8];	/* offset 329 */
	char	rdevminor[8];	/* offset 337 */
	char	atime[12];	/* offset 345 */
	char	ctime[12];	/* offset 357 */
	char	offset[12];	/* offset 369 */
	char	longnames[4];	/* offset 381 */
	char	unused[1];	/* offset 385 */
	struct gnu_sparse sparse[4];	/* offset 386 */
	char	isextended[1];	/* offset 482 */
	char	realsize[12];	/* offset 483 */
	/*
	 * Old GNU format doesn't use POSIX 'prefix' field; they use
	 * the 'L' (longname) entry instead.
	 */
};
135
+
136
+ /*
137
+ * Data specific to this format.
138
+ */
139
/*
 * One node of the singly linked list describing where the data of the
 * current entry belongs.  The reader reports 'offset' as the position
 * of the next chunk and advances 'offset' / shrinks 'remaining' as
 * bytes are handed out; a node with remaining == 0 is discarded.
 */
struct sparse_block {
	struct sparse_block	*next;		/* Following block, or NULL. */
	off_t			 offset;	/* Position of the next byte. */
	off_t			 remaining;	/* Bytes still to deliver. */
};
144
+
145
+ struct tar {
146
+ struct archive_string acl_text;
147
+ struct archive_string entry_pathname;
148
+ /* For "GNU.sparse.name" and other similar path extensions. */
149
+ struct archive_string entry_pathname_override;
150
+ struct archive_string entry_linkpath;
151
+ struct archive_string entry_uname;
152
+ struct archive_string entry_gname;
153
+ struct archive_string longlink;
154
+ struct archive_string longname;
155
+ struct archive_string pax_header;
156
+ struct archive_string pax_global;
157
+ struct archive_string line;
158
+ int pax_hdrcharset_binary;
159
+ wchar_t *pax_entry;
160
+ size_t pax_entry_length;
161
+ int header_recursion_depth;
162
+ int64_t entry_bytes_remaining;
163
+ int64_t entry_offset;
164
+ int64_t entry_padding;
165
+ int64_t realsize;
166
+ struct sparse_block *sparse_list;
167
+ struct sparse_block *sparse_last;
168
+ int64_t sparse_offset;
169
+ int64_t sparse_numbytes;
170
+ int sparse_gnu_major;
171
+ int sparse_gnu_minor;
172
+ char sparse_gnu_pending;
173
+ };
174
+
175
/*
 * Forward declarations for the file-local functions.
 */

/* Callbacks handed to __archive_read_register_format(). */
static int	archive_read_format_tar_bid(struct archive_read *);
static int	archive_read_format_tar_cleanup(struct archive_read *);
static int	archive_read_format_tar_read_data(struct archive_read *a,
		    const void **buff, size_t *size, off_t *offset);
static int	archive_read_format_tar_skip(struct archive_read *a);
static int	archive_read_format_tar_read_header(struct archive_read *,
		    struct archive_entry *);

/* Header block processing. */
static int	tar_read_header(struct archive_read *, struct tar *,
		    struct archive_entry *);
static int	archive_block_is_null(const unsigned char *p);
static int	checksum(struct archive_read *, const void *);
static int	header_Solaris_ACL(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_common(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_old_tar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_extensions(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *);
static int	header_pax_global(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longlink(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_longname(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_volume(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_ustar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	header_gnutar(struct archive_read *, struct tar *,
		    struct archive_entry *, const void *h);
static int	read_body_to_string(struct archive_read *, struct tar *,
		    struct archive_string *, const void *h);

/* GNU sparse-file support. */
static void	gnu_add_sparse_entry(struct tar *,
		    off_t offset, off_t remaining);
static void	gnu_clear_sparse_list(struct tar *);
static int	gnu_sparse_old_read(struct archive_read *, struct tar *,
		    const struct archive_entry_header_gnutar *header);
static void	gnu_sparse_old_parse(struct tar *,
		    const struct gnu_sparse *sparse, int length);
static int	gnu_sparse_01_parse(struct tar *, const char *);
static ssize_t	gnu_sparse_10_read(struct archive_read *, struct tar *);

/* pax extended attributes. */
static int	pax_attribute(struct tar *, struct archive_entry *,
		    char *key, char *value);
static int	pax_header(struct archive_read *, struct tar *,
		    struct archive_entry *, char *attr);
static void	pax_time(const char *, int64_t *sec, long *nanos);

/* Number, text and encoding helpers. */
static int64_t	tar_atol(const char *, unsigned);
static int64_t	tar_atol10(const char *, unsigned);
static int64_t	tar_atol256(const char *, unsigned);
static int64_t	tar_atol8(const char *, unsigned);
static ssize_t	readline(struct archive_read *, struct tar *, const char **,
		    ssize_t limit);
static ssize_t	UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n);
static wchar_t	*utf8_decode(struct tar *, const char *, size_t length);
static char	*base64_decode(const char *, size_t, size_t *);
static char	*url_decode(const char *);
static int	tohex(int c);
233
+
234
/*
 * Enable GNU tar support on a read handle.  This is an alias: it simply
 * forwards to archive_read_support_format_tar() and returns its result.
 */
int
archive_read_support_format_gnutar(struct archive *a)
{
	const int status = archive_read_support_format_tar(a);

	return (status);
}
239
+
240
+
241
+ int
242
+ archive_read_support_format_tar(struct archive *_a)
243
+ {
244
+ struct archive_read *a = (struct archive_read *)_a;
245
+ struct tar *tar;
246
+ int r;
247
+
248
+ tar = (struct tar *)malloc(sizeof(*tar));
249
+ if (tar == NULL) {
250
+ archive_set_error(&a->archive, ENOMEM,
251
+ "Can't allocate tar data");
252
+ return (ARCHIVE_FATAL);
253
+ }
254
+ memset(tar, 0, sizeof(*tar));
255
+
256
+ r = __archive_read_register_format(a, tar, "tar",
257
+ archive_read_format_tar_bid,
258
+ NULL,
259
+ archive_read_format_tar_read_header,
260
+ archive_read_format_tar_read_data,
261
+ archive_read_format_tar_skip,
262
+ archive_read_format_tar_cleanup);
263
+
264
+ if (r != ARCHIVE_OK)
265
+ free(tar);
266
+ return (ARCHIVE_OK);
267
+ }
268
+
269
+ static int
270
+ archive_read_format_tar_cleanup(struct archive_read *a)
271
+ {
272
+ struct tar *tar;
273
+
274
+ tar = (struct tar *)(a->format->data);
275
+ gnu_clear_sparse_list(tar);
276
+ archive_string_free(&tar->acl_text);
277
+ archive_string_free(&tar->entry_pathname);
278
+ archive_string_free(&tar->entry_pathname_override);
279
+ archive_string_free(&tar->entry_linkpath);
280
+ archive_string_free(&tar->entry_uname);
281
+ archive_string_free(&tar->entry_gname);
282
+ archive_string_free(&tar->line);
283
+ archive_string_free(&tar->pax_global);
284
+ archive_string_free(&tar->pax_header);
285
+ archive_string_free(&tar->longname);
286
+ archive_string_free(&tar->longlink);
287
+ free(tar->pax_entry);
288
+ free(tar);
289
+ (a->format->data) = NULL;
290
+ return (ARCHIVE_OK);
291
+ }
292
+
293
+
294
+ static int
295
+ archive_read_format_tar_bid(struct archive_read *a)
296
+ {
297
+ int bid;
298
+ const void *h;
299
+ const struct archive_entry_header_ustar *header;
300
+
301
+ bid = 0;
302
+
303
+ /* Now let's look at the actual header and see if it matches. */
304
+ h = __archive_read_ahead(a, 512, NULL);
305
+ if (h == NULL)
306
+ return (-1);
307
+
308
+ /* If it's an end-of-archive mark, we can handle it. */
309
+ if ((*(const char *)h) == 0
310
+ && archive_block_is_null((const unsigned char *)h)) {
311
+ /*
312
+ * Usually, I bid the number of bits verified, but
313
+ * in this case, 4096 seems excessive so I picked 10 as
314
+ * an arbitrary but reasonable-seeming value.
315
+ */
316
+ return (10);
317
+ }
318
+
319
+ /* If it's not an end-of-archive mark, it must have a valid checksum.*/
320
+ if (!checksum(a, h))
321
+ return (0);
322
+ bid += 48; /* Checksum is usually 6 octal digits. */
323
+
324
+ header = (const struct archive_entry_header_ustar *)h;
325
+
326
+ /* Recognize POSIX formats. */
327
+ if ((memcmp(header->magic, "ustar\0", 6) == 0)
328
+ &&(memcmp(header->version, "00", 2)==0))
329
+ bid += 56;
330
+
331
+ /* Recognize GNU tar format. */
332
+ if ((memcmp(header->magic, "ustar ", 6) == 0)
333
+ &&(memcmp(header->version, " \0", 2)==0))
334
+ bid += 56;
335
+
336
+ /* Type flag must be null, digit or A-Z, a-z. */
337
+ if (header->typeflag[0] != 0 &&
338
+ !( header->typeflag[0] >= '0' && header->typeflag[0] <= '9') &&
339
+ !( header->typeflag[0] >= 'A' && header->typeflag[0] <= 'Z') &&
340
+ !( header->typeflag[0] >= 'a' && header->typeflag[0] <= 'z') )
341
+ return (0);
342
+ bid += 2; /* 6 bits of variation in an 8-bit field leaves 2 bits. */
343
+
344
+ /* Sanity check: Look at first byte of mode field. */
345
+ switch (255 & (unsigned)header->mode[0]) {
346
+ case 0: case 255:
347
+ /* Base-256 value: No further verification possible! */
348
+ break;
349
+ case ' ': /* Not recommended, but not illegal, either. */
350
+ break;
351
+ case '0': case '1': case '2': case '3':
352
+ case '4': case '5': case '6': case '7':
353
+ /* Octal Value. */
354
+ /* TODO: Check format of remainder of this field. */
355
+ break;
356
+ default:
357
+ /* Not a valid mode; bail out here. */
358
+ return (0);
359
+ }
360
+ /* TODO: Sanity test uid/gid/size/mtime/rdevmajor/rdevminor fields. */
361
+
362
+ return (bid);
363
+ }
364
+
365
+ /*
366
+ * The function invoked by archive_read_header(). This
367
+ * just sets up a few things and then calls the internal
368
+ * tar_read_header() function below.
369
+ */
370
+ static int
371
+ archive_read_format_tar_read_header(struct archive_read *a,
372
+ struct archive_entry *entry)
373
+ {
374
+ /*
375
+ * When converting tar archives to cpio archives, it is
376
+ * essential that each distinct file have a distinct inode
377
+ * number. To simplify this, we keep a static count here to
378
+ * assign fake dev/inode numbers to each tar entry. Note that
379
+ * pax format archives may overwrite this with something more
380
+ * useful.
381
+ *
382
+ * Ideally, we would track every file read from the archive so
383
+ * that we could assign the same dev/ino pair to hardlinks,
384
+ * but the memory required to store a complete lookup table is
385
+ * probably not worthwhile just to support the relatively
386
+ * obscure tar->cpio conversion case.
387
+ */
388
+ static int default_inode;
389
+ static int default_dev;
390
+ struct tar *tar;
391
+ struct sparse_block *sp;
392
+ const char *p;
393
+ int r;
394
+ size_t l;
395
+
396
+ /* Assign default device/inode values. */
397
+ archive_entry_set_dev(entry, 1 + default_dev); /* Don't use zero. */
398
+ archive_entry_set_ino(entry, ++default_inode); /* Don't use zero. */
399
+ /* Limit generated st_ino number to 16 bits. */
400
+ if (default_inode >= 0xffff) {
401
+ ++default_dev;
402
+ default_inode = 0;
403
+ }
404
+
405
+ tar = (struct tar *)(a->format->data);
406
+ tar->entry_offset = 0;
407
+ while (tar->sparse_list != NULL) {
408
+ sp = tar->sparse_list;
409
+ tar->sparse_list = sp->next;
410
+ free(sp);
411
+ }
412
+ tar->sparse_last = NULL;
413
+ tar->realsize = -1; /* Mark this as "unset" */
414
+
415
+ r = tar_read_header(a, tar, entry);
416
+
417
+ /*
418
+ * "non-sparse" files are really just sparse files with
419
+ * a single block.
420
+ */
421
+ if (tar->sparse_list == NULL)
422
+ gnu_add_sparse_entry(tar, 0, tar->entry_bytes_remaining);
423
+
424
+ if (r == ARCHIVE_OK) {
425
+ /*
426
+ * "Regular" entry with trailing '/' is really
427
+ * directory: This is needed for certain old tar
428
+ * variants and even for some broken newer ones.
429
+ */
430
+ p = archive_entry_pathname(entry);
431
+ l = strlen(p);
432
+ if (archive_entry_filetype(entry) == AE_IFREG
433
+ && p[l-1] == '/')
434
+ archive_entry_set_filetype(entry, AE_IFDIR);
435
+ }
436
+ return (r);
437
+ }
438
+
439
+ static int
440
+ archive_read_format_tar_read_data(struct archive_read *a,
441
+ const void **buff, size_t *size, off_t *offset)
442
+ {
443
+ ssize_t bytes_read;
444
+ struct tar *tar;
445
+ struct sparse_block *p;
446
+
447
+ tar = (struct tar *)(a->format->data);
448
+
449
+ if (tar->sparse_gnu_pending) {
450
+ if (tar->sparse_gnu_major == 1 && tar->sparse_gnu_minor == 0) {
451
+ tar->sparse_gnu_pending = 0;
452
+ /* Read initial sparse map. */
453
+ bytes_read = gnu_sparse_10_read(a, tar);
454
+ tar->entry_bytes_remaining -= bytes_read;
455
+ if (bytes_read < 0)
456
+ return (bytes_read);
457
+ } else {
458
+ *size = 0;
459
+ *offset = 0;
460
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
461
+ "Unrecognized GNU sparse file format");
462
+ return (ARCHIVE_WARN);
463
+ }
464
+ tar->sparse_gnu_pending = 0;
465
+ }
466
+
467
+ /* Remove exhausted entries from sparse list. */
468
+ while (tar->sparse_list != NULL &&
469
+ tar->sparse_list->remaining == 0) {
470
+ p = tar->sparse_list;
471
+ tar->sparse_list = p->next;
472
+ free(p);
473
+ }
474
+
475
+ /* If we're at end of file, return EOF. */
476
+ if (tar->sparse_list == NULL || tar->entry_bytes_remaining == 0) {
477
+ if (__archive_read_skip(a, tar->entry_padding) < 0)
478
+ return (ARCHIVE_FATAL);
479
+ tar->entry_padding = 0;
480
+ *buff = NULL;
481
+ *size = 0;
482
+ *offset = tar->realsize;
483
+ return (ARCHIVE_EOF);
484
+ }
485
+
486
+ *buff = __archive_read_ahead(a, 1, &bytes_read);
487
+ if (bytes_read < 0)
488
+ return (ARCHIVE_FATAL);
489
+ if (*buff == NULL) {
490
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
491
+ "Truncated tar archive");
492
+ return (ARCHIVE_FATAL);
493
+ }
494
+ if (bytes_read > tar->entry_bytes_remaining)
495
+ bytes_read = tar->entry_bytes_remaining;
496
+ /* Don't read more than is available in the
497
+ * current sparse block. */
498
+ if (tar->sparse_list->remaining < bytes_read)
499
+ bytes_read = tar->sparse_list->remaining;
500
+ *size = bytes_read;
501
+ *offset = tar->sparse_list->offset;
502
+ tar->sparse_list->remaining -= bytes_read;
503
+ tar->sparse_list->offset += bytes_read;
504
+ tar->entry_bytes_remaining -= bytes_read;
505
+ __archive_read_consume(a, bytes_read);
506
+ return (ARCHIVE_OK);
507
+ }
508
+
509
+ static int
510
+ archive_read_format_tar_skip(struct archive_read *a)
511
+ {
512
+ int64_t bytes_skipped;
513
+ struct tar* tar;
514
+
515
+ tar = (struct tar *)(a->format->data);
516
+
517
+ /*
518
+ * Compression layer skip functions are required to either skip the
519
+ * length requested or fail, so we can rely upon the entire entry
520
+ * plus padding being skipped.
521
+ */
522
+ bytes_skipped = __archive_read_skip(a,
523
+ tar->entry_bytes_remaining + tar->entry_padding);
524
+ if (bytes_skipped < 0)
525
+ return (ARCHIVE_FATAL);
526
+
527
+ tar->entry_bytes_remaining = 0;
528
+ tar->entry_padding = 0;
529
+
530
+ /* Free the sparse list. */
531
+ gnu_clear_sparse_list(tar);
532
+
533
+ return (ARCHIVE_OK);
534
+ }
535
+
536
+ /*
537
+ * This function recursively interprets all of the headers associated
538
+ * with a single entry.
539
+ */
540
+ static int
541
+ tar_read_header(struct archive_read *a, struct tar *tar,
542
+ struct archive_entry *entry)
543
+ {
544
+ ssize_t bytes;
545
+ int err;
546
+ const void *h;
547
+ const struct archive_entry_header_ustar *header;
548
+
549
+ /* Read 512-byte header record */
550
+ h = __archive_read_ahead(a, 512, &bytes);
551
+ if (bytes < 0)
552
+ return (bytes);
553
+ if (bytes < 512) { /* Short read or EOF. */
554
+ /* Try requesting just one byte and see what happens. */
555
+ (void)__archive_read_ahead(a, 1, &bytes);
556
+ if (bytes == 0) {
557
+ /*
558
+ * The archive ends at a 512-byte boundary but
559
+ * without a proper end-of-archive marker.
560
+ * Yes, there are tar writers that do this;
561
+ * hold our nose and accept it.
562
+ */
563
+ return (ARCHIVE_EOF);
564
+ }
565
+ /* Archive ends with a partial block; this is bad. */
566
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
567
+ "Truncated tar archive");
568
+ return (ARCHIVE_FATAL);
569
+ }
570
+ __archive_read_consume(a, 512);
571
+
572
+
573
+ /* Check for end-of-archive mark. */
574
+ if (((*(const char *)h)==0) && archive_block_is_null((const unsigned char *)h)) {
575
+ /* Try to consume a second all-null record, as well. */
576
+ h = __archive_read_ahead(a, 512, NULL);
577
+ if (h != NULL)
578
+ __archive_read_consume(a, 512);
579
+ archive_set_error(&a->archive, 0, NULL);
580
+ if (a->archive.archive_format_name == NULL) {
581
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR;
582
+ a->archive.archive_format_name = "tar";
583
+ }
584
+ return (ARCHIVE_EOF);
585
+ }
586
+
587
+ /*
588
+ * Note: If the checksum fails and we return ARCHIVE_RETRY,
589
+ * then the client is likely to just retry. This is a very
590
+ * crude way to search for the next valid header!
591
+ *
592
+ * TODO: Improve this by implementing a real header scan.
593
+ */
594
+ if (!checksum(a, h)) {
595
+ archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
596
+ return (ARCHIVE_RETRY); /* Retryable: Invalid header */
597
+ }
598
+
599
+ if (++tar->header_recursion_depth > 32) {
600
+ archive_set_error(&a->archive, EINVAL, "Too many special headers");
601
+ return (ARCHIVE_WARN);
602
+ }
603
+
604
+ /* Determine the format variant. */
605
+ header = (const struct archive_entry_header_ustar *)h;
606
+ switch(header->typeflag[0]) {
607
+ case 'A': /* Solaris tar ACL */
608
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
609
+ a->archive.archive_format_name = "Solaris tar";
610
+ err = header_Solaris_ACL(a, tar, entry, h);
611
+ break;
612
+ case 'g': /* POSIX-standard 'g' header. */
613
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
614
+ a->archive.archive_format_name = "POSIX pax interchange format";
615
+ err = header_pax_global(a, tar, entry, h);
616
+ break;
617
+ case 'K': /* Long link name (GNU tar, others) */
618
+ err = header_longlink(a, tar, entry, h);
619
+ break;
620
+ case 'L': /* Long filename (GNU tar, others) */
621
+ err = header_longname(a, tar, entry, h);
622
+ break;
623
+ case 'V': /* GNU volume header */
624
+ err = header_volume(a, tar, entry, h);
625
+ break;
626
+ case 'X': /* Used by SUN tar; same as 'x'. */
627
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
628
+ a->archive.archive_format_name =
629
+ "POSIX pax interchange format (Sun variant)";
630
+ err = header_pax_extensions(a, tar, entry, h);
631
+ break;
632
+ case 'x': /* POSIX-standard 'x' header. */
633
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE;
634
+ a->archive.archive_format_name = "POSIX pax interchange format";
635
+ err = header_pax_extensions(a, tar, entry, h);
636
+ break;
637
+ default:
638
+ if (memcmp(header->magic, "ustar \0", 8) == 0) {
639
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR_GNUTAR;
640
+ a->archive.archive_format_name = "GNU tar format";
641
+ err = header_gnutar(a, tar, entry, h);
642
+ } else if (memcmp(header->magic, "ustar", 5) == 0) {
643
+ if (a->archive.archive_format != ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
644
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR_USTAR;
645
+ a->archive.archive_format_name = "POSIX ustar format";
646
+ }
647
+ err = header_ustar(a, tar, entry, h);
648
+ } else {
649
+ a->archive.archive_format = ARCHIVE_FORMAT_TAR;
650
+ a->archive.archive_format_name = "tar (non-POSIX)";
651
+ err = header_old_tar(a, tar, entry, h);
652
+ }
653
+ }
654
+ --tar->header_recursion_depth;
655
+ /* We return warnings or success as-is. Anything else is fatal. */
656
+ if (err == ARCHIVE_WARN || err == ARCHIVE_OK)
657
+ return (err);
658
+ if (err == ARCHIVE_EOF)
659
+ /* EOF when recursively reading a header is bad. */
660
+ archive_set_error(&a->archive, EINVAL, "Damaged tar archive");
661
+ return (ARCHIVE_FATAL);
662
+ }
663
+
664
+ /*
665
+ * Return true if block checksum is correct.
666
+ */
667
+ static int
668
+ checksum(struct archive_read *a, const void *h)
669
+ {
670
+ const unsigned char *bytes;
671
+ const struct archive_entry_header_ustar *header;
672
+ int check, i, sum;
673
+
674
+ (void)a; /* UNUSED */
675
+ bytes = (const unsigned char *)h;
676
+ header = (const struct archive_entry_header_ustar *)h;
677
+
678
+ /*
679
+ * Test the checksum. Note that POSIX specifies _unsigned_
680
+ * bytes for this calculation.
681
+ */
682
+ sum = tar_atol(header->checksum, sizeof(header->checksum));
683
+ check = 0;
684
+ for (i = 0; i < 148; i++)
685
+ check += (unsigned char)bytes[i];
686
+ for (; i < 156; i++)
687
+ check += 32;
688
+ for (; i < 512; i++)
689
+ check += (unsigned char)bytes[i];
690
+ if (sum == check)
691
+ return (1);
692
+
693
+ /*
694
+ * Repeat test with _signed_ bytes, just in case this archive
695
+ * was created by an old BSD, Solaris, or HP-UX tar with a
696
+ * broken checksum calculation.
697
+ */
698
+ check = 0;
699
+ for (i = 0; i < 148; i++)
700
+ check += (signed char)bytes[i];
701
+ for (; i < 156; i++)
702
+ check += 32;
703
+ for (; i < 512; i++)
704
+ check += (signed char)bytes[i];
705
+ if (sum == check)
706
+ return (1);
707
+
708
+ return (0);
709
+ }
710
+
711
/*
 * Report whether the 512 bytes starting at 'p' are all zero.
 * Returns 1 if so, 0 as soon as a non-zero byte is found.
 */
static int
archive_block_is_null(const unsigned char *p)
{
	const unsigned char *const end = p + 512;

	while (p < end) {
		if (*p != 0)
			return (0);
		p++;
	}
	return (1);
}
724
+
725
+ /*
726
+ * Interpret 'A' Solaris ACL header
727
+ */
728
+ static int
729
+ header_Solaris_ACL(struct archive_read *a, struct tar *tar,
730
+ struct archive_entry *entry, const void *h)
731
+ {
732
+ const struct archive_entry_header_ustar *header;
733
+ size_t size;
734
+ int err;
735
+ int64_t type;
736
+ char *acl, *p;
737
+ wchar_t *wp;
738
+
739
+ /*
740
+ * read_body_to_string adds a NUL terminator, but we need a little
741
+ * more to make sure that we don't overrun acl_text later.
742
+ */
743
+ header = (const struct archive_entry_header_ustar *)h;
744
+ size = tar_atol(header->size, sizeof(header->size));
745
+ err = read_body_to_string(a, tar, &(tar->acl_text), h);
746
+ if (err != ARCHIVE_OK)
747
+ return (err);
748
+ /* Recursively read next header */
749
+ err = tar_read_header(a, tar, entry);
750
+ if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
751
+ return (err);
752
+
753
+ /* TODO: Examine the first characters to see if this
754
+ * is an AIX ACL descriptor. We'll likely never support
755
+ * them, but it would be polite to recognize and warn when
756
+ * we do see them. */
757
+
758
+ /* Leading octal number indicates ACL type and number of entries. */
759
+ p = acl = tar->acl_text.s;
760
+ type = 0;
761
+ while (*p != '\0' && p < acl + size) {
762
+ if (*p < '0' || *p > '7') {
763
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
764
+ "Malformed Solaris ACL attribute (invalid digit)");
765
+ return(ARCHIVE_WARN);
766
+ }
767
+ type <<= 3;
768
+ type += *p - '0';
769
+ if (type > 077777777) {
770
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
771
+ "Malformed Solaris ACL attribute (count too large)");
772
+ return (ARCHIVE_WARN);
773
+ }
774
+ p++;
775
+ }
776
+ switch ((int)type & ~0777777) {
777
+ case 01000000:
778
+ /* POSIX.1e ACL */
779
+ break;
780
+ case 03000000:
781
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
782
+ "Solaris NFSv4 ACLs not supported");
783
+ return (ARCHIVE_WARN);
784
+ default:
785
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
786
+ "Malformed Solaris ACL attribute (unsupported type %o)",
787
+ (int)type);
788
+ return (ARCHIVE_WARN);
789
+ }
790
+ p++;
791
+
792
+ if (p >= acl + size) {
793
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
794
+ "Malformed Solaris ACL attribute (body overflow)");
795
+ return(ARCHIVE_WARN);
796
+ }
797
+
798
+ /* ACL text is null-terminated; find the end. */
799
+ size -= (p - acl);
800
+ acl = p;
801
+
802
+ while (*p != '\0' && p < acl + size)
803
+ p++;
804
+
805
+ wp = utf8_decode(tar, acl, p - acl);
806
+ err = __archive_entry_acl_parse_w(entry, wp,
807
+ ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
808
+ if (err != ARCHIVE_OK)
809
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
810
+ "Malformed Solaris ACL attribute (unparsable)");
811
+ return (err);
812
+ }
813
+
814
+ /*
815
+ * Interpret 'K' long linkname header.
816
+ */
817
+ static int
818
+ header_longlink(struct archive_read *a, struct tar *tar,
819
+ struct archive_entry *entry, const void *h)
820
+ {
821
+ int err;
822
+
823
+ err = read_body_to_string(a, tar, &(tar->longlink), h);
824
+ if (err != ARCHIVE_OK)
825
+ return (err);
826
+ err = tar_read_header(a, tar, entry);
827
+ if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
828
+ return (err);
829
+ /* Set symlink if symlink already set, else hardlink. */
830
+ archive_entry_copy_link(entry, tar->longlink.s);
831
+ return (ARCHIVE_OK);
832
+ }
833
+
834
+ /*
835
+ * Interpret 'L' long filename header.
836
+ */
837
+ static int
838
+ header_longname(struct archive_read *a, struct tar *tar,
839
+ struct archive_entry *entry, const void *h)
840
+ {
841
+ int err;
842
+
843
+ err = read_body_to_string(a, tar, &(tar->longname), h);
844
+ if (err != ARCHIVE_OK)
845
+ return (err);
846
+ /* Read and parse "real" header, then override name. */
847
+ err = tar_read_header(a, tar, entry);
848
+ if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
849
+ return (err);
850
+ archive_entry_copy_pathname(entry, tar->longname.s);
851
+ return (ARCHIVE_OK);
852
+ }
853
+
854
+
855
/*
 * Handle a 'V' (GNU tar volume) header.  Nothing in the header itself
 * is used; the result is whatever reading the next header produces.
 */
static int
header_volume(struct archive_read *a, struct tar *tar,
    struct archive_entry *entry, const void *h)
{
	int status;

	(void)h; /* UNUSED */

	/* Skip this header by moving straight on to the next one. */
	status = tar_read_header(a, tar, entry);
	return (status);
}
867
+
868
+ /*
869
+ * Read body of an archive entry into an archive_string object.
870
+ */
871
+ static int
872
+ read_body_to_string(struct archive_read *a, struct tar *tar,
873
+ struct archive_string *as, const void *h)
874
+ {
875
+ off_t size, padded_size;
876
+ const struct archive_entry_header_ustar *header;
877
+ const void *src;
878
+
879
+ (void)tar; /* UNUSED */
880
+ header = (const struct archive_entry_header_ustar *)h;
881
+ size = tar_atol(header->size, sizeof(header->size));
882
+ if ((size > 1048576) || (size < 0)) {
883
+ archive_set_error(&a->archive, EINVAL,
884
+ "Special header too large");
885
+ return (ARCHIVE_FATAL);
886
+ }
887
+
888
+ /* Fail if we can't make our buffer big enough. */
889
+ if (archive_string_ensure(as, size+1) == NULL) {
890
+ archive_set_error(&a->archive, ENOMEM,
891
+ "No memory");
892
+ return (ARCHIVE_FATAL);
893
+ }
894
+
895
+ /* Read the body into the string. */
896
+ padded_size = (size + 511) & ~ 511;
897
+ src = __archive_read_ahead(a, padded_size, NULL);
898
+ if (src == NULL)
899
+ return (ARCHIVE_FATAL);
900
+ memcpy(as->s, src, size);
901
+ __archive_read_consume(a, padded_size);
902
+ as->s[size] = '\0';
903
+ return (ARCHIVE_OK);
904
+ }
905
+
906
+ /*
907
+ * Parse out common header elements.
908
+ *
909
+ * This would be the same as header_old_tar, except that the
910
+ * filename is handled slightly differently for old and POSIX
911
+ * entries (POSIX entries support a 'prefix'). This factoring
912
+ * allows header_old_tar and header_ustar
913
+ * to handle filenames differently, while still putting most of the
914
+ * common parsing into one place.
915
+ */
916
+ static int
917
+ header_common(struct archive_read *a, struct tar *tar,
918
+ struct archive_entry *entry, const void *h)
919
+ {
920
+ const struct archive_entry_header_ustar *header;
921
+ char tartype;
922
+
923
+ (void)a; /* UNUSED */
924
+
925
+ header = (const struct archive_entry_header_ustar *)h;
926
+ if (header->linkname[0])
927
+ archive_strncpy(&(tar->entry_linkpath), header->linkname,
928
+ sizeof(header->linkname));
929
+ else
930
+ archive_string_empty(&(tar->entry_linkpath));
931
+
932
+ /* Parse out the numeric fields (all are octal) */
933
+ archive_entry_set_mode(entry, tar_atol(header->mode, sizeof(header->mode)));
934
+ archive_entry_set_uid(entry, tar_atol(header->uid, sizeof(header->uid)));
935
+ archive_entry_set_gid(entry, tar_atol(header->gid, sizeof(header->gid)));
936
+ tar->entry_bytes_remaining = tar_atol(header->size, sizeof(header->size));
937
+ tar->realsize = tar->entry_bytes_remaining;
938
+ archive_entry_set_size(entry, tar->entry_bytes_remaining);
939
+ archive_entry_set_mtime(entry, tar_atol(header->mtime, sizeof(header->mtime)), 0);
940
+
941
+ /* Handle the tar type flag appropriately. */
942
+ tartype = header->typeflag[0];
943
+
944
+ switch (tartype) {
945
+ case '1': /* Hard link */
946
+ archive_entry_copy_hardlink(entry, tar->entry_linkpath.s);
947
+ /*
948
+ * The following may seem odd, but: Technically, tar
949
+ * does not store the file type for a "hard link"
950
+ * entry, only the fact that it is a hard link. So, I
951
+ * leave the type zero normally. But, pax interchange
952
+ * format allows hard links to have data, which
953
+ * implies that the underlying entry is a regular
954
+ * file.
955
+ */
956
+ if (archive_entry_size(entry) > 0)
957
+ archive_entry_set_filetype(entry, AE_IFREG);
958
+
959
+ /*
960
+ * A tricky point: Traditionally, tar readers have
961
+ * ignored the size field when reading hardlink
962
+ * entries, and some writers put non-zero sizes even
963
+ * though the body is empty. POSIX blessed this
964
+ * convention in the 1988 standard, but broke with
965
+ * this tradition in 2001 by permitting hardlink
966
+ * entries to store valid bodies in pax interchange
967
+ * format, but not in ustar format. Since there is no
968
+ * hard and fast way to distinguish pax interchange
969
+ * from earlier archives (the 'x' and 'g' entries are
970
+ * optional, after all), we need a heuristic.
971
+ */
972
+ if (archive_entry_size(entry) == 0) {
973
+ /* If the size is already zero, we're done. */
974
+ } else if (a->archive.archive_format
975
+ == ARCHIVE_FORMAT_TAR_PAX_INTERCHANGE) {
976
+ /* Definitely pax extended; must obey hardlink size. */
977
+ } else if (a->archive.archive_format == ARCHIVE_FORMAT_TAR
978
+ || a->archive.archive_format == ARCHIVE_FORMAT_TAR_GNUTAR)
979
+ {
980
+ /* Old-style or GNU tar: we must ignore the size. */
981
+ archive_entry_set_size(entry, 0);
982
+ tar->entry_bytes_remaining = 0;
983
+ } else if (archive_read_format_tar_bid(a) > 50) {
984
+ /*
985
+ * We don't know if it's pax: If the bid
986
+ * function sees a valid ustar header
987
+ * immediately following, then let's ignore
988
+ * the hardlink size.
989
+ */
990
+ archive_entry_set_size(entry, 0);
991
+ tar->entry_bytes_remaining = 0;
992
+ }
993
+ /*
994
+ * TODO: There are still two cases I'd like to handle:
995
+ * = a ustar non-pax archive with a hardlink entry at
996
+ * end-of-archive. (Look for block of nulls following?)
997
+ * = a pax archive that has not seen any pax headers
998
+ * and has an entry which is a hardlink entry storing
999
+ * a body containing an uncompressed tar archive.
1000
+ * The first is worth addressing; I don't see any reliable
1001
+ * way to deal with the second possibility.
1002
+ */
1003
+ break;
1004
+ case '2': /* Symlink */
1005
+ archive_entry_set_filetype(entry, AE_IFLNK);
1006
+ archive_entry_set_size(entry, 0);
1007
+ tar->entry_bytes_remaining = 0;
1008
+ archive_entry_copy_symlink(entry, tar->entry_linkpath.s);
1009
+ break;
1010
+ case '3': /* Character device */
1011
+ archive_entry_set_filetype(entry, AE_IFCHR);
1012
+ archive_entry_set_size(entry, 0);
1013
+ tar->entry_bytes_remaining = 0;
1014
+ break;
1015
+ case '4': /* Block device */
1016
+ archive_entry_set_filetype(entry, AE_IFBLK);
1017
+ archive_entry_set_size(entry, 0);
1018
+ tar->entry_bytes_remaining = 0;
1019
+ break;
1020
+ case '5': /* Dir */
1021
+ archive_entry_set_filetype(entry, AE_IFDIR);
1022
+ archive_entry_set_size(entry, 0);
1023
+ tar->entry_bytes_remaining = 0;
1024
+ break;
1025
+ case '6': /* FIFO device */
1026
+ archive_entry_set_filetype(entry, AE_IFIFO);
1027
+ archive_entry_set_size(entry, 0);
1028
+ tar->entry_bytes_remaining = 0;
1029
+ break;
1030
+ case 'D': /* GNU incremental directory type */
1031
+ /*
1032
+ * No special handling is actually required here.
1033
+ * It might be nice someday to preprocess the file list and
1034
+ * provide it to the client, though.
1035
+ */
1036
+ archive_entry_set_filetype(entry, AE_IFDIR);
1037
+ break;
1038
+ case 'M': /* GNU "Multi-volume" (remainder of file from last archive)*/
1039
+ /*
1040
+ * As far as I can tell, this is just like a regular file
1041
+ * entry, except that the contents should be _appended_ to
1042
+ * the indicated file at the indicated offset. This may
1043
+ * require some API work to fully support.
1044
+ */
1045
+ break;
1046
+ case 'N': /* Old GNU "long filename" entry. */
1047
+ /* The body of this entry is a script for renaming
1048
+ * previously-extracted entries. Ugh. It will never
1049
+ * be supported by libarchive. */
1050
+ archive_entry_set_filetype(entry, AE_IFREG);
1051
+ break;
1052
+ case 'S': /* GNU sparse files */
1053
+ /*
1054
+ * Sparse files are really just regular files with
1055
+ * sparse information in the extended area.
1056
+ */
1057
+ /* FALLTHROUGH */
1058
+ default: /* Regular file and non-standard types */
1059
+ /*
1060
+ * Per POSIX: non-recognized types should always be
1061
+ * treated as regular files.
1062
+ */
1063
+ archive_entry_set_filetype(entry, AE_IFREG);
1064
+ break;
1065
+ }
1066
+ return (0);
1067
+ }
1068
+
1069
+ /*
1070
+ * Parse out header elements for "old-style" tar archives.
1071
+ */
1072
+ static int
1073
+ header_old_tar(struct archive_read *a, struct tar *tar,
1074
+ struct archive_entry *entry, const void *h)
1075
+ {
1076
+ const struct archive_entry_header_ustar *header;
1077
+
1078
+ /* Copy filename over (to ensure null termination). */
1079
+ header = (const struct archive_entry_header_ustar *)h;
1080
+ archive_strncpy(&(tar->entry_pathname), header->name, sizeof(header->name));
1081
+ archive_entry_copy_pathname(entry, tar->entry_pathname.s);
1082
+
1083
+ /* Grab rest of common fields */
1084
+ header_common(a, tar, entry, h);
1085
+
1086
+ tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1087
+ return (0);
1088
+ }
1089
+
1090
+ /*
1091
+ * Parse a file header for a pax extended archive entry.
1092
+ */
1093
+ static int
1094
+ header_pax_global(struct archive_read *a, struct tar *tar,
1095
+ struct archive_entry *entry, const void *h)
1096
+ {
1097
+ int err;
1098
+
1099
+ err = read_body_to_string(a, tar, &(tar->pax_global), h);
1100
+ if (err != ARCHIVE_OK)
1101
+ return (err);
1102
+ err = tar_read_header(a, tar, entry);
1103
+ return (err);
1104
+ }
1105
+
1106
+ static int
1107
+ header_pax_extensions(struct archive_read *a, struct tar *tar,
1108
+ struct archive_entry *entry, const void *h)
1109
+ {
1110
+ int err, err2;
1111
+
1112
+ err = read_body_to_string(a, tar, &(tar->pax_header), h);
1113
+ if (err != ARCHIVE_OK)
1114
+ return (err);
1115
+
1116
+ /* Parse the next header. */
1117
+ err = tar_read_header(a, tar, entry);
1118
+ if ((err != ARCHIVE_OK) && (err != ARCHIVE_WARN))
1119
+ return (err);
1120
+
1121
+ /*
1122
+ * TODO: Parse global/default options into 'entry' struct here
1123
+ * before handling file-specific options.
1124
+ *
1125
+ * This design (parse standard header, then overwrite with pax
1126
+ * extended attribute data) usually works well, but isn't ideal;
1127
+ * it would be better to parse the pax extended attributes first
1128
+ * and then skip any fields in the standard header that were
1129
+ * defined in the pax header.
1130
+ */
1131
+ err2 = pax_header(a, tar, entry, tar->pax_header.s);
1132
+ err = err_combine(err, err2);
1133
+ tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1134
+ return (err);
1135
+ }
1136
+
1137
+
1138
+ /*
1139
+ * Parse a file header for a Posix "ustar" archive entry. This also
1140
+ * handles "pax" or "extended ustar" entries.
1141
+ */
1142
+ static int
1143
+ header_ustar(struct archive_read *a, struct tar *tar,
1144
+ struct archive_entry *entry, const void *h)
1145
+ {
1146
+ const struct archive_entry_header_ustar *header;
1147
+ struct archive_string *as;
1148
+
1149
+ header = (const struct archive_entry_header_ustar *)h;
1150
+
1151
+ /* Copy name into an internal buffer to ensure null-termination. */
1152
+ as = &(tar->entry_pathname);
1153
+ if (header->prefix[0]) {
1154
+ archive_strncpy(as, header->prefix, sizeof(header->prefix));
1155
+ if (as->s[archive_strlen(as) - 1] != '/')
1156
+ archive_strappend_char(as, '/');
1157
+ archive_strncat(as, header->name, sizeof(header->name));
1158
+ } else
1159
+ archive_strncpy(as, header->name, sizeof(header->name));
1160
+
1161
+ archive_entry_copy_pathname(entry, as->s);
1162
+
1163
+ /* Handle rest of common fields. */
1164
+ header_common(a, tar, entry, h);
1165
+
1166
+ /* Handle POSIX ustar fields. */
1167
+ archive_strncpy(&(tar->entry_uname), header->uname,
1168
+ sizeof(header->uname));
1169
+ archive_entry_copy_uname(entry, tar->entry_uname.s);
1170
+
1171
+ archive_strncpy(&(tar->entry_gname), header->gname,
1172
+ sizeof(header->gname));
1173
+ archive_entry_copy_gname(entry, tar->entry_gname.s);
1174
+
1175
+ /* Parse out device numbers only for char and block specials. */
1176
+ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1177
+ archive_entry_set_rdevmajor(entry,
1178
+ tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1179
+ archive_entry_set_rdevminor(entry,
1180
+ tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1181
+ }
1182
+
1183
+ tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1184
+
1185
+ return (0);
1186
+ }
1187
+
1188
+
1189
+ /*
1190
+ * Parse the pax extended attributes record.
1191
+ *
1192
+ * Returns non-zero if there's an error in the data.
1193
+ */
1194
+ static int
1195
+ pax_header(struct archive_read *a, struct tar *tar,
1196
+ struct archive_entry *entry, char *attr)
1197
+ {
1198
+ size_t attr_length, l, line_length;
1199
+ char *p;
1200
+ char *key, *value;
1201
+ int err, err2;
1202
+
1203
+ attr_length = strlen(attr);
1204
+ tar->pax_hdrcharset_binary = 0;
1205
+ archive_string_empty(&(tar->entry_gname));
1206
+ archive_string_empty(&(tar->entry_linkpath));
1207
+ archive_string_empty(&(tar->entry_pathname));
1208
+ archive_string_empty(&(tar->entry_pathname_override));
1209
+ archive_string_empty(&(tar->entry_uname));
1210
+ err = ARCHIVE_OK;
1211
+ while (attr_length > 0) {
1212
+ /* Parse decimal length field at start of line. */
1213
+ line_length = 0;
1214
+ l = attr_length;
1215
+ p = attr; /* Record start of line. */
1216
+ while (l>0) {
1217
+ if (*p == ' ') {
1218
+ p++;
1219
+ l--;
1220
+ break;
1221
+ }
1222
+ if (*p < '0' || *p > '9') {
1223
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1224
+ "Ignoring malformed pax extended attributes");
1225
+ return (ARCHIVE_WARN);
1226
+ }
1227
+ line_length *= 10;
1228
+ line_length += *p - '0';
1229
+ if (line_length > 999999) {
1230
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1231
+ "Rejecting pax extended attribute > 1MB");
1232
+ return (ARCHIVE_WARN);
1233
+ }
1234
+ p++;
1235
+ l--;
1236
+ }
1237
+
1238
+ /*
1239
+ * Parsed length must be no bigger than available data,
1240
+ * at least 1, and the last character of the line must
1241
+ * be '\n'.
1242
+ */
1243
+ if (line_length > attr_length
1244
+ || line_length < 1
1245
+ || attr[line_length - 1] != '\n')
1246
+ {
1247
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1248
+ "Ignoring malformed pax extended attribute");
1249
+ return (ARCHIVE_WARN);
1250
+ }
1251
+
1252
+ /* Null-terminate the line. */
1253
+ attr[line_length - 1] = '\0';
1254
+
1255
+ /* Find end of key and null terminate it. */
1256
+ key = p;
1257
+ if (key[0] == '=')
1258
+ return (-1);
1259
+ while (*p && *p != '=')
1260
+ ++p;
1261
+ if (*p == '\0') {
1262
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_MISC,
1263
+ "Invalid pax extended attributes");
1264
+ return (ARCHIVE_WARN);
1265
+ }
1266
+ *p = '\0';
1267
+
1268
+ /* Identify null-terminated 'value' portion. */
1269
+ value = p + 1;
1270
+
1271
+ /* Identify this attribute and set it in the entry. */
1272
+ err2 = pax_attribute(tar, entry, key, value);
1273
+ err = err_combine(err, err2);
1274
+
1275
+ /* Skip to next line */
1276
+ attr += line_length;
1277
+ attr_length -= line_length;
1278
+ }
1279
+ if (archive_strlen(&(tar->entry_gname)) > 0) {
1280
+ value = tar->entry_gname.s;
1281
+ if (tar->pax_hdrcharset_binary)
1282
+ archive_entry_copy_gname(entry, value);
1283
+ else {
1284
+ if (!archive_entry_update_gname_utf8(entry, value)) {
1285
+ err = ARCHIVE_WARN;
1286
+ archive_set_error(&a->archive,
1287
+ ARCHIVE_ERRNO_FILE_FORMAT,
1288
+ "Gname in pax header can't "
1289
+ "be converted to current locale.");
1290
+ }
1291
+ }
1292
+ }
1293
+ if (archive_strlen(&(tar->entry_linkpath)) > 0) {
1294
+ value = tar->entry_linkpath.s;
1295
+ if (tar->pax_hdrcharset_binary)
1296
+ archive_entry_copy_link(entry, value);
1297
+ else {
1298
+ if (!archive_entry_update_link_utf8(entry, value)) {
1299
+ err = ARCHIVE_WARN;
1300
+ archive_set_error(&a->archive,
1301
+ ARCHIVE_ERRNO_FILE_FORMAT,
1302
+ "Linkname in pax header can't "
1303
+ "be converted to current locale.");
1304
+ }
1305
+ }
1306
+ }
1307
+ /*
1308
+ * Some extensions (such as the GNU sparse file extensions)
1309
+ * deliberately store a synthetic name under the regular 'path'
1310
+ * attribute and the real file name under a different attribute.
1311
+ * Since we're supposed to not care about the order, we
1312
+ * have no choice but to store all of the various filenames
1313
+ * we find and figure it all out afterwards. This is the
1314
+ * figuring out part.
1315
+ */
1316
+ value = NULL;
1317
+ if (archive_strlen(&(tar->entry_pathname_override)) > 0)
1318
+ value = tar->entry_pathname_override.s;
1319
+ else if (archive_strlen(&(tar->entry_pathname)) > 0)
1320
+ value = tar->entry_pathname.s;
1321
+ if (value != NULL) {
1322
+ if (tar->pax_hdrcharset_binary)
1323
+ archive_entry_copy_pathname(entry, value);
1324
+ else {
1325
+ if (!archive_entry_update_pathname_utf8(entry, value)) {
1326
+ err = ARCHIVE_WARN;
1327
+ archive_set_error(&a->archive,
1328
+ ARCHIVE_ERRNO_FILE_FORMAT,
1329
+ "Pathname in pax header can't be "
1330
+ "converted to current locale.");
1331
+ }
1332
+ }
1333
+ }
1334
+ if (archive_strlen(&(tar->entry_uname)) > 0) {
1335
+ value = tar->entry_uname.s;
1336
+ if (tar->pax_hdrcharset_binary)
1337
+ archive_entry_copy_uname(entry, value);
1338
+ else {
1339
+ if (!archive_entry_update_uname_utf8(entry, value)) {
1340
+ err = ARCHIVE_WARN;
1341
+ archive_set_error(&a->archive,
1342
+ ARCHIVE_ERRNO_FILE_FORMAT,
1343
+ "Uname in pax header can't "
1344
+ "be converted to current locale.");
1345
+ }
1346
+ }
1347
+ }
1348
+ return (err);
1349
+ }
1350
+
1351
/*
 * Store a "LIBARCHIVE.xattr.<name>" pax attribute as an extended
 * attribute of 'entry'.  The part of the key after the prefix is
 * URL-decoded to give the attribute name, and the value is base-64
 * decoded to give the attribute data.
 *
 * Returns 0 on success, 3 if the key lacks the prefix or has nothing
 * after it, 2 if the name cannot be decoded, and 1 if the value
 * cannot be decoded.
 */
static int
pax_attribute_xattr(struct archive_entry *entry,
    char *name, char *value)
{
	char *xattr_name;
	void *xattr_data;
	size_t xattr_size;
	int result;

	/* Require the 17-character prefix plus at least one more byte. */
	if (strlen(name) < 18)
		return 3;
	if (strncmp(name, "LIBARCHIVE.xattr.", 17) != 0)
		return 3;

	/* URL-decode name */
	xattr_name = url_decode(name + 17);
	if (xattr_name == NULL)
		return 2;

	/* Base-64 decode value */
	xattr_data = base64_decode(value, strlen(value), &xattr_size);
	if (xattr_data == NULL) {
		result = 1;
	} else {
		archive_entry_xattr_add_entry(entry, xattr_name,
		    xattr_data, xattr_size);
		result = 0;
	}

	/* Both decoded buffers are ours to release. */
	free(xattr_name);
	free(xattr_data);
	return result;
}
1383
+
1384
+ /*
1385
+ * Parse a single key=value attribute. key/value pointers are
1386
+ * assumed to point into reasonably long-lived storage.
1387
+ *
1388
+ * Note that POSIX reserves all-lowercase keywords. Vendor-specific
1389
+ * extensions should always have keywords of the form "VENDOR.attribute"
1390
+ * In particular, it's quite feasible to support many different
1391
+ * vendor extensions here. I'm using "LIBARCHIVE" for extensions
1392
+ * unique to this library.
1393
+ *
1394
+ * Investigate other vendor-specific extensions and see if
1395
+ * any of them look useful.
1396
+ */
1397
+ static int
1398
+ pax_attribute(struct tar *tar, struct archive_entry *entry,
1399
+ char *key, char *value)
1400
+ {
1401
+ int64_t s;
1402
+ long n;
1403
+ wchar_t *wp;
1404
+
1405
+ switch (key[0]) {
1406
+ case 'G':
1407
+ /* GNU "0.0" sparse pax format. */
1408
+ if (strcmp(key, "GNU.sparse.numblocks") == 0) {
1409
+ tar->sparse_offset = -1;
1410
+ tar->sparse_numbytes = -1;
1411
+ tar->sparse_gnu_major = 0;
1412
+ tar->sparse_gnu_minor = 0;
1413
+ }
1414
+ if (strcmp(key, "GNU.sparse.offset") == 0) {
1415
+ tar->sparse_offset = tar_atol10(value, strlen(value));
1416
+ if (tar->sparse_numbytes != -1) {
1417
+ gnu_add_sparse_entry(tar,
1418
+ tar->sparse_offset, tar->sparse_numbytes);
1419
+ tar->sparse_offset = -1;
1420
+ tar->sparse_numbytes = -1;
1421
+ }
1422
+ }
1423
+ if (strcmp(key, "GNU.sparse.numbytes") == 0) {
1424
+ tar->sparse_numbytes = tar_atol10(value, strlen(value));
1425
+ if (tar->sparse_numbytes != -1) {
1426
+ gnu_add_sparse_entry(tar,
1427
+ tar->sparse_offset, tar->sparse_numbytes);
1428
+ tar->sparse_offset = -1;
1429
+ tar->sparse_numbytes = -1;
1430
+ }
1431
+ }
1432
+ if (strcmp(key, "GNU.sparse.size") == 0) {
1433
+ tar->realsize = tar_atol10(value, strlen(value));
1434
+ archive_entry_set_size(entry, tar->realsize);
1435
+ }
1436
+
1437
+ /* GNU "0.1" sparse pax format. */
1438
+ if (strcmp(key, "GNU.sparse.map") == 0) {
1439
+ tar->sparse_gnu_major = 0;
1440
+ tar->sparse_gnu_minor = 1;
1441
+ if (gnu_sparse_01_parse(tar, value) != ARCHIVE_OK)
1442
+ return (ARCHIVE_WARN);
1443
+ }
1444
+
1445
+ /* GNU "1.0" sparse pax format */
1446
+ if (strcmp(key, "GNU.sparse.major") == 0) {
1447
+ tar->sparse_gnu_major = tar_atol10(value, strlen(value));
1448
+ tar->sparse_gnu_pending = 1;
1449
+ }
1450
+ if (strcmp(key, "GNU.sparse.minor") == 0) {
1451
+ tar->sparse_gnu_minor = tar_atol10(value, strlen(value));
1452
+ tar->sparse_gnu_pending = 1;
1453
+ }
1454
+ if (strcmp(key, "GNU.sparse.name") == 0) {
1455
+ /*
1456
+ * The real filename; when storing sparse
1457
+ * files, GNU tar puts a synthesized name into
1458
+ * the regular 'path' attribute in an attempt
1459
+ * to limit confusion. ;-)
1460
+ */
1461
+ archive_strcpy(&(tar->entry_pathname_override), value);
1462
+ }
1463
+ if (strcmp(key, "GNU.sparse.realsize") == 0) {
1464
+ tar->realsize = tar_atol10(value, strlen(value));
1465
+ archive_entry_set_size(entry, tar->realsize);
1466
+ }
1467
+ break;
1468
+ case 'L':
1469
+ /* Our extensions */
1470
+ /* TODO: Handle arbitrary extended attributes... */
1471
+ /*
1472
+ if (strcmp(key, "LIBARCHIVE.xxxxxxx")==0)
1473
+ archive_entry_set_xxxxxx(entry, value);
1474
+ */
1475
+ if (strcmp(key, "LIBARCHIVE.creationtime")==0) {
1476
+ pax_time(value, &s, &n);
1477
+ archive_entry_set_birthtime(entry, s, n);
1478
+ }
1479
+ if (strncmp(key, "LIBARCHIVE.xattr.", 17)==0)
1480
+ pax_attribute_xattr(entry, key, value);
1481
+ break;
1482
+ case 'S':
1483
+ /* We support some keys used by the "star" archiver */
1484
+ if (strcmp(key, "SCHILY.acl.access")==0) {
1485
+ wp = utf8_decode(tar, value, strlen(value));
1486
+ /* TODO: if (wp == NULL) */
1487
+ __archive_entry_acl_parse_w(entry, wp,
1488
+ ARCHIVE_ENTRY_ACL_TYPE_ACCESS);
1489
+ } else if (strcmp(key, "SCHILY.acl.default")==0) {
1490
+ wp = utf8_decode(tar, value, strlen(value));
1491
+ /* TODO: if (wp == NULL) */
1492
+ __archive_entry_acl_parse_w(entry, wp,
1493
+ ARCHIVE_ENTRY_ACL_TYPE_DEFAULT);
1494
+ } else if (strcmp(key, "SCHILY.devmajor")==0) {
1495
+ archive_entry_set_rdevmajor(entry,
1496
+ tar_atol10(value, strlen(value)));
1497
+ } else if (strcmp(key, "SCHILY.devminor")==0) {
1498
+ archive_entry_set_rdevminor(entry,
1499
+ tar_atol10(value, strlen(value)));
1500
+ } else if (strcmp(key, "SCHILY.fflags")==0) {
1501
+ archive_entry_copy_fflags_text(entry, value);
1502
+ } else if (strcmp(key, "SCHILY.dev")==0) {
1503
+ archive_entry_set_dev(entry,
1504
+ tar_atol10(value, strlen(value)));
1505
+ } else if (strcmp(key, "SCHILY.ino")==0) {
1506
+ archive_entry_set_ino(entry,
1507
+ tar_atol10(value, strlen(value)));
1508
+ } else if (strcmp(key, "SCHILY.nlink")==0) {
1509
+ archive_entry_set_nlink(entry,
1510
+ tar_atol10(value, strlen(value)));
1511
+ } else if (strcmp(key, "SCHILY.realsize")==0) {
1512
+ tar->realsize = tar_atol10(value, strlen(value));
1513
+ archive_entry_set_size(entry, tar->realsize);
1514
+ }
1515
+ break;
1516
+ case 'a':
1517
+ if (strcmp(key, "atime")==0) {
1518
+ pax_time(value, &s, &n);
1519
+ archive_entry_set_atime(entry, s, n);
1520
+ }
1521
+ break;
1522
+ case 'c':
1523
+ if (strcmp(key, "ctime")==0) {
1524
+ pax_time(value, &s, &n);
1525
+ archive_entry_set_ctime(entry, s, n);
1526
+ } else if (strcmp(key, "charset")==0) {
1527
+ /* TODO: Publish charset information in entry. */
1528
+ } else if (strcmp(key, "comment")==0) {
1529
+ /* TODO: Publish comment in entry. */
1530
+ }
1531
+ break;
1532
+ case 'g':
1533
+ if (strcmp(key, "gid")==0) {
1534
+ archive_entry_set_gid(entry,
1535
+ tar_atol10(value, strlen(value)));
1536
+ } else if (strcmp(key, "gname")==0) {
1537
+ archive_strcpy(&(tar->entry_gname), value);
1538
+ }
1539
+ break;
1540
+ case 'h':
1541
+ if (strcmp(key, "hdrcharset") == 0) {
1542
+ if (strcmp(value, "BINARY") == 0)
1543
+ tar->pax_hdrcharset_binary = 1;
1544
+ else if (strcmp(value, "ISO-IR 10646 2000 UTF-8") == 0)
1545
+ tar->pax_hdrcharset_binary = 0;
1546
+ else {
1547
+ /* TODO: Warn about unsupported hdrcharset */
1548
+ }
1549
+ }
1550
+ break;
1551
+ case 'l':
1552
+ /* pax interchange doesn't distinguish hardlink vs. symlink. */
1553
+ if (strcmp(key, "linkpath")==0) {
1554
+ archive_strcpy(&(tar->entry_linkpath), value);
1555
+ }
1556
+ break;
1557
+ case 'm':
1558
+ if (strcmp(key, "mtime")==0) {
1559
+ pax_time(value, &s, &n);
1560
+ archive_entry_set_mtime(entry, s, n);
1561
+ }
1562
+ break;
1563
+ case 'p':
1564
+ if (strcmp(key, "path")==0) {
1565
+ archive_strcpy(&(tar->entry_pathname), value);
1566
+ }
1567
+ break;
1568
+ case 'r':
1569
+ /* POSIX has reserved 'realtime.*' */
1570
+ break;
1571
+ case 's':
1572
+ /* POSIX has reserved 'security.*' */
1573
+ /* Someday: if (strcmp(key, "security.acl")==0) { ... } */
1574
+ if (strcmp(key, "size")==0) {
1575
+ /* "size" is the size of the data in the entry. */
1576
+ tar->entry_bytes_remaining
1577
+ = tar_atol10(value, strlen(value));
1578
+ /*
1579
+ * But, "size" is not necessarily the size of
1580
+ * the file on disk; if this is a sparse file,
1581
+ * the disk size may have already been set from
1582
+ * GNU.sparse.realsize or GNU.sparse.size or
1583
+ * an old GNU header field or SCHILY.realsize
1584
+ * or ....
1585
+ */
1586
+ if (tar->realsize < 0) {
1587
+ archive_entry_set_size(entry,
1588
+ tar->entry_bytes_remaining);
1589
+ tar->realsize
1590
+ = tar->entry_bytes_remaining;
1591
+ }
1592
+ }
1593
+ break;
1594
+ case 'u':
1595
+ if (strcmp(key, "uid")==0) {
1596
+ archive_entry_set_uid(entry,
1597
+ tar_atol10(value, strlen(value)));
1598
+ } else if (strcmp(key, "uname")==0) {
1599
+ archive_strcpy(&(tar->entry_uname), value);
1600
+ }
1601
+ break;
1602
+ }
1603
+ return (0);
1604
+ }
1605
+
1606
+
1607
+
1608
/*
 * Parse a decimal timestamp of the form [-]digits[.digits].
 *
 * The integer part is stored in *ps (saturating at INT64_MAX before the
 * sign is applied) and up to nine fractional digits are stored in *pn as
 * nanoseconds.  If the integer part saturates, parsing stops on a digit,
 * so no fraction is read and *pn is 0.
 */
static void
pax_time(const char *p, int64_t *ps, long *pn)
{
	const int64_t max_before_mul = INT64_MAX / 10;
	const int64_t max_last_digit = INT64_MAX % 10;
	int64_t seconds = 0;
	long nanos = 0;
	unsigned long scale;
	int negative = 0;
	int d;

	if (*p == '-') {
		negative = 1;
		p++;
	}

	/* Whole seconds; stop (without advancing) if the value saturates. */
	for (; *p >= '0' && *p <= '9'; p++) {
		d = *p - '0';
		if (seconds > max_before_mul ||
		    (seconds == max_before_mul && d > max_last_digit)) {
			seconds = INT64_MAX;
			break;
		}
		seconds = seconds * 10 + d;
	}
	*ps = negative ? -seconds : seconds;

	/* Fraction: each digit is worth one tenth of the previous one. */
	if (*p == '.') {
		for (scale = 100000000UL; scale != 0; scale /= 10) {
			++p;
			if (*p < '0' || *p > '9')
				break;
			nanos += (long)((unsigned long)(*p - '0') * scale);
		}
	}
	*pn = nanos;
}
1657
+
1658
+ /*
1659
+ * Parse GNU tar header
1660
+ */
1661
+ static int
1662
+ header_gnutar(struct archive_read *a, struct tar *tar,
1663
+ struct archive_entry *entry, const void *h)
1664
+ {
1665
+ const struct archive_entry_header_gnutar *header;
1666
+
1667
+ (void)a;
1668
+
1669
+ /*
1670
+ * GNU header is like POSIX ustar, except 'prefix' is
1671
+ * replaced with some other fields. This also means the
1672
+ * filename is stored as in old-style archives.
1673
+ */
1674
+
1675
+ /* Grab fields common to all tar variants. */
1676
+ header_common(a, tar, entry, h);
1677
+
1678
+ /* Copy filename over (to ensure null termination). */
1679
+ header = (const struct archive_entry_header_gnutar *)h;
1680
+ archive_strncpy(&(tar->entry_pathname), header->name,
1681
+ sizeof(header->name));
1682
+ archive_entry_copy_pathname(entry, tar->entry_pathname.s);
1683
+
1684
+ /* Fields common to ustar and GNU */
1685
+ /* XXX Can the following be factored out since it's common
1686
+ * to ustar and gnu tar? Is it okay to move it down into
1687
+ * header_common, perhaps? */
1688
+ archive_strncpy(&(tar->entry_uname),
1689
+ header->uname, sizeof(header->uname));
1690
+ archive_entry_copy_uname(entry, tar->entry_uname.s);
1691
+
1692
+ archive_strncpy(&(tar->entry_gname),
1693
+ header->gname, sizeof(header->gname));
1694
+ archive_entry_copy_gname(entry, tar->entry_gname.s);
1695
+
1696
+ /* Parse out device numbers only for char and block specials */
1697
+ if (header->typeflag[0] == '3' || header->typeflag[0] == '4') {
1698
+ archive_entry_set_rdevmajor(entry,
1699
+ tar_atol(header->rdevmajor, sizeof(header->rdevmajor)));
1700
+ archive_entry_set_rdevminor(entry,
1701
+ tar_atol(header->rdevminor, sizeof(header->rdevminor)));
1702
+ } else
1703
+ archive_entry_set_rdev(entry, 0);
1704
+
1705
+ tar->entry_padding = 0x1ff & (-tar->entry_bytes_remaining);
1706
+
1707
+ /* Grab GNU-specific fields. */
1708
+ archive_entry_set_atime(entry,
1709
+ tar_atol(header->atime, sizeof(header->atime)), 0);
1710
+ archive_entry_set_ctime(entry,
1711
+ tar_atol(header->ctime, sizeof(header->ctime)), 0);
1712
+ if (header->realsize[0] != 0) {
1713
+ tar->realsize
1714
+ = tar_atol(header->realsize, sizeof(header->realsize));
1715
+ archive_entry_set_size(entry, tar->realsize);
1716
+ }
1717
+
1718
+ if (header->sparse[0].offset[0] != 0) {
1719
+ gnu_sparse_old_read(a, tar, header);
1720
+ } else {
1721
+ if (header->isextended[0] != 0) {
1722
+ /* XXX WTF? XXX */
1723
+ }
1724
+ }
1725
+
1726
+ return (0);
1727
+ }
1728
+
1729
+ static void
1730
+ gnu_add_sparse_entry(struct tar *tar, off_t offset, off_t remaining)
1731
+ {
1732
+ struct sparse_block *p;
1733
+
1734
+ p = (struct sparse_block *)malloc(sizeof(*p));
1735
+ if (p == NULL)
1736
+ __archive_errx(1, "Out of memory");
1737
+ memset(p, 0, sizeof(*p));
1738
+ if (tar->sparse_last != NULL)
1739
+ tar->sparse_last->next = p;
1740
+ else
1741
+ tar->sparse_list = p;
1742
+ tar->sparse_last = p;
1743
+ p->offset = offset;
1744
+ p->remaining = remaining;
1745
+ }
1746
+
1747
+ static void
1748
+ gnu_clear_sparse_list(struct tar *tar)
1749
+ {
1750
+ struct sparse_block *p;
1751
+
1752
+ while (tar->sparse_list != NULL) {
1753
+ p = tar->sparse_list;
1754
+ tar->sparse_list = p->next;
1755
+ free(p);
1756
+ }
1757
+ tar->sparse_last = NULL;
1758
+ }
1759
+
1760
+ /*
1761
+ * GNU tar old-format sparse data.
1762
+ *
1763
+ * GNU old-format sparse data is stored in a fixed-field
1764
+ * format. Offset/size values are 11-byte octal fields (same
1765
+ * format as 'size' field in ustar header). These are
1766
+ * stored in the header, allocating subsequent header blocks
1767
+ * as needed. Extending the header in this way is a pretty
1768
+ * severe POSIX violation; this design has earned GNU tar a
1769
+ * lot of criticism.
1770
+ */
1771
+
1772
+ static int
1773
+ gnu_sparse_old_read(struct archive_read *a, struct tar *tar,
1774
+ const struct archive_entry_header_gnutar *header)
1775
+ {
1776
+ ssize_t bytes_read;
1777
+ const void *data;
1778
+ struct extended {
1779
+ struct gnu_sparse sparse[21];
1780
+ char isextended[1];
1781
+ char padding[7];
1782
+ };
1783
+ const struct extended *ext;
1784
+
1785
+ gnu_sparse_old_parse(tar, header->sparse, 4);
1786
+ if (header->isextended[0] == 0)
1787
+ return (ARCHIVE_OK);
1788
+
1789
+ do {
1790
+ data = __archive_read_ahead(a, 512, &bytes_read);
1791
+ if (bytes_read < 0)
1792
+ return (ARCHIVE_FATAL);
1793
+ if (bytes_read < 512) {
1794
+ archive_set_error(&a->archive, ARCHIVE_ERRNO_FILE_FORMAT,
1795
+ "Truncated tar archive "
1796
+ "detected while reading sparse file data");
1797
+ return (ARCHIVE_FATAL);
1798
+ }
1799
+ __archive_read_consume(a, 512);
1800
+ ext = (const struct extended *)data;
1801
+ gnu_sparse_old_parse(tar, ext->sparse, 21);
1802
+ } while (ext->isextended[0] != 0);
1803
+ if (tar->sparse_list != NULL)
1804
+ tar->entry_offset = tar->sparse_list->offset;
1805
+ return (ARCHIVE_OK);
1806
+ }
1807
+
1808
+ static void
1809
+ gnu_sparse_old_parse(struct tar *tar,
1810
+ const struct gnu_sparse *sparse, int length)
1811
+ {
1812
+ while (length > 0 && sparse->offset[0] != 0) {
1813
+ gnu_add_sparse_entry(tar,
1814
+ tar_atol(sparse->offset, sizeof(sparse->offset)),
1815
+ tar_atol(sparse->numbytes, sizeof(sparse->numbytes)));
1816
+ sparse++;
1817
+ length--;
1818
+ }
1819
+ }
1820
+
1821
+ /*
1822
+ * GNU tar sparse format 0.0
1823
+ *
1824
+ * Beginning with GNU tar 1.15, sparse files are stored using
1825
+ * information in the pax extended header. The GNU tar maintainers
1826
+ * have gone through a number of variations in the process of working
1827
+ * out this scheme; fortunately, they're all numbered.
1828
+ *
1829
+ * Sparse format 0.0 uses attribute GNU.sparse.numblocks to store the
1830
+ * number of blocks, and GNU.sparse.offset/GNU.sparse.numbytes to
1831
+ * store offset/size for each block. The repeated instances of these
1832
+ * latter fields violate the pax specification (which frowns on
1833
+ * duplicate keys), so this format was quickly replaced.
1834
+ */
1835
+
1836
+ /*
1837
+ * GNU tar sparse format 0.1
1838
+ *
1839
+ * This version replaced the offset/numbytes attributes with
1840
+ * a single "map" attribute that stored a list of integers. This
1841
+ * format had two problems: First, the "map" attribute could be very
1842
+ * long, which caused problems for some implementations. More
1843
+ * importantly, the sparse data was lost when extracted by archivers
1844
+ * that didn't recognize this extension.
1845
+ */
1846
+
1847
+ static int
1848
+ gnu_sparse_01_parse(struct tar *tar, const char *p)
1849
+ {
1850
+ const char *e;
1851
+ off_t offset = -1, size = -1;
1852
+
1853
+ for (;;) {
1854
+ e = p;
1855
+ while (*e != '\0' && *e != ',') {
1856
+ if (*e < '0' || *e > '9')
1857
+ return (ARCHIVE_WARN);
1858
+ e++;
1859
+ }
1860
+ if (offset < 0) {
1861
+ offset = tar_atol10(p, e - p);
1862
+ if (offset < 0)
1863
+ return (ARCHIVE_WARN);
1864
+ } else {
1865
+ size = tar_atol10(p, e - p);
1866
+ if (size < 0)
1867
+ return (ARCHIVE_WARN);
1868
+ gnu_add_sparse_entry(tar, offset, size);
1869
+ offset = -1;
1870
+ }
1871
+ if (*e == '\0')
1872
+ return (ARCHIVE_OK);
1873
+ p = e + 1;
1874
+ }
1875
+ }
1876
+
1877
+ /*
1878
+ * GNU tar sparse format 1.0
1879
+ *
1880
+ * The idea: The offset/size data is stored as a series of base-10
1881
+ * ASCII numbers prepended to the file data, so that dearchivers that
1882
+ * don't support this format will extract the block map along with the
1883
+ * data and a separate post-process can restore the sparseness.
1884
+ *
1885
+ * Unfortunately, GNU tar 1.16 had a bug that added unnecessary
1886
+ * padding to the body of the file when using this format. GNU tar
1887
+ * 1.17 corrected this bug without bumping the version number, so
1888
+ * it's not possible to support both variants. This code supports
1889
+ * the later variant at the expense of not supporting the former.
1890
+ *
1891
+ * This variant also replaced GNU.sparse.size with GNU.sparse.realsize
1892
+ * and introduced the GNU.sparse.major/GNU.sparse.minor attributes.
1893
+ */
1894
+
1895
+ /*
1896
+ * Read the next line from the input, and parse it as a decimal
1897
+ * integer followed by '\n'. Returns positive integer value or
1898
+ * negative on error.
1899
+ */
1900
+ static int64_t
1901
+ gnu_sparse_10_atol(struct archive_read *a, struct tar *tar,
1902
+ ssize_t *remaining)
1903
+ {
1904
+ int64_t l, limit, last_digit_limit;
1905
+ const char *p;
1906
+ ssize_t bytes_read;
1907
+ int base, digit;
1908
+
1909
+ base = 10;
1910
+ limit = INT64_MAX / base;
1911
+ last_digit_limit = INT64_MAX % base;
1912
+
1913
+ /*
1914
+ * Skip any lines starting with '#'; GNU tar specs
1915
+ * don't require this, but they should.
1916
+ */
1917
+ do {
1918
+ bytes_read = readline(a, tar, &p, tar_min(*remaining, 100));
1919
+ if (bytes_read <= 0)
1920
+ return (ARCHIVE_FATAL);
1921
+ *remaining -= bytes_read;
1922
+ } while (p[0] == '#');
1923
+
1924
+ l = 0;
1925
+ while (bytes_read > 0) {
1926
+ if (*p == '\n')
1927
+ return (l);
1928
+ if (*p < '0' || *p >= '0' + base)
1929
+ return (ARCHIVE_WARN);
1930
+ digit = *p - '0';
1931
+ if (l > limit || (l == limit && digit > last_digit_limit))
1932
+ l = INT64_MAX; /* Truncate on overflow. */
1933
+ else
1934
+ l = (l * base) + digit;
1935
+ p++;
1936
+ bytes_read--;
1937
+ }
1938
+ /* TODO: Error message. */
1939
+ return (ARCHIVE_WARN);
1940
+ }
1941
+
1942
+ /*
1943
+ * Returns length (in bytes) of the sparse data description
1944
+ * that was read.
1945
+ */
1946
+ static ssize_t
1947
+ gnu_sparse_10_read(struct archive_read *a, struct tar *tar)
1948
+ {
1949
+ ssize_t remaining, bytes_read;
1950
+ int entries;
1951
+ off_t offset, size, to_skip;
1952
+
1953
+ /* Clear out the existing sparse list. */
1954
+ gnu_clear_sparse_list(tar);
1955
+
1956
+ remaining = tar->entry_bytes_remaining;
1957
+
1958
+ /* Parse entries. */
1959
+ entries = gnu_sparse_10_atol(a, tar, &remaining);
1960
+ if (entries < 0)
1961
+ return (ARCHIVE_FATAL);
1962
+ /* Parse the individual entries. */
1963
+ while (entries-- > 0) {
1964
+ /* Parse offset/size */
1965
+ offset = gnu_sparse_10_atol(a, tar, &remaining);
1966
+ if (offset < 0)
1967
+ return (ARCHIVE_FATAL);
1968
+ size = gnu_sparse_10_atol(a, tar, &remaining);
1969
+ if (size < 0)
1970
+ return (ARCHIVE_FATAL);
1971
+ /* Add a new sparse entry. */
1972
+ gnu_add_sparse_entry(tar, offset, size);
1973
+ }
1974
+ /* Skip rest of block... */
1975
+ bytes_read = tar->entry_bytes_remaining - remaining;
1976
+ to_skip = 0x1ff & -bytes_read;
1977
+ if (to_skip != __archive_read_skip(a, to_skip))
1978
+ return (ARCHIVE_FATAL);
1979
+ return (bytes_read + to_skip);
1980
+ }
1981
+
1982
/*-
 * Convert text->integer.
 *
 * Traditional tar formats (including POSIX) specify base-8 for
 * all of the standard numeric fields.  This is a significant limitation
 * in practice:
 *   = file size is limited to 8GB
 *   = rdevmajor and rdevminor are limited to 21 bits
 *   = uid/gid are limited to 21 bits
 *
 * There are two workarounds for this:
 *   = pax extended headers, which use variable-length string fields
 *   = GNU tar and STAR both allow either base-8 or base-256 in
 *      most fields.  The high bit is set to indicate base-256.
 *
 * On read, this implementation supports both extensions: a field whose
 * first byte has the high bit set is decoded as base-256, anything else
 * as base-8.  (Technically, GNU tar considers a field to be in base-256
 * only if the first byte is 0xff or 0x80.)
 */
static int64_t
tar_atol(const char *p, unsigned char_cnt)
{
	return ((*p & 0x80) != 0)
	    ? tar_atol256(p, char_cnt)
	    : tar_atol8(p, char_cnt);
}
2010
+
2011
/*
 * Parse an octal field of at most 'char_cnt' bytes: optional leading
 * blanks/tabs, an optional '-', then octal digits.  Parsing stops at the
 * first non-octal byte or at the end of the field, whichever comes first.
 * The magnitude saturates at INT64_MAX on overflow.
 *
 * Leading blanks and the sign count against 'char_cnt', and no byte
 * beyond the field is ever read, so a blank-padded field cannot pull
 * digits in from the field that follows it.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol8(const char *p, unsigned char_cnt)
{
	const int base = 8;
	const int64_t limit = INT64_MAX / base;
	const int64_t last_digit_limit = INT64_MAX % base;
	int64_t l = 0;
	int digit;
	int sign = 1;

	/* '&&' guarantees *p is not read once the field is exhausted. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	while (char_cnt > 0) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
2046
+
2047
/*
 * Parse a decimal field of at most 'char_cnt' bytes: optional leading
 * blanks/tabs, an optional '-', then decimal digits.  Parsing stops at
 * the first non-digit or at the end of the field, whichever comes first.
 * The magnitude saturates at INT64_MAX on overflow.
 *
 * Leading blanks and the sign count against 'char_cnt', and no byte
 * beyond the field is ever read.
 *
 * Note that this implementation does not (and should not!) obey
 * locale settings; you cannot simply substitute strtol here, since
 * it does obey locale.
 */
static int64_t
tar_atol10(const char *p, unsigned char_cnt)
{
	const int base = 10;
	const int64_t limit = INT64_MAX / base;
	const int64_t last_digit_limit = INT64_MAX % base;
	int64_t l = 0;
	int digit;
	int sign = 1;

	/* '&&' guarantees *p is not read once the field is exhausted. */
	while (char_cnt > 0 && (*p == ' ' || *p == '\t')) {
		p++;
		char_cnt--;
	}
	if (char_cnt > 0 && *p == '-') {
		sign = -1;
		p++;
		char_cnt--;
	}

	while (char_cnt > 0) {
		digit = *p - '0';
		if (digit < 0 || digit >= base)
			break;
		if (l > limit || (l == limit && digit > last_digit_limit)) {
			l = INT64_MAX; /* Truncate on overflow. */
			break;
		}
		l = (l * base) + digit;
		p++;
		char_cnt--;
	}
	return (sign < 0) ? -l : l;
}
2082
+
2083
/*
 * Parse a base-256 integer.  This is just a straight signed binary
 * value in big-endian order, except that the high-order bit of the
 * first byte is ignored; the next bit (0x40) supplies the sign.
 *
 * The result saturates at INT64_MAX / INT64_MIN when the field does not
 * fit in 64 bits.  An empty field (char_cnt == 0) yields 0 without
 * reading any byte.
 *
 * The value is accumulated with multiplication and addition rather than
 * shifts, because left-shifting a negative signed value is undefined
 * behaviour in C.
 */
static int64_t
tar_atol256(const char *_p, unsigned char_cnt)
{
	const int64_t upper_limit = INT64_MAX / 256;
	const int64_t lower_limit = INT64_MIN / 256;
	const unsigned char *p = (const unsigned char *)_p;
	int64_t l;

	if (char_cnt == 0)
		return (0);

	/* Sign-extend the 7-bit first byte: start at -64 or 0. */
	l = ((0x40 & *p) == 0x40) ? -64 : 0;
	l += (0x3f & *p++);

	while (--char_cnt > 0) {
		if (l > upper_limit) {
			l = INT64_MAX; /* Truncate on overflow */
			break;
		} else if (l < lower_limit) {
			l = INT64_MIN;
			break;
		}
		/* Within the limits above, l * 256 + byte cannot overflow. */
		l = l * 256 + *p++;
	}
	return (l);
}
2115
+
2116
+ /*
2117
+ * Returns length of line (including trailing newline)
2118
+ * or negative on error. 'start' argument is updated to
2119
+ * point to first character of line. This avoids copying
2120
+ * when possible.
2121
+ */
2122
+ static ssize_t
2123
+ readline(struct archive_read *a, struct tar *tar, const char **start,
2124
+ ssize_t limit)
2125
+ {
2126
+ ssize_t bytes_read;
2127
+ ssize_t total_size = 0;
2128
+ const void *t;
2129
+ const char *s;
2130
+ void *p;
2131
+
2132
+ t = __archive_read_ahead(a, 1, &bytes_read);
2133
+ if (bytes_read <= 0)
2134
+ return (ARCHIVE_FATAL);
2135
+ s = t; /* Start of line? */
2136
+ p = memchr(t, '\n', bytes_read);
2137
+ /* If we found '\n' in the read buffer, return pointer to that. */
2138
+ if (p != NULL) {
2139
+ bytes_read = 1 + ((const char *)p) - s;
2140
+ if (bytes_read > limit) {
2141
+ archive_set_error(&a->archive,
2142
+ ARCHIVE_ERRNO_FILE_FORMAT,
2143
+ "Line too long");
2144
+ return (ARCHIVE_FATAL);
2145
+ }
2146
+ __archive_read_consume(a, bytes_read);
2147
+ *start = s;
2148
+ return (bytes_read);
2149
+ }
2150
+ /* Otherwise, we need to accumulate in a line buffer. */
2151
+ for (;;) {
2152
+ if (total_size + bytes_read > limit) {
2153
+ archive_set_error(&a->archive,
2154
+ ARCHIVE_ERRNO_FILE_FORMAT,
2155
+ "Line too long");
2156
+ return (ARCHIVE_FATAL);
2157
+ }
2158
+ if (archive_string_ensure(&tar->line, total_size + bytes_read) == NULL) {
2159
+ archive_set_error(&a->archive, ENOMEM,
2160
+ "Can't allocate working buffer");
2161
+ return (ARCHIVE_FATAL);
2162
+ }
2163
+ memcpy(tar->line.s + total_size, t, bytes_read);
2164
+ __archive_read_consume(a, bytes_read);
2165
+ total_size += bytes_read;
2166
+ /* If we found '\n', clean up and return. */
2167
+ if (p != NULL) {
2168
+ *start = tar->line.s;
2169
+ return (total_size);
2170
+ }
2171
+ /* Read some more. */
2172
+ t = __archive_read_ahead(a, 1, &bytes_read);
2173
+ if (bytes_read <= 0)
2174
+ return (ARCHIVE_FATAL);
2175
+ s = t; /* Start of line? */
2176
+ p = memchr(t, '\n', bytes_read);
2177
+ /* If we found '\n', trim the read. */
2178
+ if (p != NULL) {
2179
+ bytes_read = 1 + ((const char *)p) - s;
2180
+ }
2181
+ }
2182
+ }
2183
+
2184
+ static wchar_t *
2185
+ utf8_decode(struct tar *tar, const char *src, size_t length)
2186
+ {
2187
+ wchar_t *dest;
2188
+ ssize_t n;
2189
+
2190
+ /* Ensure pax_entry buffer is big enough. */
2191
+ if (tar->pax_entry_length <= length) {
2192
+ wchar_t *old_entry;
2193
+
2194
+ if (tar->pax_entry_length <= 0)
2195
+ tar->pax_entry_length = 1024;
2196
+ while (tar->pax_entry_length <= length + 1)
2197
+ tar->pax_entry_length *= 2;
2198
+
2199
+ old_entry = tar->pax_entry;
2200
+ tar->pax_entry = (wchar_t *)realloc(tar->pax_entry,
2201
+ tar->pax_entry_length * sizeof(wchar_t));
2202
+ if (tar->pax_entry == NULL) {
2203
+ free(old_entry);
2204
+ /* TODO: Handle this error. */
2205
+ return (NULL);
2206
+ }
2207
+ }
2208
+
2209
+ dest = tar->pax_entry;
2210
+ while (length > 0) {
2211
+ n = UTF8_mbrtowc(dest, src, length);
2212
+ if (n < 0)
2213
+ return (NULL);
2214
+ if (n == 0)
2215
+ break;
2216
+ dest++;
2217
+ src += n;
2218
+ length -= n;
2219
+ }
2220
+ *dest = L'\0';
2221
+ return (tar->pax_entry);
2222
+ }
2223
+
2224
/*
 * Decode a single UTF-8 sequence of one to four bytes (copied and
 * simplified from FreeBSD libc/locale).
 *
 * Returns the number of input bytes consumed, 0 for end-of-string (or
 * when 's' or 'pwc' is NULL or 'n' is 0), -1 for an invalid first byte
 * and -2 for a valid first byte whose sequence is truncated.  A bad
 * continuation byte consumes one byte and stores '?'.
 */
static ssize_t
UTF8_mbrtowc(wchar_t *pwc, const char *s, size_t n)
{
	const unsigned char *bytes = (const unsigned char *)s;
	unsigned long code;
	int lead, seq_len, k;

	if (pwc == NULL || s == NULL || n == 0)
		return (0);

	/*
	 * The first octet determines the sequence length and carries the
	 * most significant payload bits.
	 */
	lead = bytes[0];
	if ((lead & 0x80) == 0) {
		seq_len = 1;
		code = lead & 0x7f;
	} else if ((lead & 0xe0) == 0xc0) {
		seq_len = 2;
		code = lead & 0x1f;
	} else if ((lead & 0xf0) == 0xe0) {
		seq_len = 3;
		code = lead & 0x0f;
	} else if ((lead & 0xf8) == 0xf0) {
		seq_len = 4;
		code = lead & 0x07;
	} else {
		/* Invalid first byte. */
		return (-1);
	}

	if (n < (size_t)seq_len) {
		/* Valid first byte but truncated. */
		return (-2);
	}

	/* Append six payload bits from every continuation octet. */
	for (k = 1; k < seq_len; k++) {
		if ((bytes[k] & 0xc0) != 0x80) {
			/* Invalid intermediate byte; consume one byte and
			 * emit '?' */
			*pwc = '?';
			return (1);
		}
		code = (code << 6) | (bytes[k] & 0x3f);
	}

	/* Assign the value to the output; out-of-range values
	 * just get truncated. */
	*pwc = (wchar_t)code;
#ifdef WCHAR_MAX
	/*
	 * If platform has WCHAR_MAX, we can do something
	 * more sensible with out-of-range values.
	 */
	if (code >= WCHAR_MAX)
		*pwc = '?';
#endif
	/* Return number of bytes input consumed: 0 for end-of-string. */
	if (code == L'\0')
		return (0);
	return (seq_len);
}
2295
+
2296
+
2297
/*
 * base64_decode - Base64 decode
 *
 * Decodes 'len' bytes of 's' into a newly malloc'ed buffer that the
 * caller must free; the decoded length is stored in '*out_len'.  On
 * allocation failure NULL is returned and '*out_len' is 0.
 *
 * This accepts most variations of base-64 encoding, including:
 *    * with or without line breaks
 *    * with or without the final group padded with '=' or '_' characters
 * (The most economical Base-64 variant does not pad the last group and
 * omits line breaks; RFC1341 used for MIME requires both.)
 */
static char *
base64_decode(const char *s, size_t len, size_t *out_len)
{
	static const char alphabet[] =
	    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	    "abcdefghijklmnopqrstuvwxyz"
	    "0123456789+/";
	static unsigned char decode_table[128];
	const unsigned char *in = (const unsigned char *)s;
	char *result, *wp;

	/* Build the reverse lookup table on first use ('B' maps to 1). */
	if (decode_table[(unsigned char)alphabet[1]] != 1) {
		unsigned k;
		memset(decode_table, 0xff, sizeof(decode_table));
		for (k = 0; alphabet[k] != '\0'; k++)
			decode_table[(unsigned char)alphabet[k]] = k;
	}

	/* Allocate enough space to hold the entire output. */
	/* Note that we may not use all of this... */
	result = (char *)malloc(len - len / 4 + 1);
	if (result == NULL) {
		*out_len = 0;
		return (NULL);
	}
	wp = result;

	while (len > 0) {
		/* Collect the next group of (up to) four characters. */
		int bits = 0;
		int count = 0;

		while (count < 4 && len > 0) {
			unsigned char c = *in;

			/* '=' or '_' padding indicates final group. */
			if (c == '=' || c == '_') {
				len = 0;
				break;
			}
			in++;
			len--;
			/* Skip illegal characters (including line breaks) */
			if (c > 127 || c < 32 || decode_table[c] == 0xff)
				continue;
			bits = (bits << 6) | decode_table[c];
			count++;
		}

		/* Align a short group properly. */
		bits <<= 6 * (4 - count);

		/* Unpack the group: n characters carry n*3/4 whole bytes.
		 * (A lone character is invalid and yields nothing.) */
		if (count >= 2)
			wp[0] = (bits >> 16) & 0xff;
		if (count >= 3)
			wp[1] = (bits >> 8) & 0xff;
		if (count == 4)
			wp[2] = bits & 0xff;
		wp += count * 3 / 4;
	}

	*out_len = wp - result;
	return (result);
}
2377
+
2378
/*
 * Return a newly malloc'ed copy of 'in' with every "%XX" escape (two
 * hex digits) replaced by the byte it encodes.  A '%' that is not
 * followed by two hex digits is copied through unchanged.  Returns NULL
 * if the allocation fails; the caller frees the result.
 */
static char *
url_decode(const char *in)
{
	const char *rp = in;
	char *decoded, *wp;
	int hi, lo;

	/* The output is never longer than the input. */
	decoded = (char *)malloc(strlen(in) + 1);
	if (decoded == NULL)
		return (NULL);

	wp = decoded;
	while (*rp != '\0') {
		if (rp[0] == '%' && rp[1] != '\0' && rp[2] != '\0') {
			/* Try to convert % escape */
			hi = tohex(rp[1]);
			lo = tohex(rp[2]);
			if (hi >= 0 && lo >= 0) {
				/* Looks good, consume three chars */
				*wp++ = ((hi << 4) | lo);
				rp += 3;
				continue;
			}
			/* Else fall through and treat '%' as normal char */
		}
		*wp++ = *rp++;
	}
	*wp = '\0';
	return (decoded);
}
2406
+
2407
/*
 * Return the value (0-15) of the hexadecimal digit 'c', accepting both
 * upper and lower case, or -1 if 'c' is not a hex digit.
 */
static int
tohex(int c)
{
	if ('0' <= c && c <= '9')
		return (c - '0');
	if ('A' <= c && c <= 'F')
		return (10 + (c - 'A'));
	if ('a' <= c && c <= 'f')
		return (10 + (c - 'a'));
	return (-1);
}