libarchive-static 1.0.5 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/ext/extconf.rb +2 -9
- data/ext/libarchive-0.1.1/ext/archive_read_support_compression.c +6 -6
- data/ext/libarchive-0.1.1/ext/archive_read_support_compression.o +0 -0
- data/ext/libarchive-0.1.1/ext/archive_read_support_format.o +0 -0
- data/ext/libarchive-0.1.1/ext/archive_write_open_rb_str.c +1 -1
- data/ext/libarchive-0.1.1/ext/archive_write_open_rb_str.o +0 -0
- data/ext/libarchive-0.1.1/ext/archive_write_set_compression.c +5 -5
- data/ext/libarchive-0.1.1/ext/archive_write_set_compression.o +0 -0
- data/ext/libarchive-0.1.1/ext/config.h +23 -0
- data/ext/libarchive-0.1.1/ext/config.log +230 -0
- data/ext/libarchive-0.1.1/ext/config.status +671 -0
- data/ext/libarchive-0.1.1/ext/libarchive.c +1 -1
- data/ext/libarchive-0.1.1/ext/libarchive.o +0 -0
- data/ext/libarchive-0.1.1/ext/libarchive_archive.c +7 -7
- data/ext/libarchive-0.1.1/ext/libarchive_archive.o +0 -0
- data/ext/libarchive-0.1.1/ext/libarchive_entry.c +6 -0
- data/ext/libarchive-0.1.1/ext/libarchive_entry.o +0 -0
- data/ext/libarchive-0.1.1/ext/libarchive_internal.h +0 -1
- data/ext/libarchive-0.1.1/ext/libarchive_reader.c +6 -4
- data/ext/libarchive-0.1.1/ext/libarchive_reader.o +0 -0
- data/ext/libarchive-0.1.1/ext/libarchive_ruby.so +0 -0
- data/ext/libarchive-0.1.1/ext/libarchive_win32.h +1 -1
- data/ext/libarchive-0.1.1/ext/libarchive_writer.c +2 -2
- data/ext/libarchive-0.1.1/ext/libarchive_writer.o +0 -0
- data/ext/libarchive-3.6.2/Makefile.in +16892 -0
- data/ext/libarchive-3.6.2/build/autoconf/ax_append_compile_flags.m4 +67 -0
- data/ext/libarchive-3.6.2/build/autoconf/ax_append_flag.m4 +71 -0
- data/ext/libarchive-3.6.2/build/autoconf/ax_check_compile_flag.m4 +74 -0
- data/ext/libarchive-3.6.2/build/autoconf/ax_require_defined.m4 +37 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/build/autoconf/check_stdcall_func.m4 +0 -0
- data/ext/libarchive-3.6.2/build/autoconf/compile +348 -0
- data/ext/libarchive-3.6.2/build/autoconf/config.guess +1754 -0
- data/ext/libarchive-3.6.2/build/autoconf/config.rpath +696 -0
- data/ext/libarchive-3.6.2/build/autoconf/config.sub +1890 -0
- data/ext/libarchive-3.6.2/build/autoconf/depcomp +791 -0
- data/ext/libarchive-3.6.2/build/autoconf/iconv.m4 +271 -0
- data/ext/libarchive-3.6.2/build/autoconf/install-sh +541 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/build/autoconf/la_uid_t.m4 +0 -0
- data/ext/libarchive-3.6.2/build/autoconf/lib-ld.m4 +109 -0
- data/ext/libarchive-3.6.2/build/autoconf/lib-link.m4 +777 -0
- data/ext/libarchive-3.6.2/build/autoconf/lib-prefix.m4 +224 -0
- data/ext/libarchive-3.6.2/build/autoconf/ltmain.sh +11251 -0
- data/ext/libarchive-3.6.2/build/autoconf/m4_ax_compile_check_sizeof.m4 +115 -0
- data/ext/libarchive-3.6.2/build/autoconf/missing +215 -0
- data/ext/libarchive-3.6.2/build/autoconf/test-driver +153 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/build/pkgconfig/libarchive.pc.in +4 -1
- data/ext/libarchive-3.6.2/config.h.in +1504 -0
- data/ext/libarchive-3.6.2/configure +25558 -0
- data/ext/libarchive-3.6.2/libarchive/archive.h +1212 -0
- data/ext/libarchive-3.6.2/libarchive/archive_acl.c +2097 -0
- data/ext/libarchive-3.6.2/libarchive/archive_acl_private.h +83 -0
- data/ext/libarchive-3.6.2/libarchive/archive_blake2.h +197 -0
- data/ext/libarchive-3.6.2/libarchive/archive_blake2_impl.h +161 -0
- data/ext/libarchive-3.6.2/libarchive/archive_blake2s_ref.c +369 -0
- data/ext/libarchive-3.6.2/libarchive/archive_blake2sp_ref.c +361 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_check_magic.c +63 -22
- data/ext/libarchive-3.6.2/libarchive/archive_cmdline.c +227 -0
- data/ext/libarchive-3.6.2/libarchive/archive_cmdline_private.h +47 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_crc32.h +17 -0
- data/ext/libarchive-3.6.2/libarchive/archive_cryptor.c +534 -0
- data/ext/libarchive-3.6.2/libarchive/archive_cryptor_private.h +188 -0
- data/ext/libarchive-3.6.2/libarchive/archive_digest.c +1505 -0
- data/ext/libarchive-3.6.2/libarchive/archive_digest_private.h +416 -0
- data/ext/libarchive-3.6.2/libarchive/archive_disk_acl_darwin.c +559 -0
- data/ext/libarchive-3.6.2/libarchive/archive_disk_acl_freebsd.c +712 -0
- data/ext/libarchive-3.6.2/libarchive/archive_disk_acl_linux.c +760 -0
- data/ext/libarchive-3.6.2/libarchive/archive_disk_acl_sunos.c +824 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_endian.h +48 -15
- data/ext/libarchive-3.6.2/libarchive/archive_entry.c +2149 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry.h +305 -106
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry_copy_bhfi.c +5 -4
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry_copy_stat.c +9 -3
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry_link_resolver.c +104 -62
- data/ext/libarchive-3.6.2/libarchive/archive_entry_locale.h +92 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry_private.h +65 -49
- data/ext/libarchive-3.6.2/libarchive/archive_entry_sparse.c +156 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry_stat.c +6 -6
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry_strmode.c +1 -1
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_entry_xattr.c +4 -6
- data/ext/libarchive-3.6.2/libarchive/archive_getdate.c +1165 -0
- data/ext/libarchive-3.6.2/libarchive/archive_getdate.h +39 -0
- data/ext/libarchive-3.6.2/libarchive/archive_hmac.c +334 -0
- data/ext/libarchive-3.6.2/libarchive/archive_hmac_private.h +117 -0
- data/ext/libarchive-3.6.2/libarchive/archive_match.c +1875 -0
- data/ext/libarchive-3.6.2/libarchive/archive_openssl_evp_private.h +53 -0
- data/ext/libarchive-3.6.2/libarchive/archive_openssl_hmac_private.h +54 -0
- data/ext/libarchive-3.6.2/libarchive/archive_options.c +218 -0
- data/ext/libarchive-3.6.2/libarchive/archive_options_private.h +51 -0
- data/ext/libarchive-3.6.2/libarchive/archive_pack_dev.c +337 -0
- data/ext/libarchive-3.6.2/libarchive/archive_pack_dev.h +49 -0
- data/ext/libarchive-3.6.2/libarchive/archive_pathmatch.c +463 -0
- data/ext/libarchive-3.6.2/libarchive/archive_pathmatch.h +52 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_platform.h +77 -9
- data/ext/libarchive-3.6.2/libarchive/archive_platform_acl.h +55 -0
- data/ext/libarchive-3.6.2/libarchive/archive_platform_xattr.h +47 -0
- data/ext/libarchive-3.6.2/libarchive/archive_ppmd7.c +1168 -0
- data/ext/libarchive-3.6.2/libarchive/archive_ppmd7_private.h +119 -0
- data/ext/libarchive-3.6.2/libarchive/archive_ppmd8.c +1287 -0
- data/ext/libarchive-3.6.2/libarchive/archive_ppmd8_private.h +148 -0
- data/ext/libarchive-3.6.2/libarchive/archive_ppmd_private.h +151 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_private.h +74 -18
- data/ext/libarchive-3.6.2/libarchive/archive_random.c +272 -0
- data/ext/libarchive-3.6.2/libarchive/archive_random_private.h +36 -0
- data/ext/libarchive-3.6.2/libarchive/archive_rb.c +709 -0
- data/ext/libarchive-3.6.2/libarchive/archive_rb.h +113 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read.c +1756 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_add_passphrase.c +190 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_append_filter.c +204 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_data_into_fd.c +64 -18
- data/ext/libarchive-3.6.2/libarchive/archive_read_disk_entry_from_file.c +1086 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_disk_posix.c +2732 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_disk_private.h +40 -4
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_disk_set_standard_lookup.c +21 -11
- data/ext/libarchive-3.6.2/libarchive/archive_read_disk_windows.c +2479 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_extract.c +60 -0
- data/ext/{libarchive-2.8.4/libarchive/archive_read_extract.c → libarchive-3.6.2/libarchive/archive_read_extract2.c} +34 -61
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_open_fd.c +70 -49
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_open_file.c +38 -23
- data/ext/libarchive-3.6.2/libarchive/archive_read_open_filename.c +586 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_open_memory.c +58 -28
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_private.h +127 -59
- data/ext/libarchive-3.6.2/libarchive/archive_read_set_format.c +117 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_set_options.c +133 -0
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_all.c → libarchive-3.6.2/libarchive/archive_read_support_filter_all.c} +35 -10
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_filter_by_code.c +83 -0
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_bzip2.c → libarchive-3.6.2/libarchive/archive_read_support_filter_bzip2.c} +38 -26
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_compress.c → libarchive-3.6.2/libarchive/archive_read_support_filter_compress.c} +52 -44
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_filter_grzip.c +112 -0
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_gzip.c → libarchive-3.6.2/libarchive/archive_read_support_filter_gzip.c} +108 -37
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_filter_lrzip.c +122 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_filter_lz4.c +742 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_filter_lzop.c +499 -0
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_none.c → libarchive-3.6.2/libarchive/archive_read_support_filter_none.c} +15 -3
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_program.c → libarchive-3.6.2/libarchive/archive_read_support_filter_program.c} +114 -77
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_rpm.c → libarchive-3.6.2/libarchive/archive_read_support_filter_rpm.c} +31 -31
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_uu.c → libarchive-3.6.2/libarchive/archive_read_support_filter_uu.c} +141 -85
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_compression_xz.c → libarchive-3.6.2/libarchive/archive_read_support_filter_xz.c} +369 -284
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_filter_zstd.c +297 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_7zip.c +3900 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_all.c +89 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_support_format_ar.c +126 -72
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_by_code.c +92 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_cab.c +3228 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_cpio.c +1104 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_support_format_empty.c +14 -11
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_support_format_iso9660.c +990 -541
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_lha.c +2916 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_mtree.c +2150 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_rar.c +3797 -0
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_rar5.c +4251 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_support_format_raw.c +38 -31
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_support_format_tar.c +1157 -629
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_warc.c +848 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_read_support_format_xar.c +439 -258
- data/ext/libarchive-3.6.2/libarchive/archive_read_support_format_zip.c +4270 -0
- data/ext/libarchive-3.6.2/libarchive/archive_string.c +4240 -0
- data/ext/libarchive-3.6.2/libarchive/archive_string.h +243 -0
- data/ext/libarchive-3.6.2/libarchive/archive_string_composition.h +2292 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_string_sprintf.c +44 -16
- data/ext/libarchive-3.6.2/libarchive/archive_util.c +655 -0
- data/ext/libarchive-3.6.2/libarchive/archive_version_details.c +151 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_virtual.c +85 -16
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_windows.c +214 -541
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_windows.h +74 -106
- data/ext/libarchive-3.6.2/libarchive/archive_write.c +828 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter.c +72 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_b64encode.c +304 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_by_name.c +77 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_bzip2.c +401 -0
- data/ext/{libarchive-2.8.4/libarchive/archive_write_set_compression_compress.c → libarchive-3.6.2/libarchive/archive_write_add_filter_compress.c} +86 -131
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_grzip.c +135 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_gzip.c +442 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_lrzip.c +197 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_lz4.c +700 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_lzop.c +478 -0
- data/ext/{libarchive-2.8.4/libarchive/archive_read_support_format_all.c → libarchive-3.6.2/libarchive/archive_write_add_filter_none.c} +11 -11
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_program.c +391 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_uuencode.c +295 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_xz.c +545 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_add_filter_zstd.c +418 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_disk_posix.c +4711 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_disk_private.h +9 -2
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_disk_set_standard_lookup.c +30 -29
- data/ext/libarchive-3.6.2/libarchive/archive_write_disk_windows.c +2842 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_open_fd.c +15 -10
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_open_file.c +15 -9
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_open_filename.c +128 -20
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_open_memory.c +7 -18
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_private.h +72 -29
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_set_format.c +56 -3
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_7zip.c +2322 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_set_format_ar.c +54 -34
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_set_format_by_name.c +20 -2
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_cpio.c +11 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_cpio_binary.c +610 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_cpio_newc.c +457 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_cpio_odc.c +500 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_filter_by_ext.c +142 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_gnutar.c +755 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_iso9660.c +8165 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_mtree.c +2217 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_set_format_pax.c +1049 -387
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_private.h +42 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_raw.c +125 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_set_format_shar.c +62 -47
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/archive_write_set_format_ustar.c +279 -108
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_v7tar.c +638 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_warc.c +453 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_xar.c +3259 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_format_zip.c +1704 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_options.c +130 -0
- data/ext/libarchive-3.6.2/libarchive/archive_write_set_passphrase.c +95 -0
- data/ext/libarchive-3.6.2/libarchive/archive_xxhash.h +48 -0
- data/ext/libarchive-3.6.2/libarchive/config_freebsd.h +271 -0
- data/ext/{libarchive-2.8.4 → libarchive-3.6.2}/libarchive/filter_fork.h +10 -5
- data/ext/{libarchive-2.8.4/libarchive/filter_fork.c → libarchive-3.6.2/libarchive/filter_fork_posix.c} +98 -19
- data/ext/libarchive-3.6.2/libarchive/filter_fork_windows.c +236 -0
- data/ext/libarchive-3.6.2/libarchive/xxhash.c +525 -0
- data/ext/libarchive-static-makefile +144 -80
- data/ext/libarchive-static-wrapper-makefile +1 -1
- data/ext/zlib-1.2.13/Makefile.in +404 -0
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/adler32.c +51 -34
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/compress.c +27 -21
- data/ext/zlib-1.2.13/configure +922 -0
- data/ext/zlib-1.2.13/crc32.c +1125 -0
- data/ext/zlib-1.2.13/crc32.h +9446 -0
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/deflate.c +842 -459
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/deflate.h +37 -33
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/gzclose.c +0 -0
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/gzguts.h +103 -16
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/gzlib.c +155 -53
- data/ext/zlib-1.2.13/gzread.c +650 -0
- data/ext/zlib-1.2.13/gzwrite.c +677 -0
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/infback.c +24 -12
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/inffast.c +49 -66
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/inffast.h +0 -0
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/inffixed.h +3 -3
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/inflate.c +209 -94
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/inflate.h +9 -5
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/inftrees.c +24 -50
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/inftrees.h +1 -1
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/trees.c +135 -198
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/trees.h +0 -0
- data/ext/zlib-1.2.13/uncompr.c +93 -0
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/zconf.h +182 -63
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/zlib.h +617 -295
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/zutil.c +50 -41
- data/ext/{zlib-1.2.5 → zlib-1.2.13}/zutil.h +83 -82
- metadata +244 -137
- data/ext/libarchive-0.1.1/libarchive.c +0 -1762
- data/ext/libarchive-2.8.4/Makefile.in +0 -7076
- data/ext/libarchive-2.8.4/build/autoconf/compile +0 -143
- data/ext/libarchive-2.8.4/build/autoconf/config.guess +0 -1502
- data/ext/libarchive-2.8.4/build/autoconf/config.sub +0 -1708
- data/ext/libarchive-2.8.4/build/autoconf/depcomp +0 -630
- data/ext/libarchive-2.8.4/build/autoconf/install-sh +0 -291
- data/ext/libarchive-2.8.4/build/autoconf/ltmain.sh +0 -8406
- data/ext/libarchive-2.8.4/build/autoconf/missing +0 -376
- data/ext/libarchive-2.8.4/config.h.in +0 -772
- data/ext/libarchive-2.8.4/configure +0 -17916
- data/ext/libarchive-2.8.4/libarchive/archive.h +0 -741
- data/ext/libarchive-2.8.4/libarchive/archive_entry.c +0 -2202
- data/ext/libarchive-2.8.4/libarchive/archive_hash.h +0 -281
- data/ext/libarchive-2.8.4/libarchive/archive_read.c +0 -1249
- data/ext/libarchive-2.8.4/libarchive/archive_read_disk.c +0 -198
- data/ext/libarchive-2.8.4/libarchive/archive_read_disk_entry_from_file.c +0 -570
- data/ext/libarchive-2.8.4/libarchive/archive_read_open_filename.c +0 -272
- data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_cpio.c +0 -777
- data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_mtree.c +0 -1304
- data/ext/libarchive-2.8.4/libarchive/archive_read_support_format_zip.c +0 -903
- data/ext/libarchive-2.8.4/libarchive/archive_string.c +0 -453
- data/ext/libarchive-2.8.4/libarchive/archive_string.h +0 -148
- data/ext/libarchive-2.8.4/libarchive/archive_util.c +0 -391
- data/ext/libarchive-2.8.4/libarchive/archive_write.c +0 -466
- data/ext/libarchive-2.8.4/libarchive/archive_write_disk.c +0 -2628
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_bzip2.c +0 -408
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_gzip.c +0 -477
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_none.c +0 -257
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_program.c +0 -347
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_compression_xz.c +0 -438
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_cpio.c +0 -344
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_cpio_newc.c +0 -295
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_mtree.c +0 -1050
- data/ext/libarchive-2.8.4/libarchive/archive_write_set_format_zip.c +0 -667
- data/ext/libarchive-2.8.4/libarchive/config_freebsd.h +0 -154
- data/ext/libarchive-2.8.4/libarchive/filter_fork_windows.c +0 -113
- data/ext/zlib-1.2.5/Makefile.in +0 -257
- data/ext/zlib-1.2.5/configure +0 -596
- data/ext/zlib-1.2.5/crc32.c +0 -442
- data/ext/zlib-1.2.5/crc32.h +0 -441
- data/ext/zlib-1.2.5/example.c +0 -565
- data/ext/zlib-1.2.5/gzread.c +0 -653
- data/ext/zlib-1.2.5/gzwrite.c +0 -531
- data/ext/zlib-1.2.5/minigzip.c +0 -440
- data/ext/zlib-1.2.5/uncompr.c +0 -59
@@ -0,0 +1,848 @@
|
|
1
|
+
/*-
|
2
|
+
* Copyright (c) 2014 Sebastian Freundt
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* Redistribution and use in source and binary forms, with or without
|
6
|
+
* modification, are permitted provided that the following conditions
|
7
|
+
* are met:
|
8
|
+
* 1. Redistributions of source code must retain the above copyright
|
9
|
+
* notice, this list of conditions and the following disclaimer.
|
10
|
+
* 2. Redistributions in binary form must reproduce the above copyright
|
11
|
+
* notice, this list of conditions and the following disclaimer in the
|
12
|
+
* documentation and/or other materials provided with the distribution.
|
13
|
+
*
|
14
|
+
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR(S) ``AS IS'' AND ANY EXPRESS OR
|
15
|
+
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
16
|
+
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
17
|
+
* IN NO EVENT SHALL THE AUTHOR(S) BE LIABLE FOR ANY DIRECT, INDIRECT,
|
18
|
+
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
19
|
+
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
20
|
+
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
21
|
+
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
22
|
+
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
23
|
+
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
24
|
+
*/
|
25
|
+
|
26
|
+
#include "archive_platform.h"
|
27
|
+
__FBSDID("$FreeBSD$");
|
28
|
+
|
29
|
+
/**
|
30
|
+
* WARC is standardised by ISO TC46/SC4/WG12 and currently available as
|
31
|
+
* ISO 28500:2009.
|
32
|
+
* For the purposes of this file we used the final draft from:
|
33
|
+
* http://bibnum.bnf.fr/warc/WARC_ISO_28500_version1_latestdraft.pdf
|
34
|
+
*
|
35
|
+
* Todo:
|
36
|
+
* [ ] real-world warcs can contain resources at endpoints ending in /
|
37
|
+
* e.g. http://bibnum.bnf.fr/warc/
|
38
|
+
* if you're lucky their response contains a Content-Location: header
|
39
|
+
* pointing to a unix-compliant filename, in the example above it's
|
40
|
+
* Content-Location: http://bibnum.bnf.fr/warc/index.html
|
41
|
+
* however, that's not mandated and github for example doesn't follow
|
42
|
+
* this convention.
|
43
|
+
* We need a set of archive options to control what to do with
|
44
|
+
* entries like these, at the moment care is taken to skip them.
|
45
|
+
*
|
46
|
+
**/
|
47
|
+
|
48
|
+
#ifdef HAVE_SYS_STAT_H
|
49
|
+
#include <sys/stat.h>
|
50
|
+
#endif
|
51
|
+
#ifdef HAVE_ERRNO_H
|
52
|
+
#include <errno.h>
|
53
|
+
#endif
|
54
|
+
#ifdef HAVE_STDLIB_H
|
55
|
+
#include <stdlib.h>
|
56
|
+
#endif
|
57
|
+
#ifdef HAVE_STRING_H
|
58
|
+
#include <string.h>
|
59
|
+
#endif
|
60
|
+
#ifdef HAVE_LIMITS_H
|
61
|
+
#include <limits.h>
|
62
|
+
#endif
|
63
|
+
#ifdef HAVE_CTYPE_H
|
64
|
+
#include <ctype.h>
|
65
|
+
#endif
|
66
|
+
#ifdef HAVE_TIME_H
|
67
|
+
#include <time.h>
|
68
|
+
#endif
|
69
|
+
|
70
|
+
#include "archive.h"
|
71
|
+
#include "archive_entry.h"
|
72
|
+
#include "archive_private.h"
|
73
|
+
#include "archive_read_private.h"
|
74
|
+
|
75
|
+
/* WARC record types; the trailing notes mirror the record-type names. */
typedef enum {
	WT_NONE,
	WT_INFO,	/* warcinfo */
	WT_META,	/* metadata */
	WT_RSRC,	/* resource */
	WT_REQ,		/* request, unsupported */
	WT_RSP,		/* response, unsupported */
	WT_RVIS,	/* revisit, unsupported */
	WT_CONV,	/* conversion, unsupported */
	WT_CONT,	/* continuation, unsupported at the moment */
	LAST_WT		/* invalid type */
} warc_type_t;
|
96
|
+
|
97
|
+
/* Length-tagged view of a string that is not modified through this type. */
typedef struct {
	size_t len;		/* number of bytes at str */
	const char *str;	/* first byte of the string */
} warc_string_t;
|
101
|
+
|
102
|
+
/* Length-tagged, writable character buffer. */
typedef struct {
	size_t len;	/* size recorded for the buffer at str */
	char *str;	/* start of the buffer */
} warc_strbuf_t;
|
106
|
+
|
107
|
+
struct warc_s {
|
108
|
+
/* content length ahead */
|
109
|
+
size_t cntlen;
|
110
|
+
/* and how much we've processed so far */
|
111
|
+
size_t cntoff;
|
112
|
+
/* and how much we need to consume between calls */
|
113
|
+
size_t unconsumed;
|
114
|
+
|
115
|
+
/* string pool */
|
116
|
+
warc_strbuf_t pool;
|
117
|
+
/* previous version */
|
118
|
+
unsigned int pver;
|
119
|
+
/* stringified format name */
|
120
|
+
struct archive_string sver;
|
121
|
+
};
|
122
|
+
|
123
|
+
static int _warc_bid(struct archive_read *a, int);
|
124
|
+
static int _warc_cleanup(struct archive_read *a);
|
125
|
+
static int _warc_read(struct archive_read*, const void**, size_t*, int64_t*);
|
126
|
+
static int _warc_skip(struct archive_read *a);
|
127
|
+
static int _warc_rdhdr(struct archive_read *a, struct archive_entry *e);
|
128
|
+
|
129
|
+
/* private routines */
|
130
|
+
static unsigned int _warc_rdver(const char *buf, size_t bsz);
|
131
|
+
static unsigned int _warc_rdtyp(const char *buf, size_t bsz);
|
132
|
+
static warc_string_t _warc_rduri(const char *buf, size_t bsz);
|
133
|
+
static ssize_t _warc_rdlen(const char *buf, size_t bsz);
|
134
|
+
static time_t _warc_rdrtm(const char *buf, size_t bsz);
|
135
|
+
static time_t _warc_rdmtm(const char *buf, size_t bsz);
|
136
|
+
static const char *_warc_find_eoh(const char *buf, size_t bsz);
|
137
|
+
static const char *_warc_find_eol(const char *buf, size_t bsz);
|
138
|
+
|
139
|
+
int
|
140
|
+
archive_read_support_format_warc(struct archive *_a)
|
141
|
+
{
|
142
|
+
struct archive_read *a = (struct archive_read *)_a;
|
143
|
+
struct warc_s *w;
|
144
|
+
int r;
|
145
|
+
|
146
|
+
archive_check_magic(_a, ARCHIVE_READ_MAGIC,
|
147
|
+
ARCHIVE_STATE_NEW, "archive_read_support_format_warc");
|
148
|
+
|
149
|
+
if ((w = calloc(1, sizeof(*w))) == NULL) {
|
150
|
+
archive_set_error(&a->archive, ENOMEM,
|
151
|
+
"Can't allocate warc data");
|
152
|
+
return (ARCHIVE_FATAL);
|
153
|
+
}
|
154
|
+
|
155
|
+
r = __archive_read_register_format(
|
156
|
+
a, w, "warc",
|
157
|
+
_warc_bid, NULL, _warc_rdhdr, _warc_read,
|
158
|
+
_warc_skip, NULL, _warc_cleanup, NULL, NULL);
|
159
|
+
|
160
|
+
if (r != ARCHIVE_OK) {
|
161
|
+
free(w);
|
162
|
+
return (r);
|
163
|
+
}
|
164
|
+
return (ARCHIVE_OK);
|
165
|
+
}
|
166
|
+
|
167
|
+
static int
|
168
|
+
_warc_cleanup(struct archive_read *a)
|
169
|
+
{
|
170
|
+
struct warc_s *w = a->format->data;
|
171
|
+
|
172
|
+
if (w->pool.len > 0U) {
|
173
|
+
free(w->pool.str);
|
174
|
+
}
|
175
|
+
archive_string_free(&w->sver);
|
176
|
+
free(w);
|
177
|
+
a->format->data = NULL;
|
178
|
+
return (ARCHIVE_OK);
|
179
|
+
}
|
180
|
+
|
181
|
+
/*
 * Bid callback: peek at the first 12 bytes of the stream and decide
 * whether they look like the start of a WARC record.
 *
 * Returns 64 when the version read from the probe lies within
 * [1200, 10000] (WARC 0.12 to 1.0), and -1 when the probe cannot be
 * obtained, is shorter than 12 bytes, or carries any other version.
 * best_bid is ignored.
 */
static int
_warc_bid(struct archive_read *a, int best_bid)
{
	const char *probe;
	ssize_t avail;
	unsigned int version;

	(void)best_bid; /* UNUSED */

	/* the first line of the file should already be a record; our
	 * magic cookie is at least 12 bytes long */
	probe = __archive_read_ahead(a, 12U, &avail);
	if (probe == NULL || avail < 12)
		return -1;

	/* snarf the record's version number; only 0.12 to 1.0 is ours */
	version = _warc_rdver(probe, avail);
	if (version >= 1200U && version <= 10000U) {
		/* be confident */
		return (64);
	}
	return -1;
}
|
209
|
+
|
210
|
+
static int
|
211
|
+
_warc_rdhdr(struct archive_read *a, struct archive_entry *entry)
|
212
|
+
{
|
213
|
+
#define HDR_PROBE_LEN (12U)
|
214
|
+
struct warc_s *w = a->format->data;
|
215
|
+
unsigned int ver;
|
216
|
+
const char *buf;
|
217
|
+
ssize_t nrd;
|
218
|
+
const char *eoh;
|
219
|
+
/* for the file name, saves some strndup()'ing */
|
220
|
+
warc_string_t fnam;
|
221
|
+
/* warc record type, not that we really use it a lot */
|
222
|
+
warc_type_t ftyp;
|
223
|
+
/* content-length+error monad */
|
224
|
+
ssize_t cntlen;
|
225
|
+
/* record time is the WARC-Date time we reinterpret it as ctime */
|
226
|
+
time_t rtime;
|
227
|
+
/* mtime is the Last-Modified time which will be the entry's mtime */
|
228
|
+
time_t mtime;
|
229
|
+
|
230
|
+
start_over:
|
231
|
+
/* just use read_ahead() they keep track of unconsumed
|
232
|
+
* bits and bobs for us; no need to put an extra shift in
|
233
|
+
* and reproduce that functionality here */
|
234
|
+
buf = __archive_read_ahead(a, HDR_PROBE_LEN, &nrd);
|
235
|
+
|
236
|
+
if (nrd < 0) {
|
237
|
+
/* no good */
|
238
|
+
archive_set_error(
|
239
|
+
&a->archive, ARCHIVE_ERRNO_MISC,
|
240
|
+
"Bad record header");
|
241
|
+
return (ARCHIVE_FATAL);
|
242
|
+
} else if (buf == NULL) {
|
243
|
+
/* there should be room for at least WARC/bla\r\n
|
244
|
+
* must be EOF therefore */
|
245
|
+
return (ARCHIVE_EOF);
|
246
|
+
}
|
247
|
+
/* looks good so far, try and find the end of the header now */
|
248
|
+
eoh = _warc_find_eoh(buf, nrd);
|
249
|
+
if (eoh == NULL) {
|
250
|
+
/* still no good, the header end might be beyond the
|
251
|
+
* probe we've requested, but then again who'd cram
|
252
|
+
* so much stuff into the header *and* be 28500-compliant */
|
253
|
+
archive_set_error(
|
254
|
+
&a->archive, ARCHIVE_ERRNO_MISC,
|
255
|
+
"Bad record header");
|
256
|
+
return (ARCHIVE_FATAL);
|
257
|
+
}
|
258
|
+
ver = _warc_rdver(buf, eoh - buf);
|
259
|
+
/* we currently support WARC 0.12 to 1.0 */
|
260
|
+
if (ver == 0U) {
|
261
|
+
archive_set_error(
|
262
|
+
&a->archive, ARCHIVE_ERRNO_MISC,
|
263
|
+
"Invalid record version");
|
264
|
+
return (ARCHIVE_FATAL);
|
265
|
+
} else if (ver < 1200U || ver > 10000U) {
|
266
|
+
archive_set_error(
|
267
|
+
&a->archive, ARCHIVE_ERRNO_MISC,
|
268
|
+
"Unsupported record version: %u.%u",
|
269
|
+
ver / 10000, (ver % 10000) / 100);
|
270
|
+
return (ARCHIVE_FATAL);
|
271
|
+
}
|
272
|
+
cntlen = _warc_rdlen(buf, eoh - buf);
|
273
|
+
if (cntlen < 0) {
|
274
|
+
/* nightmare! the specs say content-length is mandatory
|
275
|
+
* so I don't feel overly bad stopping the reader here */
|
276
|
+
archive_set_error(
|
277
|
+
&a->archive, EINVAL,
|
278
|
+
"Bad content length");
|
279
|
+
return (ARCHIVE_FATAL);
|
280
|
+
}
|
281
|
+
rtime = _warc_rdrtm(buf, eoh - buf);
|
282
|
+
if (rtime == (time_t)-1) {
|
283
|
+
/* record time is mandatory as per WARC/1.0,
|
284
|
+
* so just barf here, fast and loud */
|
285
|
+
archive_set_error(
|
286
|
+
&a->archive, EINVAL,
|
287
|
+
"Bad record time");
|
288
|
+
return (ARCHIVE_FATAL);
|
289
|
+
}
|
290
|
+
|
291
|
+
/* let the world know we're a WARC archive */
|
292
|
+
a->archive.archive_format = ARCHIVE_FORMAT_WARC;
|
293
|
+
if (ver != w->pver) {
|
294
|
+
/* stringify this entry's version */
|
295
|
+
archive_string_sprintf(&w->sver,
|
296
|
+
"WARC/%u.%u", ver / 10000, (ver % 10000) / 100);
|
297
|
+
/* remember the version */
|
298
|
+
w->pver = ver;
|
299
|
+
}
|
300
|
+
/* start off with the type */
|
301
|
+
ftyp = _warc_rdtyp(buf, eoh - buf);
|
302
|
+
/* and let future calls know about the content */
|
303
|
+
w->cntlen = cntlen;
|
304
|
+
w->cntoff = 0U;
|
305
|
+
mtime = 0;/* Avoid compiling error on some platform. */
|
306
|
+
|
307
|
+
switch (ftyp) {
|
308
|
+
case WT_RSRC:
|
309
|
+
case WT_RSP:
|
310
|
+
/* only try and read the filename in the cases that are
|
311
|
+
* guaranteed to have one */
|
312
|
+
fnam = _warc_rduri(buf, eoh - buf);
|
313
|
+
/* check the last character in the URI to avoid creating
|
314
|
+
* directory endpoints as files, see Todo above */
|
315
|
+
if (fnam.len == 0 || fnam.str[fnam.len - 1] == '/') {
|
316
|
+
/* break here for now */
|
317
|
+
fnam.len = 0U;
|
318
|
+
fnam.str = NULL;
|
319
|
+
break;
|
320
|
+
}
|
321
|
+
/* bang to our string pool, so we save a
|
322
|
+
* malloc()+free() roundtrip */
|
323
|
+
if (fnam.len + 1U > w->pool.len) {
|
324
|
+
w->pool.len = ((fnam.len + 64U) / 64U) * 64U;
|
325
|
+
w->pool.str = realloc(w->pool.str, w->pool.len);
|
326
|
+
}
|
327
|
+
memcpy(w->pool.str, fnam.str, fnam.len);
|
328
|
+
w->pool.str[fnam.len] = '\0';
|
329
|
+
/* let no one else know about the pool, it's a secret, shhh */
|
330
|
+
fnam.str = w->pool.str;
|
331
|
+
|
332
|
+
/* snarf mtime or deduce from rtime
|
333
|
+
* this is a custom header added by our writer, it's quite
|
334
|
+
* hard to believe anyone else would go through with it
|
335
|
+
* (apart from being part of some http responses of course) */
|
336
|
+
if ((mtime = _warc_rdmtm(buf, eoh - buf)) == (time_t)-1) {
|
337
|
+
mtime = rtime;
|
338
|
+
}
|
339
|
+
break;
|
340
|
+
case WT_NONE:
|
341
|
+
case WT_INFO:
|
342
|
+
case WT_META:
|
343
|
+
case WT_REQ:
|
344
|
+
case WT_RVIS:
|
345
|
+
case WT_CONV:
|
346
|
+
case WT_CONT:
|
347
|
+
case LAST_WT:
|
348
|
+
default:
|
349
|
+
fnam.len = 0U;
|
350
|
+
fnam.str = NULL;
|
351
|
+
break;
|
352
|
+
}
|
353
|
+
|
354
|
+
/* now eat some of those delicious buffer bits */
|
355
|
+
__archive_read_consume(a, eoh - buf);
|
356
|
+
|
357
|
+
switch (ftyp) {
|
358
|
+
case WT_RSRC:
|
359
|
+
case WT_RSP:
|
360
|
+
if (fnam.len > 0U) {
|
361
|
+
/* populate entry object */
|
362
|
+
archive_entry_set_filetype(entry, AE_IFREG);
|
363
|
+
archive_entry_copy_pathname(entry, fnam.str);
|
364
|
+
archive_entry_set_size(entry, cntlen);
|
365
|
+
archive_entry_set_perm(entry, 0644);
|
366
|
+
/* rtime is the new ctime, mtime stays mtime */
|
367
|
+
archive_entry_set_ctime(entry, rtime, 0L);
|
368
|
+
archive_entry_set_mtime(entry, mtime, 0L);
|
369
|
+
break;
|
370
|
+
}
|
371
|
+
/* FALLTHROUGH */
|
372
|
+
case WT_NONE:
|
373
|
+
case WT_INFO:
|
374
|
+
case WT_META:
|
375
|
+
case WT_REQ:
|
376
|
+
case WT_RVIS:
|
377
|
+
case WT_CONV:
|
378
|
+
case WT_CONT:
|
379
|
+
case LAST_WT:
|
380
|
+
default:
|
381
|
+
/* consume the content and start over */
|
382
|
+
_warc_skip(a);
|
383
|
+
goto start_over;
|
384
|
+
}
|
385
|
+
return (ARCHIVE_OK);
|
386
|
+
}
|
387
|
+
|
388
|
+
static int
|
389
|
+
_warc_read(struct archive_read *a, const void **buf, size_t *bsz, int64_t *off)
|
390
|
+
{
|
391
|
+
struct warc_s *w = a->format->data;
|
392
|
+
const char *rab;
|
393
|
+
ssize_t nrd;
|
394
|
+
|
395
|
+
if (w->cntoff >= w->cntlen) {
|
396
|
+
eof:
|
397
|
+
/* it's our lucky day, no work, we can leave early */
|
398
|
+
*buf = NULL;
|
399
|
+
*bsz = 0U;
|
400
|
+
*off = w->cntoff + 4U/*for \r\n\r\n separator*/;
|
401
|
+
w->unconsumed = 0U;
|
402
|
+
return (ARCHIVE_EOF);
|
403
|
+
}
|
404
|
+
|
405
|
+
if (w->unconsumed) {
|
406
|
+
__archive_read_consume(a, w->unconsumed);
|
407
|
+
w->unconsumed = 0U;
|
408
|
+
}
|
409
|
+
|
410
|
+
rab = __archive_read_ahead(a, 1U, &nrd);
|
411
|
+
if (nrd < 0) {
|
412
|
+
*bsz = 0U;
|
413
|
+
/* big catastrophe */
|
414
|
+
return (int)nrd;
|
415
|
+
} else if (nrd == 0) {
|
416
|
+
goto eof;
|
417
|
+
} else if ((size_t)nrd > w->cntlen - w->cntoff) {
|
418
|
+
/* clamp to content-length */
|
419
|
+
nrd = w->cntlen - w->cntoff;
|
420
|
+
}
|
421
|
+
*off = w->cntoff;
|
422
|
+
*bsz = nrd;
|
423
|
+
*buf = rab;
|
424
|
+
|
425
|
+
w->cntoff += nrd;
|
426
|
+
w->unconsumed = (size_t)nrd;
|
427
|
+
return (ARCHIVE_OK);
|
428
|
+
}
|
429
|
+
|
430
|
+
static int
|
431
|
+
_warc_skip(struct archive_read *a)
|
432
|
+
{
|
433
|
+
struct warc_s *w = a->format->data;
|
434
|
+
|
435
|
+
__archive_read_consume(a, w->cntlen + 4U/*\r\n\r\n separator*/);
|
436
|
+
w->cntlen = 0U;
|
437
|
+
w->cntoff = 0U;
|
438
|
+
return (ARCHIVE_OK);
|
439
|
+
}
|
440
|
+
|
441
|
+
|
442
|
+
/* private routines */
|
443
|
+
/*
 * Strip the const qualifier from a pointer.  The value is routed through
 * uintptr_t so that no cast-qual warning is raised.
 */
static void*
deconst(const void *c)
{
	const uintptr_t bits = (uintptr_t)c;

	return (void *)bits;
}
|
448
|
+
|
449
|
+
/*
 * Locate the first occurrence of NEEDLE (NEEDLESIZE bytes) inside HAY
 * (HAYSIZE bytes), like memmem().  Returns a pointer into HAY, or NULL
 * if there is no match.  A zero-sized needle matches at HAY itself.
 *
 * The search keeps a rolling XOR checksum of the current window and only
 * falls back to memcmp() when the checksum equals the needle's.
 */
static char*
xmemmem(const char *hay, const size_t haysize,
	const char *needle, const size_t needlesize)
{
	const char *const hay_end = hay + haysize;
	const char *const needle_end = needle + needlesize;
	const char *win_head;
	const char *win_tail;
	const char *n;
	unsigned int hay_hash;
	unsigned int needle_hash;
	unsigned int all_equal;

	/* an empty needle is found anywhere */
	if (needlesize == 0UL)
		return deconst(hay);

	/* jump to the first byte that could start a match */
	hay = memchr(hay, *needle, haysize);
	if (hay == NULL)
		return NULL;

	/* Both first bytes are equal now.  Accumulate the checksum of the
	 * needle alongside that of the first NEEDLESIZE bytes of the
	 * haystack, remembering whether all bytes compared equal. */
	hay_hash = *hay;
	needle_hash = *hay;
	all_equal = 1U;
	win_tail = hay + 1U;
	n = needle + 1U;
	while (win_tail < hay_end && n < needle_end) {
		hay_hash ^= *win_tail;
		needle_hash ^= *n;
		all_equal &= *win_tail == *n;
		win_tail++;
		n++;
	}

	if (n < needle_end) {
		/* the haystack ran out before the needle did */
		return NULL;
	}
	if (all_equal) {
		/* the very first window matched */
		return deconst(hay);
	}

	/* slide the window one byte at a time, updating the checksum */
	win_head = hay;
	while (win_tail < hay_end) {
		hay_hash ^= *win_head++;
		hay_hash ^= *win_tail;

		/* With equal checksums, matching the first NEEDLESIZE - 1
		 * bytes implies the last one matches too.  WIN_HEAD stays
		 * below WIN_TAIL, so no range check is needed. */
		if (hay_hash == needle_hash &&
		    memcmp(win_head, needle, needlesize - 1U) == 0) {
			return deconst(win_head);
		}
		win_tail++;
	}
	return NULL;
}
|
506
|
+
|
507
|
+
/*
 * Parse a short unsigned decimal number at STR.
 *
 * Digits are taken while the value could still stay within ULIM and
 * while the digit budget derived from ULIM (at least two digits) lasts.
 * *EP is set to the first byte not taken.
 *
 * Returns the parsed value, -1 if no digit was taken, or -2 if the
 * value lies outside [LLIM, ULIM].
 */
static int
strtoi_lim(const char *str, const char **ep, int llim, int ulim)
{
	const char *cur = str;
	int value = 0;
	/* dividing this by 10 per digit bounds the number of digits */
	int budget = (ulim > 10) ? ulim : 10;

	while (value * 10 <= ulim && budget != 0 &&
	    *cur >= '0' && *cur <= '9') {
		value = value * 10 + (*cur - '0');
		cur++;
		budget /= 10;
	}

	if (cur == str) {
		value = -1;
	} else if (value < llim || value > ulim) {
		value = -2;
	}
	*ep = cur;
	return value;
}
|
529
|
+
|
530
|
+
/*
 * Convert a broken-down UTC time into seconds since the epoch.
 *
 * Uses timegm() or _mkgmtime64() when the build configuration says they
 * exist.  Otherwise mktime() is called only to fill in tm_yday, and the
 * result is computed directly from the fields.  Returns (time_t)-1 if
 * mktime() fails on the fallback path.
 */
static time_t
time_from_tm(struct tm *t)
{
#if HAVE_TIMEGM
	/* Use platform timegm() if available. */
	return (timegm(t));
#elif HAVE__MKGMTIME64
	return (_mkgmtime64(t));
#else
	/* Else use direct calculation using POSIX assumptions. */
	/* First, fix up tm_yday based on the year/month/day. */
	if (mktime(t) == (time_t)-1)
		return ((time_t)-1);
	/* Then we can compute timegm() from first principles.  Each term
	 * is widened to time_t before multiplying: done in plain int the
	 * year term overflows for dates from 2039 onwards. */
	return ((time_t)t->tm_sec
	    + (time_t)t->tm_min * 60
	    + (time_t)t->tm_hour * 3600
	    + (time_t)t->tm_yday * 86400
	    + (time_t)(t->tm_year - 70) * 31536000
	    + (time_t)((t->tm_year - 69) / 4) * 86400
	    - (time_t)((t->tm_year - 1) / 100) * 86400
	    + (time_t)((t->tm_year + 299) / 400) * 86400);
#endif
}
|
554
|
+
|
555
|
+
/*
 * Like strptime(), but strictly for ISO 8601 Zulu timestamps of the
 * form YYYY-MM-DDThh:mm:ssZ.  Leading blanks and tabs are skipped.
 *
 * Returns the UTC timestamp, or (time_t)-1 when a field is missing, out
 * of range, or followed by the wrong separator.  If ENDPTR is non-NULL
 * it receives the position where parsing stopped.
 */
static time_t
xstrpisotime(const char *s, char **endptr)
{
	struct tm tm;
	time_t res = (time_t)-1;

	/* make sure tm is clean */
	memset(&tm, 0, sizeof(tm));

	/* as a courtesy to our callers, skip leading whitespace */
	for (; *s == ' ' || *s == '\t'; s++)
		;

	/* year */
	tm.tm_year = strtoi_lim(s, &s, 1583, 4095);
	if (tm.tm_year < 0 || *s++ != '-')
		goto out;
	/* month */
	tm.tm_mon = strtoi_lim(s, &s, 1, 12);
	if (tm.tm_mon < 0 || *s++ != '-')
		goto out;
	/* day-of-month */
	tm.tm_mday = strtoi_lim(s, &s, 1, 31);
	if (tm.tm_mday < 0 || *s++ != 'T')
		goto out;
	/* hour */
	tm.tm_hour = strtoi_lim(s, &s, 0, 23);
	if (tm.tm_hour < 0 || *s++ != ':')
		goto out;
	/* minute */
	tm.tm_min = strtoi_lim(s, &s, 0, 59);
	if (tm.tm_min < 0 || *s++ != ':')
		goto out;
	/* second; 60 is accepted */
	tm.tm_sec = strtoi_lim(s, &s, 0, 60);
	if (tm.tm_sec < 0 || *s++ != 'Z')
		goto out;

	/* struct tm counts years from 1900 and months from 0 */
	tm.tm_year -= 1900;
	tm.tm_mon -= 1;

	/* now convert to a unix stamp using UTC */
	res = time_from_tm(&tm);

out:
	if (endptr != NULL)
		*endptr = deconst(s);
	return res;
}
|
608
|
+
|
609
|
+
/*
 * Parse the "WARC/x.y" or "WARC/x.yy" version line at the start of BUF.
 *
 * The version is returned as major * 10000 + minor * 100, where a
 * one-digit minor counts as-is and a two-digit minor "ab" counts as
 * a * 10 + b (so 1.0 -> 10000, 0.12 -> 1200, 0.9 -> 900).
 * Versions from 0.12 on must be followed by CRLF, older ones by a blank
 * or tab.  Returns 0 if BSZ is below 12, the magic is missing, or the
 * line is malformed.
 */
static unsigned int
_warc_rdver(const char *buf, size_t bsz)
{
	static const char magic[] = "WARC/";
	const size_t magic_len = sizeof(magic) - 1U;
	const char *num;
	const char *tail;
	unsigned int version;
	int two_digit_minor;

	if (bsz < 12 || memcmp(buf, magic, magic_len) != 0) {
		/* buffer too small or invalid magic */
		return 0U;
	}

	/* the number starts right behind the magic */
	num = buf + magic_len;
	if (!isdigit((unsigned char)num[0U]) || num[1U] != '.' ||
	    !isdigit((unsigned char)num[2U])) {
		return 0U;
	}

	/* we support a maximum of 2 digits in the minor version */
	two_digit_minor = isdigit((unsigned char)num[3U]) != 0;

	version = (num[0U] - '0') * 10000U;
	if (two_digit_minor) {
		version += (num[2U] - '0') * 1000U;
		version += (num[3U] - '0') * 100U;
	} else {
		version += (num[2U] - '0') * 100U;
	}

	/*
	 * WARC below version 0.12 has a space-separated header
	 * WARC 0.12 and above terminates the version with a CRLF
	 */
	tail = num + (two_digit_minor ? 4U : 3U);
	if (version >= 1200U) {
		if (memcmp(tail, "\r\n", 2U) != 0)
			return 0U;
	} else if (*tail != ' ' && *tail != '\t') {
		return 0U;
	}
	return version;
}
|
653
|
+
|
654
|
+
static unsigned int
|
655
|
+
_warc_rdtyp(const char *buf, size_t bsz)
|
656
|
+
{
|
657
|
+
static const char _key[] = "\r\nWARC-Type:";
|
658
|
+
const char *val, *eol;
|
659
|
+
|
660
|
+
if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
|
661
|
+
/* no bother */
|
662
|
+
return WT_NONE;
|
663
|
+
}
|
664
|
+
val += sizeof(_key) - 1U;
|
665
|
+
if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) {
|
666
|
+
/* no end of line */
|
667
|
+
return WT_NONE;
|
668
|
+
}
|
669
|
+
|
670
|
+
/* overread whitespace */
|
671
|
+
while (val < eol && (*val == ' ' || *val == '\t'))
|
672
|
+
++val;
|
673
|
+
|
674
|
+
if (val + 8U == eol) {
|
675
|
+
if (memcmp(val, "resource", 8U) == 0)
|
676
|
+
return WT_RSRC;
|
677
|
+
else if (memcmp(val, "response", 8U) == 0)
|
678
|
+
return WT_RSP;
|
679
|
+
}
|
680
|
+
return WT_NONE;
|
681
|
+
}
|
682
|
+
|
683
|
+
static warc_string_t
|
684
|
+
_warc_rduri(const char *buf, size_t bsz)
|
685
|
+
{
|
686
|
+
static const char _key[] = "\r\nWARC-Target-URI:";
|
687
|
+
const char *val, *uri, *eol, *p;
|
688
|
+
warc_string_t res = {0U, NULL};
|
689
|
+
|
690
|
+
if ((val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U)) == NULL) {
|
691
|
+
/* no bother */
|
692
|
+
return res;
|
693
|
+
}
|
694
|
+
/* overread whitespace */
|
695
|
+
val += sizeof(_key) - 1U;
|
696
|
+
if ((eol = _warc_find_eol(val, buf + bsz - val)) == NULL) {
|
697
|
+
/* no end of line */
|
698
|
+
return res;
|
699
|
+
}
|
700
|
+
|
701
|
+
while (val < eol && (*val == ' ' || *val == '\t'))
|
702
|
+
++val;
|
703
|
+
|
704
|
+
/* overread URL designators */
|
705
|
+
if ((uri = xmemmem(val, eol - val, "://", 3U)) == NULL) {
|
706
|
+
/* not touching that! */
|
707
|
+
return res;
|
708
|
+
}
|
709
|
+
|
710
|
+
/* spaces inside uri are not allowed, CRLF should follow */
|
711
|
+
for (p = val; p < eol; p++) {
|
712
|
+
if (isspace((unsigned char)*p))
|
713
|
+
return res;
|
714
|
+
}
|
715
|
+
|
716
|
+
/* there must be at least space for ftp */
|
717
|
+
if (uri < (val + 3U))
|
718
|
+
return res;
|
719
|
+
|
720
|
+
/* move uri to point to after :// */
|
721
|
+
uri += 3U;
|
722
|
+
|
723
|
+
/* now then, inspect the URI */
|
724
|
+
if (memcmp(val, "file", 4U) == 0) {
|
725
|
+
/* perfect, nothing left to do here */
|
726
|
+
|
727
|
+
} else if (memcmp(val, "http", 4U) == 0 ||
|
728
|
+
memcmp(val, "ftp", 3U) == 0) {
|
729
|
+
/* overread domain, and the first / */
|
730
|
+
while (uri < eol && *uri++ != '/');
|
731
|
+
} else {
|
732
|
+
/* not sure what to do? best to bugger off */
|
733
|
+
return res;
|
734
|
+
}
|
735
|
+
res.str = uri;
|
736
|
+
res.len = eol - uri;
|
737
|
+
return res;
|
738
|
+
}
|
739
|
+
|
740
|
+
/*
 * Read the Content-Length header from BUF.
 *
 * The value must start with a digit after optional blanks and tabs and
 * must run up to the end of the line.  Returns the parsed length, or -1
 * when the header is absent, unterminated, not a number, followed by
 * trailing bytes, or rejected by strtol().
 */
static ssize_t
_warc_rdlen(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nContent-Length:";
	const char *val;
	const char *eol;
	char *on = NULL;
	long int len;

	val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U);
	if (val == NULL) {
		/* no such header */
		return -1;
	}
	val += sizeof(_key) - 1U;

	eol = _warc_find_eol(val, buf + bsz - val);
	if (eol == NULL) {
		/* no end of line */
		return -1;
	}

	/* skip leading whitespace */
	for (; val < eol && (*val == ' ' || *val == '\t'); val++)
		;

	/* there must be at least one digit */
	if (!isdigit((unsigned char)*val))
		return -1;

	errno = 0;
	len = strtol(val, &on, 10);
	if (errno != 0) {
		/* out of range for strtol() */
		return -1;
	}
	if (on != eol) {
		/* line must end here */
		return -1;
	}

	return (size_t)len;
}
|
773
|
+
|
774
|
+
/*
 * Read the record time from the WARC-Date header in BUF.
 *
 * The value must be an ISO 8601 Zulu timestamp that runs up to the end
 * of the line.  Returns the timestamp, or (time_t)-1 when the header is
 * absent, unterminated, or not followed directly by the line end; a
 * parse failure inside xstrpisotime() also yields (time_t)-1.
 */
static time_t
_warc_rdrtm(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nWARC-Date:";
	const char *val;
	const char *eol;
	char *on = NULL;
	time_t stamp;

	val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U);
	if (val == NULL) {
		/* no such header */
		return (time_t)-1;
	}
	val += sizeof(_key) - 1U;

	eol = _warc_find_eol(val, buf + bsz - val);
	if (eol == NULL) {
		/* no end of line */
		return (time_t)-1;
	}

	/* xstrpisotime() skips leading whitespace for us */
	stamp = xstrpisotime(val, &on);
	if (on != eol) {
		/* line must end here */
		return (time_t)-1;
	}
	return stamp;
}
|
800
|
+
|
801
|
+
/*
 * Read the modification time from the Last-Modified header in BUF.
 *
 * The value must be an ISO 8601 Zulu timestamp that runs up to the end
 * of the line.  Returns the timestamp, or (time_t)-1 when the header is
 * absent, unterminated, or not followed directly by the line end; a
 * parse failure inside xstrpisotime() also yields (time_t)-1.
 */
static time_t
_warc_rdmtm(const char *buf, size_t bsz)
{
	static const char _key[] = "\r\nLast-Modified:";
	const char *val;
	const char *eol;
	char *on = NULL;
	time_t stamp;

	val = xmemmem(buf, bsz, _key, sizeof(_key) - 1U);
	if (val == NULL) {
		/* no such header */
		return (time_t)-1;
	}
	val += sizeof(_key) - 1U;

	eol = _warc_find_eol(val, buf + bsz - val);
	if (eol == NULL) {
		/* no end of line */
		return (time_t)-1;
	}

	/* xstrpisotime() skips leading whitespace for us */
	stamp = xstrpisotime(val, &on);
	if (on != eol) {
		/* line must end here */
		return (time_t)-1;
	}
	return stamp;
}
|
827
|
+
|
828
|
+
/*
 * Find the end of a record header in BUF.  Returns a pointer just past
 * the first "\r\n\r\n" sequence, or NULL if BUF contains none.
 */
static const char*
_warc_find_eoh(const char *buf, size_t bsz)
{
	static const char _marker[] = "\r\n\r\n";
	const size_t marker_len = sizeof(_marker) - 1U;
	const char *hit = xmemmem(buf, bsz, _marker, marker_len);

	/* the header includes its terminating blank line */
	return (hit == NULL) ? NULL : hit + marker_len;
}
|
839
|
+
|
840
|
+
/*
 * Find the end of the current line in BUF.  Returns a pointer to the
 * first "\r\n" sequence itself, or NULL if BUF contains none.
 */
static const char*
_warc_find_eol(const char *buf, size_t bsz)
{
	static const char _marker[] = "\r\n";

	return xmemmem(buf, bsz, _marker, sizeof(_marker) - 1U);
}
|
848
|
+
/* archive_read_support_format_warc.c ends here */
|