chd 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (109) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +30 -0
  3. data/chd.gemspec +29 -0
  4. data/ext/chd.c +1008 -0
  5. data/ext/extconf.rb +60 -0
  6. data/lib/chd/cd.rb +272 -0
  7. data/lib/chd/metadata.rb +196 -0
  8. data/lib/chd/version.rb +4 -0
  9. data/lib/chd.rb +21 -0
  10. data/libchdr/CMakeLists.txt +104 -0
  11. data/libchdr/LICENSE.txt +24 -0
  12. data/libchdr/README.md +7 -0
  13. data/libchdr/deps/lzma-19.00/CMakeLists.txt +33 -0
  14. data/libchdr/deps/lzma-19.00/LICENSE +3 -0
  15. data/libchdr/deps/lzma-19.00/include/7zTypes.h +375 -0
  16. data/libchdr/deps/lzma-19.00/include/Alloc.h +51 -0
  17. data/libchdr/deps/lzma-19.00/include/Bra.h +64 -0
  18. data/libchdr/deps/lzma-19.00/include/Compiler.h +33 -0
  19. data/libchdr/deps/lzma-19.00/include/CpuArch.h +336 -0
  20. data/libchdr/deps/lzma-19.00/include/Delta.h +19 -0
  21. data/libchdr/deps/lzma-19.00/include/LzFind.h +121 -0
  22. data/libchdr/deps/lzma-19.00/include/LzHash.h +57 -0
  23. data/libchdr/deps/lzma-19.00/include/Lzma86.h +111 -0
  24. data/libchdr/deps/lzma-19.00/include/LzmaDec.h +234 -0
  25. data/libchdr/deps/lzma-19.00/include/LzmaEnc.h +76 -0
  26. data/libchdr/deps/lzma-19.00/include/LzmaLib.h +131 -0
  27. data/libchdr/deps/lzma-19.00/include/Precomp.h +10 -0
  28. data/libchdr/deps/lzma-19.00/include/Sort.h +18 -0
  29. data/libchdr/deps/lzma-19.00/lzma-history.txt +446 -0
  30. data/libchdr/deps/lzma-19.00/lzma.txt +328 -0
  31. data/libchdr/deps/lzma-19.00/lzma.vcxproj +543 -0
  32. data/libchdr/deps/lzma-19.00/lzma.vcxproj.filters +17 -0
  33. data/libchdr/deps/lzma-19.00/src/Alloc.c +455 -0
  34. data/libchdr/deps/lzma-19.00/src/Bra86.c +82 -0
  35. data/libchdr/deps/lzma-19.00/src/BraIA64.c +53 -0
  36. data/libchdr/deps/lzma-19.00/src/CpuArch.c +218 -0
  37. data/libchdr/deps/lzma-19.00/src/Delta.c +64 -0
  38. data/libchdr/deps/lzma-19.00/src/LzFind.c +1127 -0
  39. data/libchdr/deps/lzma-19.00/src/Lzma86Dec.c +54 -0
  40. data/libchdr/deps/lzma-19.00/src/LzmaDec.c +1185 -0
  41. data/libchdr/deps/lzma-19.00/src/LzmaEnc.c +1330 -0
  42. data/libchdr/deps/lzma-19.00/src/Sort.c +141 -0
  43. data/libchdr/deps/zlib-1.2.11/CMakeLists.txt +29 -0
  44. data/libchdr/deps/zlib-1.2.11/ChangeLog +1515 -0
  45. data/libchdr/deps/zlib-1.2.11/FAQ +368 -0
  46. data/libchdr/deps/zlib-1.2.11/INDEX +68 -0
  47. data/libchdr/deps/zlib-1.2.11/Makefile +5 -0
  48. data/libchdr/deps/zlib-1.2.11/Makefile.in +410 -0
  49. data/libchdr/deps/zlib-1.2.11/README +115 -0
  50. data/libchdr/deps/zlib-1.2.11/adler32.c +186 -0
  51. data/libchdr/deps/zlib-1.2.11/compress.c +86 -0
  52. data/libchdr/deps/zlib-1.2.11/configure +921 -0
  53. data/libchdr/deps/zlib-1.2.11/crc32.c +442 -0
  54. data/libchdr/deps/zlib-1.2.11/crc32.h +441 -0
  55. data/libchdr/deps/zlib-1.2.11/deflate.c +2163 -0
  56. data/libchdr/deps/zlib-1.2.11/deflate.h +349 -0
  57. data/libchdr/deps/zlib-1.2.11/doc/algorithm.txt +209 -0
  58. data/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt +619 -0
  59. data/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt +955 -0
  60. data/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt +675 -0
  61. data/libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt +107 -0
  62. data/libchdr/deps/zlib-1.2.11/gzclose.c +25 -0
  63. data/libchdr/deps/zlib-1.2.11/gzguts.h +218 -0
  64. data/libchdr/deps/zlib-1.2.11/gzlib.c +637 -0
  65. data/libchdr/deps/zlib-1.2.11/gzread.c +654 -0
  66. data/libchdr/deps/zlib-1.2.11/gzwrite.c +665 -0
  67. data/libchdr/deps/zlib-1.2.11/infback.c +640 -0
  68. data/libchdr/deps/zlib-1.2.11/inffast.c +323 -0
  69. data/libchdr/deps/zlib-1.2.11/inffast.h +11 -0
  70. data/libchdr/deps/zlib-1.2.11/inffixed.h +94 -0
  71. data/libchdr/deps/zlib-1.2.11/inflate.c +1561 -0
  72. data/libchdr/deps/zlib-1.2.11/inflate.h +125 -0
  73. data/libchdr/deps/zlib-1.2.11/inftrees.c +304 -0
  74. data/libchdr/deps/zlib-1.2.11/inftrees.h +62 -0
  75. data/libchdr/deps/zlib-1.2.11/make_vms.com +867 -0
  76. data/libchdr/deps/zlib-1.2.11/treebuild.xml +116 -0
  77. data/libchdr/deps/zlib-1.2.11/trees.c +1203 -0
  78. data/libchdr/deps/zlib-1.2.11/trees.h +128 -0
  79. data/libchdr/deps/zlib-1.2.11/uncompr.c +93 -0
  80. data/libchdr/deps/zlib-1.2.11/zconf.h +534 -0
  81. data/libchdr/deps/zlib-1.2.11/zconf.h.cmakein +536 -0
  82. data/libchdr/deps/zlib-1.2.11/zconf.h.in +534 -0
  83. data/libchdr/deps/zlib-1.2.11/zlib.3 +149 -0
  84. data/libchdr/deps/zlib-1.2.11/zlib.3.pdf +0 -0
  85. data/libchdr/deps/zlib-1.2.11/zlib.h +1912 -0
  86. data/libchdr/deps/zlib-1.2.11/zlib.map +94 -0
  87. data/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein +13 -0
  88. data/libchdr/deps/zlib-1.2.11/zlib.pc.in +13 -0
  89. data/libchdr/deps/zlib-1.2.11/zlib2ansi +152 -0
  90. data/libchdr/deps/zlib-1.2.11/zutil.c +325 -0
  91. data/libchdr/deps/zlib-1.2.11/zutil.h +271 -0
  92. data/libchdr/include/dr_libs/dr_flac.h +12280 -0
  93. data/libchdr/include/libchdr/bitstream.h +43 -0
  94. data/libchdr/include/libchdr/cdrom.h +110 -0
  95. data/libchdr/include/libchdr/chd.h +427 -0
  96. data/libchdr/include/libchdr/chdconfig.h +10 -0
  97. data/libchdr/include/libchdr/coretypes.h +60 -0
  98. data/libchdr/include/libchdr/flac.h +50 -0
  99. data/libchdr/include/libchdr/huffman.h +90 -0
  100. data/libchdr/pkg-config.pc.in +10 -0
  101. data/libchdr/src/libchdr_bitstream.c +125 -0
  102. data/libchdr/src/libchdr_cdrom.c +415 -0
  103. data/libchdr/src/libchdr_chd.c +2744 -0
  104. data/libchdr/src/libchdr_flac.c +302 -0
  105. data/libchdr/src/libchdr_huffman.c +545 -0
  106. data/libchdr/src/link.T +5 -0
  107. data/libchdr/tests/CMakeLists.txt +2 -0
  108. data/libchdr/tests/benchmark.c +52 -0
  109. metadata +183 -0
@@ -0,0 +1,107 @@
1
+ A Fast Method for Identifying Plain Text Files
2
+ ==============================================
3
+
4
+
5
+ Introduction
6
+ ------------
7
+
8
+ Given a file coming from an unknown source, it is sometimes desirable
9
+ to find out whether the format of that file is plain text. Although
10
+ this may appear to be a simple task, a fully accurate detection of the
11
+ file type requires heavy-duty semantic analysis on the file contents.
12
+ It is, however, possible to obtain satisfactory results by employing
13
+ various heuristics.
14
+
15
+ Previous versions of PKZip and other zip-compatible compression tools
16
+ were using a crude detection scheme: if more than 80% (4/5) of the bytes
17
+ found in a certain buffer are within the range [7..127], the file is
18
+ labeled as plain text, otherwise it is labeled as binary. A prominent
19
+ limitation of this scheme is the restriction to Latin-based alphabets.
20
+ Other alphabets, like Greek, Cyrillic or Asian, make extensive use of
21
+ the bytes within the range [128..255], and texts using these alphabets
22
+ are most often misidentified by this scheme; in other words, the rate
23
+ of false negatives is sometimes too high, which means that the recall
24
+ is low. Another weakness of this scheme is a reduced precision, due to
25
+ the false positives that may occur when binary files containing large
26
+ amounts of textual characters are misidentified as plain text.
27
+
28
+ In this article we propose a new, simple detection scheme that features
29
+ a much increased precision and a near-100% recall. This scheme is
30
+ designed to work on ASCII, Unicode and other ASCII-derived alphabets,
31
+ and it handles single-byte encodings (ISO-8859, MacRoman, KOI8, etc.)
32
+ and variable-sized encodings (ISO-2022, UTF-8, etc.). Wider encodings
33
+ (UCS-2/UTF-16 and UCS-4/UTF-32) are not handled, however.
34
+
35
+
36
+ The Algorithm
37
+ -------------
38
+
39
+ The algorithm works by dividing the set of bytecodes [0..255] into three
40
+ categories:
41
+ - The white list of textual bytecodes:
42
+ 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255.
43
+ - The gray list of tolerated bytecodes:
44
+ 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC).
45
+ - The black list of undesired, non-textual bytecodes:
46
+ 0 (NUL) to 6, 14 to 31.
47
+
48
+ If a file contains at least one byte that belongs to the white list and
49
+ no byte that belongs to the black list, then the file is categorized as
50
+ plain text; otherwise, it is categorized as binary. (The boundary case,
51
+ when the file is empty, automatically falls into the latter category.)
52
+
53
+
54
+ Rationale
55
+ ---------
56
+
57
+ The idea behind this algorithm relies on two observations.
58
+
59
+ The first observation is that, although the full range of 7-bit codes
60
+ [0..127] is properly specified by the ASCII standard, most control
61
+ characters in the range [0..31] are not used in practice. The only
62
+ widely-used, almost universally-portable control codes are 9 (TAB),
63
+ 10 (LF) and 13 (CR). There are a few more control codes that are
64
+ recognized on a reduced range of platforms and text viewers/editors:
65
+ 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB) and 27 (ESC); but these
66
+ codes are rarely (if ever) used alone, without being accompanied by
67
+ some printable text. Even the newer, portable text formats such as
68
+ XML avoid using control characters outside the list mentioned here.
69
+
70
+ The second observation is that most of the binary files tend to contain
71
+ control characters, especially 0 (NUL). Even though the older text
72
+ detection schemes observe the presence of non-ASCII codes from the range
73
+ [128..255], the precision rarely has to suffer if this upper range is
74
+ labeled as textual, because the files that are genuinely binary tend to
75
+ contain both control characters and codes from the upper range. On the
76
+ other hand, the upper range needs to be labeled as textual, because it
77
+ is used by virtually all ASCII extensions. In particular, this range is
78
+ used for encoding non-Latin scripts.
79
+
80
+ Since there is no counting involved, other than simply observing the
81
+ presence or the absence of some byte values, the algorithm produces
82
+ consistent results, regardless of what alphabet encoding is being used.
83
+ (If counting were involved, it could be possible to obtain different
84
+ results on a text encoded, say, using ISO-8859-16 versus UTF-8.)
85
+
86
+ There is an extra category of plain text files that are "polluted" with
87
+ one or more black-listed codes, either by mistake or by peculiar design
88
+ considerations. In such cases, a scheme that tolerates a small fraction
89
+ of black-listed codes would provide an increased recall (i.e. more true
90
+ positives). This, however, incurs a reduced precision overall, since
91
+ false positives are more likely to appear in binary files that contain
92
+ large chunks of textual data. Furthermore, "polluted" plain text should
93
+ be regarded as binary by general-purpose text detection schemes, because
94
+ general-purpose text processing algorithms might not be applicable.
95
+ Under this premise, it is safe to say that our detection method provides
96
+ a near-100% recall.
97
+
98
+ Experiments have been run on many files coming from various platforms
99
+ and applications. We tried plain text files, system logs, source code,
100
+ formatted office documents, compiled object code, etc. The results
101
+ confirm the optimistic assumptions about the capabilities of this
102
+ algorithm.
103
+
104
+
105
+ --
106
+ Cosmin Truta
107
+ Last updated: 2006-May-28
@@ -0,0 +1,25 @@
1
+ /* gzclose.c -- zlib gzclose() function
2
+ * Copyright (C) 2004, 2010 Mark Adler
3
+ * For conditions of distribution and use, see copyright notice in zlib.h
4
+ */
5
+
6
+ #include "gzguts.h"
7
+
8
+ /* gzclose() is in a separate file so that it is linked in only if it is used.
9
+ That way the other gzclose functions can be used instead to avoid linking in
10
+ unneeded compression or decompression routines. */
11
+ int ZEXPORT gzclose(file) /* returns Z_STREAM_ERROR for NULL, else the result of gzclose_r()/gzclose_w() */
12
+ gzFile file; /* stream handle to close */
13
+ {
14
+ #ifndef NO_GZCOMPRESS /* NO_GZCOMPRESS not defined: choose the close routine from the stream mode */
15
+ gz_statep state;
16
+ /* reject a NULL handle before it is dereferenced below */
17
+ if (file == NULL)
18
+ return Z_STREAM_ERROR;
19
+ state = (gz_statep)file; /* the handle is reinterpreted as the internal state record */
20
+ /* GZ_READ streams go to gzclose_r(); every other mode value goes to gzclose_w() */
21
+ return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
22
+ #else /* NO_GZCOMPRESS defined: always delegate to gzclose_r(), with no NULL check here */
23
+ return gzclose_r(file);
24
+ #endif
25
+ }
@@ -0,0 +1,218 @@
1
+ /* gzguts.h -- zlib internal header definitions for gz* operations
2
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
3
+ * For conditions of distribution and use, see copyright notice in zlib.h
4
+ */
5
+
6
+ #ifdef _LARGEFILE64_SOURCE
7
+ # ifndef _LARGEFILE_SOURCE
8
+ # define _LARGEFILE_SOURCE 1
9
+ # endif
10
+ # ifdef _FILE_OFFSET_BITS
11
+ # undef _FILE_OFFSET_BITS
12
+ # endif
13
+ #endif
14
+
15
+ #ifdef HAVE_HIDDEN
16
+ # define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
17
+ #else
18
+ # define ZLIB_INTERNAL
19
+ #endif
20
+
21
+ #include <stdio.h>
22
+ #include "zlib.h"
23
+ #ifdef STDC
24
+ # include <string.h>
25
+ # include <stdlib.h>
26
+ # include <limits.h>
27
+ #endif
28
+
29
+ #ifndef _POSIX_SOURCE
30
+ # define _POSIX_SOURCE
31
+ #endif
32
+ #include <fcntl.h>
33
+
34
+ #ifdef _WIN32
35
+ # include <stddef.h>
36
+ #endif
37
+
38
+ #if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
39
+ # include <io.h>
40
+ #endif
41
+
42
+ #if defined(_WIN32) || defined(__CYGWIN__)
43
+ # define WIDECHAR
44
+ #endif
45
+
46
+ #ifdef WINAPI_FAMILY
47
+ # define open _open
48
+ # define read _read
49
+ # define write _write
50
+ # define close _close
51
+ #endif
52
+
53
+ #ifdef NO_DEFLATE /* for compatibility with old definition */
54
+ # define NO_GZCOMPRESS
55
+ #endif
56
+
57
+ #if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
58
+ # ifndef HAVE_VSNPRINTF
59
+ # define HAVE_VSNPRINTF
60
+ # endif
61
+ #endif
62
+
63
+ #if defined(__CYGWIN__)
64
+ # ifndef HAVE_VSNPRINTF
65
+ # define HAVE_VSNPRINTF
66
+ # endif
67
+ #endif
68
+
69
+ #if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
70
+ # ifndef HAVE_VSNPRINTF
71
+ # define HAVE_VSNPRINTF
72
+ # endif
73
+ #endif
74
+
75
+ #ifndef HAVE_VSNPRINTF
76
+ # ifdef MSDOS
77
+ /* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
78
+ but for now we just assume it doesn't. */
79
+ # define NO_vsnprintf
80
+ # endif
81
+ # ifdef __TURBOC__
82
+ # define NO_vsnprintf
83
+ # endif
84
+ # ifdef WIN32
85
+ /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
86
+ # if !defined(vsnprintf) && !defined(NO_vsnprintf)
87
+ # if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
88
+ # define vsnprintf _vsnprintf
89
+ # endif
90
+ # endif
91
+ # endif
92
+ # ifdef __SASC
93
+ # define NO_vsnprintf
94
+ # endif
95
+ # ifdef VMS
96
+ # define NO_vsnprintf
97
+ # endif
98
+ # ifdef __OS400__
99
+ # define NO_vsnprintf
100
+ # endif
101
+ # ifdef __MVS__
102
+ # define NO_vsnprintf
103
+ # endif
104
+ #endif
105
+
106
+ /* unlike snprintf (which is required in C99), _snprintf does not guarantee
107
+ null termination of the result -- however this is only used in gzlib.c where
108
+ the result is assured to fit in the space provided */
109
+ #if defined(_MSC_VER) && _MSC_VER < 1900
110
+ # define snprintf _snprintf
111
+ #endif
112
+
113
+ #ifndef local
114
+ # define local static
115
+ #endif
116
+ /* since "static" is used to mean two completely different things in C, we
117
+ define "local" for the non-static meaning of "static", for readability
118
+ (compile with -Dlocal if your debugger can't find static symbols) */
119
+
120
+ /* gz* functions always use library allocation functions */
121
+ #ifndef STDC
122
+ extern voidp malloc OF((uInt size));
123
+ extern void free OF((voidpf ptr));
124
+ #endif
125
+
126
+ /* get errno and strerror definition */
127
+ #if defined UNDER_CE
128
+ # include <windows.h>
129
+ # define zstrerror() gz_strwinerror((DWORD)GetLastError())
130
+ #else
131
+ # ifndef NO_STRERROR
132
+ # include <errno.h>
133
+ # define zstrerror() strerror(errno)
134
+ # else
135
+ # define zstrerror() "stdio error (consult errno)"
136
+ # endif
137
+ #endif
138
+
139
+ /* provide prototypes for these when building zlib without LFS */
140
+ #if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
141
+ ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
142
+ ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
143
+ ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
144
+ ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
145
+ #endif
146
+
147
+ /* default memLevel */
148
+ #if MAX_MEM_LEVEL >= 8
149
+ # define DEF_MEM_LEVEL 8
150
+ #else
151
+ # define DEF_MEM_LEVEL MAX_MEM_LEVEL
152
+ #endif
153
+
154
+ /* default i/o buffer size -- double this for output when reading (this and
155
+ twice this must be able to fit in an unsigned type) */
156
+ #define GZBUFSIZE 8192
157
+
158
+ /* gzip modes, also provide a little integrity check on the passed structure */
159
+ #define GZ_NONE 0
160
+ #define GZ_READ 7247
161
+ #define GZ_WRITE 31153
162
+ #define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */
163
+
164
+ /* values for gz_state how */
165
+ #define LOOK 0 /* look for a gzip header */
166
+ #define COPY 1 /* copy input directly */
167
+ #define GZIP 2 /* decompress a gzip stream */
168
+
169
+ /* internal gzip file state data structure */
170
+ typedef struct { /* holds the mode, descriptor, buffers, pending seek, error info and z_stream of a gz file */
171
+ /* exposed contents for gzgetc() macro */
172
+ struct gzFile_s x; /* "x" for exposed */
173
+ /* x.have: number of bytes available at x.next */
174
+ /* x.next: next output data to deliver or write */
175
+ /* x.pos: current position in uncompressed data */
176
+ /* used for both reading and writing */
177
+ int mode; /* GZ_NONE, GZ_READ, GZ_WRITE or GZ_APPEND (GZ_APPEND is set to GZ_WRITE after the file is opened) */
178
+ int fd; /* file descriptor */
179
+ char *path; /* path or fd for error messages */
180
+ unsigned size; /* buffer size, zero if not allocated yet */
181
+ unsigned want; /* requested buffer size, default is GZBUFSIZE */
182
+ unsigned char *in; /* input buffer (double-sized when writing) */
183
+ unsigned char *out; /* output buffer (double-sized when reading) */
184
+ int direct; /* 0 if processing gzip, 1 if transparent */
185
+ /* just for reading */
186
+ int how; /* LOOK (0): get header, COPY (1): copy, GZIP (2): decompress */
187
+ z_off64_t start; /* where the gzip data started, for rewinding */
188
+ int eof; /* true if end of input file reached */
189
+ int past; /* true if read requested past end */
190
+ /* just for writing */
191
+ int level; /* compression level */
192
+ int strategy; /* compression strategy */
193
+ /* seek request */
194
+ z_off64_t skip; /* amount to skip (already rewound if backwards) */
195
+ int seek; /* true if seek request pending */
196
+ /* error information */
197
+ int err; /* error code */
198
+ char *msg; /* error message */
199
+ /* zlib inflate or deflate stream */
200
+ z_stream strm; /* stream structure in-place (not a pointer) */
201
+ } gz_state;
202
+ typedef gz_state FAR *gz_statep; /* pointer-to-state type taken by the shared gz_* helpers */
203
+
204
+ /* shared functions */
205
+ void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *));
206
+ #if defined UNDER_CE
207
+ char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
208
+ #endif
209
+
210
+ /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
211
+ value -- needed when comparing unsigned to z_off64_t, which is signed
212
+ (possible z_off64_t types off_t, off64_t, and long are all signed) */
213
+ #ifdef INT_MAX
214
+ # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
215
+ #else
216
+ unsigned ZLIB_INTERNAL gz_intmax OF((void));
217
+ # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
218
+ #endif