chd 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (109) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +30 -0
  3. data/chd.gemspec +29 -0
  4. data/ext/chd.c +1008 -0
  5. data/ext/extconf.rb +60 -0
  6. data/lib/chd/cd.rb +272 -0
  7. data/lib/chd/metadata.rb +196 -0
  8. data/lib/chd/version.rb +4 -0
  9. data/lib/chd.rb +21 -0
  10. data/libchdr/CMakeLists.txt +104 -0
  11. data/libchdr/LICENSE.txt +24 -0
  12. data/libchdr/README.md +7 -0
  13. data/libchdr/deps/lzma-19.00/CMakeLists.txt +33 -0
  14. data/libchdr/deps/lzma-19.00/LICENSE +3 -0
  15. data/libchdr/deps/lzma-19.00/include/7zTypes.h +375 -0
  16. data/libchdr/deps/lzma-19.00/include/Alloc.h +51 -0
  17. data/libchdr/deps/lzma-19.00/include/Bra.h +64 -0
  18. data/libchdr/deps/lzma-19.00/include/Compiler.h +33 -0
  19. data/libchdr/deps/lzma-19.00/include/CpuArch.h +336 -0
  20. data/libchdr/deps/lzma-19.00/include/Delta.h +19 -0
  21. data/libchdr/deps/lzma-19.00/include/LzFind.h +121 -0
  22. data/libchdr/deps/lzma-19.00/include/LzHash.h +57 -0
  23. data/libchdr/deps/lzma-19.00/include/Lzma86.h +111 -0
  24. data/libchdr/deps/lzma-19.00/include/LzmaDec.h +234 -0
  25. data/libchdr/deps/lzma-19.00/include/LzmaEnc.h +76 -0
  26. data/libchdr/deps/lzma-19.00/include/LzmaLib.h +131 -0
  27. data/libchdr/deps/lzma-19.00/include/Precomp.h +10 -0
  28. data/libchdr/deps/lzma-19.00/include/Sort.h +18 -0
  29. data/libchdr/deps/lzma-19.00/lzma-history.txt +446 -0
  30. data/libchdr/deps/lzma-19.00/lzma.txt +328 -0
  31. data/libchdr/deps/lzma-19.00/lzma.vcxproj +543 -0
  32. data/libchdr/deps/lzma-19.00/lzma.vcxproj.filters +17 -0
  33. data/libchdr/deps/lzma-19.00/src/Alloc.c +455 -0
  34. data/libchdr/deps/lzma-19.00/src/Bra86.c +82 -0
  35. data/libchdr/deps/lzma-19.00/src/BraIA64.c +53 -0
  36. data/libchdr/deps/lzma-19.00/src/CpuArch.c +218 -0
  37. data/libchdr/deps/lzma-19.00/src/Delta.c +64 -0
  38. data/libchdr/deps/lzma-19.00/src/LzFind.c +1127 -0
  39. data/libchdr/deps/lzma-19.00/src/Lzma86Dec.c +54 -0
  40. data/libchdr/deps/lzma-19.00/src/LzmaDec.c +1185 -0
  41. data/libchdr/deps/lzma-19.00/src/LzmaEnc.c +1330 -0
  42. data/libchdr/deps/lzma-19.00/src/Sort.c +141 -0
  43. data/libchdr/deps/zlib-1.2.11/CMakeLists.txt +29 -0
  44. data/libchdr/deps/zlib-1.2.11/ChangeLog +1515 -0
  45. data/libchdr/deps/zlib-1.2.11/FAQ +368 -0
  46. data/libchdr/deps/zlib-1.2.11/INDEX +68 -0
  47. data/libchdr/deps/zlib-1.2.11/Makefile +5 -0
  48. data/libchdr/deps/zlib-1.2.11/Makefile.in +410 -0
  49. data/libchdr/deps/zlib-1.2.11/README +115 -0
  50. data/libchdr/deps/zlib-1.2.11/adler32.c +186 -0
  51. data/libchdr/deps/zlib-1.2.11/compress.c +86 -0
  52. data/libchdr/deps/zlib-1.2.11/configure +921 -0
  53. data/libchdr/deps/zlib-1.2.11/crc32.c +442 -0
  54. data/libchdr/deps/zlib-1.2.11/crc32.h +441 -0
  55. data/libchdr/deps/zlib-1.2.11/deflate.c +2163 -0
  56. data/libchdr/deps/zlib-1.2.11/deflate.h +349 -0
  57. data/libchdr/deps/zlib-1.2.11/doc/algorithm.txt +209 -0
  58. data/libchdr/deps/zlib-1.2.11/doc/rfc1950.txt +619 -0
  59. data/libchdr/deps/zlib-1.2.11/doc/rfc1951.txt +955 -0
  60. data/libchdr/deps/zlib-1.2.11/doc/rfc1952.txt +675 -0
  61. data/libchdr/deps/zlib-1.2.11/doc/txtvsbin.txt +107 -0
  62. data/libchdr/deps/zlib-1.2.11/gzclose.c +25 -0
  63. data/libchdr/deps/zlib-1.2.11/gzguts.h +218 -0
  64. data/libchdr/deps/zlib-1.2.11/gzlib.c +637 -0
  65. data/libchdr/deps/zlib-1.2.11/gzread.c +654 -0
  66. data/libchdr/deps/zlib-1.2.11/gzwrite.c +665 -0
  67. data/libchdr/deps/zlib-1.2.11/infback.c +640 -0
  68. data/libchdr/deps/zlib-1.2.11/inffast.c +323 -0
  69. data/libchdr/deps/zlib-1.2.11/inffast.h +11 -0
  70. data/libchdr/deps/zlib-1.2.11/inffixed.h +94 -0
  71. data/libchdr/deps/zlib-1.2.11/inflate.c +1561 -0
  72. data/libchdr/deps/zlib-1.2.11/inflate.h +125 -0
  73. data/libchdr/deps/zlib-1.2.11/inftrees.c +304 -0
  74. data/libchdr/deps/zlib-1.2.11/inftrees.h +62 -0
  75. data/libchdr/deps/zlib-1.2.11/make_vms.com +867 -0
  76. data/libchdr/deps/zlib-1.2.11/treebuild.xml +116 -0
  77. data/libchdr/deps/zlib-1.2.11/trees.c +1203 -0
  78. data/libchdr/deps/zlib-1.2.11/trees.h +128 -0
  79. data/libchdr/deps/zlib-1.2.11/uncompr.c +93 -0
  80. data/libchdr/deps/zlib-1.2.11/zconf.h +534 -0
  81. data/libchdr/deps/zlib-1.2.11/zconf.h.cmakein +536 -0
  82. data/libchdr/deps/zlib-1.2.11/zconf.h.in +534 -0
  83. data/libchdr/deps/zlib-1.2.11/zlib.3 +149 -0
  84. data/libchdr/deps/zlib-1.2.11/zlib.3.pdf +0 -0
  85. data/libchdr/deps/zlib-1.2.11/zlib.h +1912 -0
  86. data/libchdr/deps/zlib-1.2.11/zlib.map +94 -0
  87. data/libchdr/deps/zlib-1.2.11/zlib.pc.cmakein +13 -0
  88. data/libchdr/deps/zlib-1.2.11/zlib.pc.in +13 -0
  89. data/libchdr/deps/zlib-1.2.11/zlib2ansi +152 -0
  90. data/libchdr/deps/zlib-1.2.11/zutil.c +325 -0
  91. data/libchdr/deps/zlib-1.2.11/zutil.h +271 -0
  92. data/libchdr/include/dr_libs/dr_flac.h +12280 -0
  93. data/libchdr/include/libchdr/bitstream.h +43 -0
  94. data/libchdr/include/libchdr/cdrom.h +110 -0
  95. data/libchdr/include/libchdr/chd.h +427 -0
  96. data/libchdr/include/libchdr/chdconfig.h +10 -0
  97. data/libchdr/include/libchdr/coretypes.h +60 -0
  98. data/libchdr/include/libchdr/flac.h +50 -0
  99. data/libchdr/include/libchdr/huffman.h +90 -0
  100. data/libchdr/pkg-config.pc.in +10 -0
  101. data/libchdr/src/libchdr_bitstream.c +125 -0
  102. data/libchdr/src/libchdr_cdrom.c +415 -0
  103. data/libchdr/src/libchdr_chd.c +2744 -0
  104. data/libchdr/src/libchdr_flac.c +302 -0
  105. data/libchdr/src/libchdr_huffman.c +545 -0
  106. data/libchdr/src/link.T +5 -0
  107. data/libchdr/tests/CMakeLists.txt +2 -0
  108. data/libchdr/tests/benchmark.c +52 -0
  109. metadata +183 -0
@@ -0,0 +1,107 @@
1
+ A Fast Method for Identifying Plain Text Files
2
+ ==============================================
3
+
4
+
5
+ Introduction
6
+ ------------
7
+
8
+ Given a file coming from an unknown source, it is sometimes desirable
9
+ to find out whether the format of that file is plain text. Although
10
+ this may appear like a simple task, a fully accurate detection of the
11
+ file type requires heavy-duty semantic analysis on the file contents.
12
+ It is, however, possible to obtain satisfactory results by employing
13
+ various heuristics.
14
+
15
+ Previous versions of PKZip and other zip-compatible compression tools
16
+ were using a crude detection scheme: if more than 80% (4/5) of the bytes
17
+ found in a certain buffer are within the range [7..127], the file is
18
+ labeled as plain text, otherwise it is labeled as binary. A prominent
19
+ limitation of this scheme is the restriction to Latin-based alphabets.
20
+ Other alphabets, like Greek, Cyrillic or Asian, make extensive use of
21
+ the bytes within the range [128..255], and texts using these alphabets
22
+ are most often misidentified by this scheme; in other words, the rate
23
+ of false negatives is sometimes too high, which means that the recall
24
+ is low. Another weakness of this scheme is a reduced precision, due to
25
+ the false positives that may occur when binary files containing large
26
+ amounts of textual characters are misidentified as plain text.
27
+
28
+ In this article we propose a new, simple detection scheme that features
29
+ a much increased precision and a near-100% recall. This scheme is
30
+ designed to work on ASCII, Unicode and other ASCII-derived alphabets,
31
+ and it handles single-byte encodings (ISO-8859, MacRoman, KOI8, etc.)
32
+ and variable-sized encodings (ISO-2022, UTF-8, etc.). Wider encodings
33
+ (UCS-2/UTF-16 and UCS-4/UTF-32) are not handled, however.
34
+
35
+
36
+ The Algorithm
37
+ -------------
38
+
39
+ The algorithm works by dividing the set of bytecodes [0..255] into three
40
+ categories:
41
+ - The white list of textual bytecodes:
42
+ 9 (TAB), 10 (LF), 13 (CR), 32 (SPACE) to 255.
43
+ - The gray list of tolerated bytecodes:
44
+ 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB), 27 (ESC).
45
+ - The black list of undesired, non-textual bytecodes:
46
+ 0 (NUL) to 6, 14 to 31.
47
+
48
+ If a file contains at least one byte that belongs to the white list and
49
+ no byte that belongs to the black list, then the file is categorized as
50
+ plain text; otherwise, it is categorized as binary. (The boundary case,
51
+ when the file is empty, automatically falls into the latter category.)
52
+
53
+
54
+ Rationale
55
+ ---------
56
+
57
+ The idea behind this algorithm relies on two observations.
58
+
59
+ The first observation is that, although the full range of 7-bit codes
60
+ [0..127] is properly specified by the ASCII standard, most control
61
+ characters in the range [0..31] are not used in practice. The only
62
+ widely-used, almost universally-portable control codes are 9 (TAB),
63
+ 10 (LF) and 13 (CR). There are a few more control codes that are
64
+ recognized on a reduced range of platforms and text viewers/editors:
65
+ 7 (BEL), 8 (BS), 11 (VT), 12 (FF), 26 (SUB) and 27 (ESC); but these
66
+ codes are rarely (if ever) used alone, without being accompanied by
67
+ some printable text. Even the newer, portable text formats such as
68
+ XML avoid using control characters outside the list mentioned here.
69
+
70
+ The second observation is that most of the binary files tend to contain
71
+ control characters, especially 0 (NUL). Even though the older text
72
+ detection schemes observe the presence of non-ASCII codes from the range
73
+ [128..255], the precision rarely has to suffer if this upper range is
74
+ labeled as textual, because the files that are genuinely binary tend to
75
+ contain both control characters and codes from the upper range. On the
76
+ other hand, the upper range needs to be labeled as textual, because it
77
+ is used by virtually all ASCII extensions. In particular, this range is
78
+ used for encoding non-Latin scripts.
79
+
80
+ Since there is no counting involved, other than simply observing the
81
+ presence or the absence of some byte values, the algorithm produces
82
+ consistent results, regardless of what alphabet encoding is being used.
83
+ (If counting were involved, it could be possible to obtain different
84
+ results on a text encoded, say, using ISO-8859-16 versus UTF-8.)
85
+
86
+ There is an extra category of plain text files that are "polluted" with
87
+ one or more black-listed codes, either by mistake or by peculiar design
88
+ considerations. In such cases, a scheme that tolerates a small fraction
89
+ of black-listed codes would provide an increased recall (i.e. more true
90
+ positives). This, however, incurs a reduced precision overall, since
91
+ false positives are more likely to appear in binary files that contain
92
+ large chunks of textual data. Furthermore, "polluted" plain text should
93
+ be regarded as binary by general-purpose text detection schemes, because
94
+ general-purpose text processing algorithms might not be applicable.
95
+ Under this premise, it is safe to say that our detection method provides
96
+ a near-100% recall.
97
+
98
+ Experiments have been run on many files coming from various platforms
99
+ and applications. We tried plain text files, system logs, source code,
100
+ formatted office documents, compiled object code, etc. The results
101
+ confirm the optimistic assumptions about the capabilities of this
102
+ algorithm.
103
+
104
+
105
+ --
106
+ Cosmin Truta
107
+ Last updated: 2006-May-28
@@ -0,0 +1,25 @@
1
+ /* gzclose.c -- zlib gzclose() function
2
+ * Copyright (C) 2004, 2010 Mark Adler
3
+ * For conditions of distribution and use, see copyright notice in zlib.h
4
+ */
5
+
6
+ #include "gzguts.h"
7
+
8
+ /* gzclose() is in a separate file so that it is linked in only if it is used.
9
+ That way the other gzclose functions can be used instead to avoid linking in
10
+ unneeded compression or decompression routines. */
11
+ int ZEXPORT gzclose(file)
12
+ gzFile file;
13
+ {
14
+ #ifndef NO_GZCOMPRESS
15
+ gz_statep state;
16
+
17
+ if (file == NULL)
18
+ return Z_STREAM_ERROR;
19
+ state = (gz_statep)file;
20
+
21
+ return state->mode == GZ_READ ? gzclose_r(file) : gzclose_w(file);
22
+ #else
23
+ return gzclose_r(file);
24
+ #endif
25
+ }
@@ -0,0 +1,218 @@
1
+ /* gzguts.h -- zlib internal header definitions for gz* operations
2
+ * Copyright (C) 2004, 2005, 2010, 2011, 2012, 2013, 2016 Mark Adler
3
+ * For conditions of distribution and use, see copyright notice in zlib.h
4
+ */
5
+
6
+ #ifdef _LARGEFILE64_SOURCE
7
+ # ifndef _LARGEFILE_SOURCE
8
+ # define _LARGEFILE_SOURCE 1
9
+ # endif
10
+ # ifdef _FILE_OFFSET_BITS
11
+ # undef _FILE_OFFSET_BITS
12
+ # endif
13
+ #endif
14
+
15
+ #ifdef HAVE_HIDDEN
16
+ # define ZLIB_INTERNAL __attribute__((visibility ("hidden")))
17
+ #else
18
+ # define ZLIB_INTERNAL
19
+ #endif
20
+
21
+ #include <stdio.h>
22
+ #include "zlib.h"
23
+ #ifdef STDC
24
+ # include <string.h>
25
+ # include <stdlib.h>
26
+ # include <limits.h>
27
+ #endif
28
+
29
+ #ifndef _POSIX_SOURCE
30
+ # define _POSIX_SOURCE
31
+ #endif
32
+ #include <fcntl.h>
33
+
34
+ #ifdef _WIN32
35
+ # include <stddef.h>
36
+ #endif
37
+
38
+ #if defined(__TURBOC__) || defined(_MSC_VER) || defined(_WIN32)
39
+ # include <io.h>
40
+ #endif
41
+
42
+ #if defined(_WIN32) || defined(__CYGWIN__)
43
+ # define WIDECHAR
44
+ #endif
45
+
46
+ #ifdef WINAPI_FAMILY
47
+ # define open _open
48
+ # define read _read
49
+ # define write _write
50
+ # define close _close
51
+ #endif
52
+
53
+ #ifdef NO_DEFLATE /* for compatibility with old definition */
54
+ # define NO_GZCOMPRESS
55
+ #endif
56
+
57
+ #if defined(STDC99) || (defined(__TURBOC__) && __TURBOC__ >= 0x550)
58
+ # ifndef HAVE_VSNPRINTF
59
+ # define HAVE_VSNPRINTF
60
+ # endif
61
+ #endif
62
+
63
+ #if defined(__CYGWIN__)
64
+ # ifndef HAVE_VSNPRINTF
65
+ # define HAVE_VSNPRINTF
66
+ # endif
67
+ #endif
68
+
69
+ #if defined(MSDOS) && defined(__BORLANDC__) && (BORLANDC > 0x410)
70
+ # ifndef HAVE_VSNPRINTF
71
+ # define HAVE_VSNPRINTF
72
+ # endif
73
+ #endif
74
+
75
+ #ifndef HAVE_VSNPRINTF
76
+ # ifdef MSDOS
77
+ /* vsnprintf may exist on some MS-DOS compilers (DJGPP?),
78
+ but for now we just assume it doesn't. */
79
+ # define NO_vsnprintf
80
+ # endif
81
+ # ifdef __TURBOC__
82
+ # define NO_vsnprintf
83
+ # endif
84
+ # ifdef WIN32
85
+ /* In Win32, vsnprintf is available as the "non-ANSI" _vsnprintf. */
86
+ # if !defined(vsnprintf) && !defined(NO_vsnprintf)
87
+ # if !defined(_MSC_VER) || ( defined(_MSC_VER) && _MSC_VER < 1500 )
88
+ # define vsnprintf _vsnprintf
89
+ # endif
90
+ # endif
91
+ # endif
92
+ # ifdef __SASC
93
+ # define NO_vsnprintf
94
+ # endif
95
+ # ifdef VMS
96
+ # define NO_vsnprintf
97
+ # endif
98
+ # ifdef __OS400__
99
+ # define NO_vsnprintf
100
+ # endif
101
+ # ifdef __MVS__
102
+ # define NO_vsnprintf
103
+ # endif
104
+ #endif
105
+
106
+ /* unlike snprintf (which is required in C99), _snprintf does not guarantee
107
+ null termination of the result -- however this is only used in gzlib.c where
108
+ the result is assured to fit in the space provided */
109
+ #if defined(_MSC_VER) && _MSC_VER < 1900
110
+ # define snprintf _snprintf
111
+ #endif
112
+
113
+ #ifndef local
114
+ # define local static
115
+ #endif
116
+ /* since "static" is used to mean two completely different things in C, we
117
+ define "local" for its internal-linkage (file-scope) meaning, for readability
118
+ (compile with -Dlocal if your debugger can't find static symbols) */
119
+
120
+ /* gz* functions always use library allocation functions */
121
+ #ifndef STDC
122
+ extern voidp malloc OF((uInt size));
123
+ extern void free OF((voidpf ptr));
124
+ #endif
125
+
126
+ /* get errno and strerror definition */
127
+ #if defined UNDER_CE
128
+ # include <windows.h>
129
+ # define zstrerror() gz_strwinerror((DWORD)GetLastError())
130
+ #else
131
+ # ifndef NO_STRERROR
132
+ # include <errno.h>
133
+ # define zstrerror() strerror(errno)
134
+ # else
135
+ # define zstrerror() "stdio error (consult errno)"
136
+ # endif
137
+ #endif
138
+
139
+ /* provide prototypes for these when building zlib without LFS */
140
+ #if !defined(_LARGEFILE64_SOURCE) || _LFS64_LARGEFILE-0 == 0
141
+ ZEXTERN gzFile ZEXPORT gzopen64 OF((const char *, const char *));
142
+ ZEXTERN z_off64_t ZEXPORT gzseek64 OF((gzFile, z_off64_t, int));
143
+ ZEXTERN z_off64_t ZEXPORT gztell64 OF((gzFile));
144
+ ZEXTERN z_off64_t ZEXPORT gzoffset64 OF((gzFile));
145
+ #endif
146
+
147
+ /* default memLevel */
148
+ #if MAX_MEM_LEVEL >= 8
149
+ # define DEF_MEM_LEVEL 8
150
+ #else
151
+ # define DEF_MEM_LEVEL MAX_MEM_LEVEL
152
+ #endif
153
+
154
+ /* default i/o buffer size -- double this for output when reading (this and
155
+ twice this must be able to fit in an unsigned type) */
156
+ #define GZBUFSIZE 8192
157
+
158
+ /* gzip modes, also provide a little integrity check on the passed structure */
159
+ #define GZ_NONE 0
160
+ #define GZ_READ 7247
161
+ #define GZ_WRITE 31153
162
+ #define GZ_APPEND 1 /* mode set to GZ_WRITE after the file is opened */
163
+
164
+ /* values for gz_state how */
165
+ #define LOOK 0 /* look for a gzip header */
166
+ #define COPY 1 /* copy input directly */
167
+ #define GZIP 2 /* decompress a gzip stream */
168
+
169
+ /* internal gzip file state data structure */
170
+ typedef struct {
171
+ /* exposed contents for gzgetc() macro */
172
+ struct gzFile_s x; /* "x" for exposed */
173
+ /* x.have: number of bytes available at x.next */
174
+ /* x.next: next output data to deliver or write */
175
+ /* x.pos: current position in uncompressed data */
176
+ /* used for both reading and writing */
177
+ int mode; /* see gzip modes above */
178
+ int fd; /* file descriptor */
179
+ char *path; /* path or fd for error messages */
180
+ unsigned size; /* buffer size, zero if not allocated yet */
181
+ unsigned want; /* requested buffer size, default is GZBUFSIZE */
182
+ unsigned char *in; /* input buffer (double-sized when writing) */
183
+ unsigned char *out; /* output buffer (double-sized when reading) */
184
+ int direct; /* 0 if processing gzip, 1 if transparent */
185
+ /* just for reading */
186
+ int how; /* 0: get header, 1: copy, 2: decompress */
187
+ z_off64_t start; /* where the gzip data started, for rewinding */
188
+ int eof; /* true if end of input file reached */
189
+ int past; /* true if read requested past end */
190
+ /* just for writing */
191
+ int level; /* compression level */
192
+ int strategy; /* compression strategy */
193
+ /* seek request */
194
+ z_off64_t skip; /* amount to skip (already rewound if backwards) */
195
+ int seek; /* true if seek request pending */
196
+ /* error information */
197
+ int err; /* error code */
198
+ char *msg; /* error message */
199
+ /* zlib inflate or deflate stream */
200
+ z_stream strm; /* stream structure in-place (not a pointer) */
201
+ } gz_state;
202
+ typedef gz_state FAR *gz_statep;
203
+
204
+ /* shared functions */
205
+ void ZLIB_INTERNAL gz_error OF((gz_statep, int, const char *));
206
+ #if defined UNDER_CE
207
+ char ZLIB_INTERNAL *gz_strwinerror OF((DWORD error));
208
+ #endif
209
+
210
+ /* GT_OFF(x), where x is an unsigned value, is true if x > maximum z_off64_t
211
+ value -- needed when comparing unsigned to z_off64_t, which is signed
212
+ (possible z_off64_t types off_t, off64_t, and long are all signed) */
213
+ #ifdef INT_MAX
214
+ # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > INT_MAX)
215
+ #else
216
+ unsigned ZLIB_INTERNAL gz_intmax OF((void));
217
+ # define GT_OFF(x) (sizeof(int) == sizeof(z_off64_t) && (x) > gz_intmax())
218
+ #endif