libmagic 0.5.11
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/History.txt +24 -0
- data/Manifest.txt +18 -0
- data/README.txt +57 -0
- data/Rakefile +9 -0
- data/lib/custom-magic +1 -0
- data/lib/custom-magic.mime +1 -0
- data/lib/libmagic.rb +177 -0
- data/libmagic.gemspec +44 -0
- data/test/files/file_with_text_that_looked_like_ppm_image.csv +9 -0
- data/test/files/huge_file_with_one_special_character.csv +23001 -0
- data/test/files/huge_file_with_one_special_character_at_the_end.csv +23001 -0
- data/test/files/iso-8859-1.txt +2 -0
- data/test/files/macintosh.txt +1 -0
- data/test/files/part_of_ki_file.csv +1001 -0
- data/test/files/us-ascii.txt +3 -0
- data/test/files/utf-8.txt +3 -0
- data/test/files/windows-1252.txt +2 -0
- data/test/test_magic.rb +213 -0
- metadata +103 -0
data/test/test_magic.rb
ADDED
@@ -0,0 +1,213 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "rubygems"
|
3
|
+
require "lib/libmagic"
|
4
|
+
require "test/unit"
|
5
|
+
require "stringio"
|
6
|
+
|
7
|
+
# Tests for the Magic wrapper loaded from lib/libmagic: MIME-type and charset
# detection for strings, IO objects and files, plus the private
# collect_special_characters helper.
class MagicTest < Test::Unit::TestCase
  # Zlib::GzipReader is needed by the gzip fixture tests below.
  require "zlib"

  # Fixture directory, resolved relative to this file so the tests do not
  # depend on ENV["PWD"] (unset on some platforms, stale after Dir.chdir).
  FIXTURE_DIR = File.expand_path("files", File.dirname(__FILE__))

  # The module should expose exactly these six singleton methods beyond what
  # FFI::Library contributes.
  def test_public_interface_is_limited
    expected = %w(file_charset file_charset! file_mime_type io_charset string_charset string_mime_type).map(&:to_sym)
    actual = (Magic.public_methods - Magic.instance_methods - FFI::Library.methods - FFI::Library.instance_methods).sort.map(&:to_sym)
    assert_equal(expected, actual)
  end

  def test_file_mime_type_for_utf8_file
    # regex necessary because some versions of file return the semicolon and some don't
    assert_match(%r{text/plain;? charset=utf-8}, Magic.file_mime_type(absolute_path("utf-8.txt")))
  end

  def test_string_mime_type_for_utf8_text
    # regex necessary because some versions of file return the semicolon and some don't
    assert_match(%r{text/plain;? charset=utf-8}, Magic.string_mime_type("Some truly Unicode characters like: 불거기"))
  end

  def test_string_charset_for_ascii_text
    assert_equal("us-ascii", Magic.string_charset("Just ASCII"))
  end

  def test_string_charset_for_utf8_text
    assert_equal("utf-8", Magic.string_charset("Some truly Unicode characters like: 불거기"))
  end

  def test_string_charset_for_iso_8859_1_text
    assert_equal("iso-8859-1", Magic.string_charset("A\240B\240C"))
  end

  # Generates one test per byte value 128..159; each appends that byte (as an
  # octal escape) to an ASCII string and expects "unknown".
  # __FILE__/__LINE__ are passed so failures in the generated methods report
  # this file instead of "(eval)".
  (128..159).each do |windows_char|
    class_eval <<-EOMETHOD, __FILE__, __LINE__ + 1
      def test_string_charset_for_string_with_windows_char_#{windows_char}_returns_unknown
        assert_equal("unknown", Magic.string_charset("over, and over, and over\\#{windows_char.to_s(8)}"))
      end
    EOMETHOD
  end

  # although this is redundant, it's nice to document the one weird case we've found explicitly
  def test_string_charset_for_string_with_windows_ellipsis_returns_unknown
    # windows 1252 ellipsis is 133 = 0205
    # this weird case came from a file that Sylvia had a problem slurping
    assert_equal("unknown", Magic.string_charset("over, and over, and over\205"))
  end

  # NOTE(review): the fixture "utf-8.csv.gz" is not in the gem's packaged file
  # list -- confirm it exists in the repository, otherwise this test and
  # test_io_charset_for_gzipped_utf8_file fail with a missing-file error.
  def test_string_charset_for_string_with_utf8_Angstrom_returns_utf8_not_unknown
    string = File.open(absolute_path("utf-8.csv.gz")) do |io|
      Zlib::GzipReader.new(io).read
    end
    assert_equal("utf-8", Magic.string_charset(string))
  end

  def test_io_charset_for_ascii_file
    assert_equal("us-ascii", io_charset_of("us-ascii.txt"))
  end

  def test_io_charset_for_utf8_file
    assert_equal("utf-8", io_charset_of("utf-8.txt"))
  end

  def test_io_charset_for_gzipped_utf8_file
    File.open(absolute_path("utf-8.csv.gz")) do |io|
      uncompressed_io = Zlib::GzipReader.new(io)
      assert_equal("utf-8", Magic.io_charset(uncompressed_io))
    end
  end

  def test_io_charset_for_iso_8859_1_file
    assert_equal("iso-8859-1", io_charset_of("iso-8859-1.txt"))
  end

  def test_io_charset_for_windows_1252_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", io_charset_of("windows-1252.txt"))
  end

  def test_io_charset_for_macintosh_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", io_charset_of("macintosh.txt"))
  end

  def test_io_charset_for_csv_file_that_looked_like_ppm_image
    assert_equal("us-ascii", io_charset_of("file_with_text_that_looked_like_ppm_image.csv"))
  end

  def test_file_charset_for_ascii_file
    assert_equal("us-ascii", Magic.file_charset(absolute_path("us-ascii.txt")))
  end

  def test_file_charset_for_large_CSV_file_that_libmagic_thinks_is_pascal_sourcecode
    assert_equal("us-ascii", Magic.file_charset(absolute_path("part_of_ki_file.csv")))
  end

  def test_file_charset_for_utf8_file
    assert_equal("utf-8", Magic.file_charset(absolute_path("utf-8.txt")))
  end

  def test_file_charset_for_iso_8859_1_file
    assert_equal("iso-8859-1", Magic.file_charset(absolute_path("iso-8859-1.txt")))
  end

  def test_file_charset_for_windows_1252_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", Magic.file_charset(absolute_path("windows-1252.txt")))
  end

  def test_file_charset_for_macintosh_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", Magic.file_charset(absolute_path("macintosh.txt")))
  end

  def test_file_charset_for_csv_file_that_looked_like_ppm_image
    assert_equal("us-ascii", Magic.file_charset(absolute_path("file_with_text_that_looked_like_ppm_image.csv")))
  end

  def test_file_charset_raises_if_file_does_not_exist
    # assert_raise is not used because ruby 1.9 and 1.8 raise different
    # exceptions; only the message is checked.
    # NOTE(review): assumes both are StandardError subclasses -- confirm.
    begin
      Magic.file_charset("some file that does not exist.txt")
    rescue StandardError => expected
      assert_match(/(some file that does not exist\.txt|NULL pointer)/i, expected.message)
    else
      # Kept outside the rescued region so this failure is reported as-is
      # instead of being caught and turned into a regexp mismatch.
      flunk("Did not raise")
    end
  end

  def test_file_charset_bang_exhaustively_checks_file_contents
    t1 = Time.now
    assert_equal("iso-8859-1", Magic.file_charset!(absolute_path("huge_file_with_one_special_character.csv")))
    puts "took #{Time.now - t1} seconds"
  end

  def test_file_charset_bang_returns_correct_value_for_us_ascii_file
    assert_equal("us-ascii", Magic.file_charset!(absolute_path("us-ascii.txt")))
  end

  def test_file_charset_bang_returns_correct_value_for_windows_1252_file
    assert_equal("unknown", Magic.file_charset!(absolute_path("windows-1252.txt")))
  end

  def test_file_charset_bang_returns_correct_value_for_UTF8_file
    assert_equal("utf-8", Magic.file_charset!(absolute_path("utf-8.txt")))
  end

  # The non-bang variant reports us-ascii for this fixture while the bang
  # variant reports iso-8859-1.
  def test_file_charset_bang_handles_special_character_at_the_end_of_the_file
    assert_equal("us-ascii", Magic.file_charset(absolute_path("huge_file_with_one_special_character_at_the_end.csv")))
    assert_equal("iso-8859-1", Magic.file_charset!(absolute_path("huge_file_with_one_special_character_at_the_end.csv")))
  end

  def test_collect_special_characters_is_empty_when_there_are_no_special_characters
    assert_special_chars_equal("", "")
    assert_special_chars_equal("", "hello")
    assert_special_chars_equal("", "12345678901234567890")
  end

  def test_collect_special_characters_returns_characters_with_context
    assert_special_chars_equal("µ", "µ")
    assert_special_chars_equal("321µ123", "321µ123")
    assert_special_chars_equal("µ123", "µ123")
    assert_special_chars_equal("321µ", "321µ")
    assert_special_chars_equal("0987654321\xC21234567890", "0987654321\xC21234567890")
    assert_special_chars_equal("0987654321µ1234567890", "XXX0987654321µ1234567890XXX")
    assert_special_chars_equal("µ1234567890", "µ1234567890XXX")
    assert_special_chars_equal("0987654321µ", "XXX0987654321µ")
  end

  def test_collect_special_characters_does_not_duplicate_context
    assert_special_chars_equal("0987654321µaaaaaµ1234567890", "XXX0987654321µaaaaaµ1234567890XXX")
  end

  def test_collect_special_characters_works_with_multiple_characters
    assert_special_chars_equal(
      "0987654321µaaaaaµ12345678900987654321µ1234567890",
      "XXX0987654321µaaaaaµ1234567890XXXXXX0987654321µ1234567890XXX"
    )
  end

  # Temporarily replaces Magic::CHUNK_SIZE with 2 so the assertions run with a
  # much smaller chunk size, then restores the default whatever happens.
  def test_collect_special_characters_works_when_reading_multiple_chunks_to_the_buffer
    default_chunk_size = 2 ** 15
    assert_equal(default_chunk_size, Magic::CHUNK_SIZE)
    begin
      # remove_const is private, hence send; removing first avoids the
      # "already initialized constant" warning.
      Magic.send(:remove_const, "CHUNK_SIZE")
      Magic.send(:const_set, "CHUNK_SIZE", 2)
      assert_special_chars_equal("µ", "µ")
      assert_special_chars_equal("µ123", "µ123")
      assert_special_chars_equal("321µ", "321µ")
      assert_special_chars_equal("321µ123", "321µ123")
      assert_special_chars_equal("0987654321µ", "XXX0987654321µ")
      assert_special_chars_equal(
        "0987654321µaaaaaµ12345678900987654321µ1234567890",
        "XXX0987654321µaaaaaµ1234567890XXXXXX0987654321µ1234567890XXX"
      )
    ensure
      Magic.send(:remove_const, "CHUNK_SIZE") if Magic.const_defined?("CHUNK_SIZE")
      Magic.send(:const_set, "CHUNK_SIZE", default_chunk_size)
    end
    assert_equal(default_chunk_size, Magic::CHUNK_SIZE)
  end

  private

  # Asserts that Magic's private collect_special_characters, fed +input+
  # through a StringIO, returns +expected_output+ compared as binary.
  # The expectation is duplicated before re-tagging its encoding so the
  # caller's string literal is never mutated.
  def assert_special_chars_equal(expected_output, input)
    expected_bytes = expected_output.dup.force_encoding(Encoding::BINARY)
    assert_equal(expected_bytes, Magic.send(:collect_special_characters, StringIO.new(input)))
  end

  # Opens the named fixture, returns Magic.io_charset for it, and closes the
  # file handle when the block exits.
  def io_charset_of(test_file_name)
    File.open(absolute_path(test_file_name)) { |io| Magic.io_charset(io) }
  end

  # Returns the absolute path of the fixture +test_file_name+ in test/files.
  def absolute_path(test_file_name)
    File.join(FIXTURE_DIR, test_file_name)
  end
end
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: libmagic
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.11
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Moses Hohman
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-06-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rdoc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.0'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '7'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '4.0'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '7'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: hoe
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '3.23'
|
40
|
+
type: :development
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '3.23'
|
47
|
+
description: Ruby wrapper for the Unix file/libmagic utility, which can guess mime
|
48
|
+
types and character sets.
|
49
|
+
email:
|
50
|
+
- moses@moseshohman.com
|
51
|
+
executables: []
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files:
|
54
|
+
- History.txt
|
55
|
+
- Manifest.txt
|
56
|
+
- README.txt
|
57
|
+
files:
|
58
|
+
- History.txt
|
59
|
+
- Manifest.txt
|
60
|
+
- README.txt
|
61
|
+
- Rakefile
|
62
|
+
- lib/custom-magic
|
63
|
+
- lib/custom-magic.mime
|
64
|
+
- lib/libmagic.rb
|
65
|
+
- libmagic.gemspec
|
66
|
+
- test/files/file_with_text_that_looked_like_ppm_image.csv
|
67
|
+
- test/files/huge_file_with_one_special_character.csv
|
68
|
+
- test/files/huge_file_with_one_special_character_at_the_end.csv
|
69
|
+
- test/files/iso-8859-1.txt
|
70
|
+
- test/files/macintosh.txt
|
71
|
+
- test/files/part_of_ki_file.csv
|
72
|
+
- test/files/us-ascii.txt
|
73
|
+
- test/files/utf-8.txt
|
74
|
+
- test/files/windows-1252.txt
|
75
|
+
- test/test_magic.rb
|
76
|
+
homepage: https://github.com/cdd/libmagic
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata:
|
80
|
+
homepage_uri: https://github.com/cdd/libmagic
|
81
|
+
post_install_message:
|
82
|
+
rdoc_options:
|
83
|
+
- "--main"
|
84
|
+
- README.txt
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
requirements: []
|
98
|
+
rubygems_version: 3.0.3
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: Ruby wrapper for the Unix file/libmagic utility, which can guess mime types
|
102
|
+
and character sets.
|
103
|
+
test_files: []
|