libmagic 0.5.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/History.txt +24 -0
- data/Manifest.txt +18 -0
- data/README.txt +57 -0
- data/Rakefile +9 -0
- data/lib/custom-magic +1 -0
- data/lib/custom-magic.mime +1 -0
- data/lib/libmagic.rb +177 -0
- data/libmagic.gemspec +44 -0
- data/test/files/file_with_text_that_looked_like_ppm_image.csv +9 -0
- data/test/files/huge_file_with_one_special_character.csv +23001 -0
- data/test/files/huge_file_with_one_special_character_at_the_end.csv +23001 -0
- data/test/files/iso-8859-1.txt +2 -0
- data/test/files/macintosh.txt +1 -0
- data/test/files/part_of_ki_file.csv +1001 -0
- data/test/files/us-ascii.txt +3 -0
- data/test/files/utf-8.txt +3 -0
- data/test/files/windows-1252.txt +2 -0
- data/test/test_magic.rb +213 -0
- metadata +103 -0
data/test/test_magic.rb
ADDED
@@ -0,0 +1,213 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require "rubygems"
|
3
|
+
require "lib/libmagic"
|
4
|
+
require "test/unit"
|
5
|
+
require "stringio"
|
6
|
+
|
7
|
+
# Tests for the Magic module: mime type and charset detection for files, IO
# objects and strings, plus the private collect_special_characters helper.
#
# Fixtures are read from the "files" directory next to this test file.
#
# NOTE(review): two tests below read "utf-8.csv.gz", which does not appear in
# the packaged file list for this gem version -- confirm the fixture ships.
class MagicTest < Test::Unit::TestCase
  # Needed by the gzip-based tests; loaded once when the class body is evaluated.
  require "zlib"

  def test_public_interface_is_limited
    expected = %w(file_charset file_charset! file_mime_type io_charset string_charset string_mime_type).map { |m| m.to_sym }
    actual = (Magic.public_methods - Magic.instance_methods - FFI::Library.methods - FFI::Library.instance_methods).sort.map { |m| m.to_sym }
    assert_equal(expected, actual)
  end

  def test_file_mime_type_for_utf8_file
    # regex necessary because some versions of file return the semicolon and some don't
    assert_match(%r{text/plain;? charset=utf-8}, Magic.file_mime_type(absolute_path("utf-8.txt")))
  end

  def test_string_mime_type_for_utf8_text
    # regex necessary because some versions of file return the semicolon and some don't
    assert_match(%r{text/plain;? charset=utf-8}, Magic.string_mime_type("Some truly Unicode characters like: 불거기"))
  end

  def test_string_charset_for_ascii_text
    assert_equal("us-ascii", Magic.string_charset("Just ASCII"))
  end

  def test_string_charset_for_utf8_text
    assert_equal("utf-8", Magic.string_charset("Some truly Unicode characters like: 불거기"))
  end

  def test_string_charset_for_iso_8859_1_text
    assert_equal("iso-8859-1", Magic.string_charset("A\240B\240C"))
  end

  # Generates one test per byte value 128..159, each appending that byte (as an
  # octal escape) to an ASCII string. The evaluated source is built only from
  # this literal range, never from external input. File and line are passed so
  # that failures point back at this file.
  (128..159).each do |windows_char|
    eval <<-EOMETHOD, binding, __FILE__, __LINE__ + 1
      def test_string_charset_for_string_with_windows_char_#{windows_char}_returns_unknown
        assert_equal("unknown", Magic.string_charset("over, and over, and over\\#{windows_char.to_s(8)}"))
      end
    EOMETHOD
  end

  # although this is redundant, it's nice to document the one weird case we've found explicitly
  def test_string_charset_for_string_with_windows_ellipsis_returns_unknown
    # windows 1252 ellipsis is 133 = 0205
    # this weird case was found in a file that Sylvia had problems slurping
    assert_equal("unknown", Magic.string_charset("over, and over, and over\205"))
  end

  def test_string_charset_for_string_with_utf8_Angstrom_returns_utf8_not_unknown
    string = File.open(absolute_path("utf-8.csv.gz")) do |io|
      uncompressed_io = Zlib::GzipReader.new(io)
      uncompressed_io.read
    end
    assert_equal("utf-8", Magic.string_charset(string))
  end

  def test_io_charset_for_ascii_file
    assert_equal("us-ascii", io_charset_of("us-ascii.txt"))
  end

  def test_io_charset_for_utf8_file
    assert_equal("utf-8", io_charset_of("utf-8.txt"))
  end

  def test_io_charset_for_gzipped_utf8_file
    File.open(absolute_path("utf-8.csv.gz")) do |io|
      uncompressed_io = Zlib::GzipReader.new(io)
      assert_equal("utf-8", Magic.io_charset(uncompressed_io))
    end
  end

  def test_io_charset_for_iso_8859_1_file
    assert_equal("iso-8859-1", io_charset_of("iso-8859-1.txt"))
  end

  def test_io_charset_for_windows_1252_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", io_charset_of("windows-1252.txt"))
  end

  def test_io_charset_for_macintosh_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", io_charset_of("macintosh.txt"))
  end

  def test_io_charset_for_csv_file_that_looked_like_ppm_image
    assert_equal("us-ascii", io_charset_of("file_with_text_that_looked_like_ppm_image.csv"))
  end

  def test_file_charset_for_ascii_file
    assert_equal("us-ascii", Magic.file_charset(absolute_path("us-ascii.txt")))
  end

  def test_file_charset_for_large_CSV_file_that_libmagic_thinks_is_pascal_sourcecode
    assert_equal("us-ascii", Magic.file_charset(absolute_path("part_of_ki_file.csv")))
  end

  def test_file_charset_for_utf8_file
    assert_equal("utf-8", Magic.file_charset(absolute_path("utf-8.txt")))
  end

  def test_file_charset_for_iso_8859_1_file
    assert_equal("iso-8859-1", Magic.file_charset(absolute_path("iso-8859-1.txt")))
  end

  def test_file_charset_for_windows_1252_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", Magic.file_charset(absolute_path("windows-1252.txt")))
  end

  def test_file_charset_for_macintosh_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", Magic.file_charset(absolute_path("macintosh.txt")))
  end

  def test_file_charset_for_csv_file_that_looked_like_ppm_image
    assert_equal("us-ascii", Magic.file_charset(absolute_path("file_with_text_that_looked_like_ppm_image.csv")))
  end

  def test_file_charset_raises_if_file_does_not_exist
    # for this, we don't use assert_raise: ruby 1.9 and 1.8 raise different
    # exceptions, so only the message is checked.
    # StandardError (not Exception) is rescued so interrupts and exits are not
    # swallowed, and the "did not raise" check lives outside the rescued block.
    raised = begin
      Magic.file_charset("some file that does not exist.txt")
      nil
    rescue StandardError => e
      e
    end
    assert(raised, "Did not raise")
    assert_match(/(some file that does not exist\.txt|NULL pointer)/i, raised.message)
  end

  def test_file_charset_bang_exhaustively_checks_file_contents
    started_at = Time.now
    assert_equal("iso-8859-1", Magic.file_charset!(absolute_path("huge_file_with_one_special_character.csv")))
    puts "took #{Time.now - started_at} seconds"
  end

  def test_file_charset_bang_returns_correct_value_for_us_ascii_file
    assert_equal("us-ascii", Magic.file_charset!(absolute_path("us-ascii.txt")))
  end

  def test_file_charset_bang_returns_correct_value_for_windows_1252_file
    assert_equal("unknown", Magic.file_charset!(absolute_path("windows-1252.txt")))
  end

  def test_file_charset_bang_returns_correct_value_for_UTF8_file
    assert_equal("utf-8", Magic.file_charset!(absolute_path("utf-8.txt")))
  end

  def test_file_charset_bang_handles_special_character_at_the_end_of_the_file
    fixture = absolute_path("huge_file_with_one_special_character_at_the_end.csv")
    # the non-bang variant reports us-ascii for this file; the bang variant reports iso-8859-1
    assert_equal("us-ascii", Magic.file_charset(fixture))
    assert_equal("iso-8859-1", Magic.file_charset!(fixture))
  end

  def test_collect_special_characters_is_empty_when_there_are_no_special_characters
    assert_special_chars_equal("", "")
    assert_special_chars_equal("", "hello")
    assert_special_chars_equal("", "12345678901234567890")
  end

  def test_collect_special_characters_returns_characters_with_context
    assert_special_chars_equal("µ", "µ")
    assert_special_chars_equal("321µ123", "321µ123")
    assert_special_chars_equal("µ123", "µ123")
    assert_special_chars_equal("321µ", "321µ")
    assert_special_chars_equal("0987654321\xC21234567890", "0987654321\xC21234567890")
    assert_special_chars_equal("0987654321µ1234567890", "XXX0987654321µ1234567890XXX")
    assert_special_chars_equal("µ1234567890", "µ1234567890XXX")
    assert_special_chars_equal("0987654321µ", "XXX0987654321µ")
  end

  def test_collect_special_characters_does_not_duplicate_context
    assert_special_chars_equal("0987654321µaaaaaµ1234567890", "XXX0987654321µaaaaaµ1234567890XXX")
  end

  def test_collect_special_characters_works_with_multiple_characters
    assert_special_chars_equal(
      "0987654321µaaaaaµ12345678900987654321µ1234567890",
      "XXX0987654321µaaaaaµ1234567890XXXXXX0987654321µ1234567890XXX"
    )
  end

  def test_collect_special_characters_works_when_reading_multiple_chunks_to_the_buffer
    default_chunk_size = 2 ** 15
    assert_equal(default_chunk_size, Magic::CHUNK_SIZE)
    begin
      # Temporarily replace the constant with a 2-byte chunk size.
      Magic.send(:remove_const, "CHUNK_SIZE")
      Magic.send(:const_set, "CHUNK_SIZE", 2)
      assert_special_chars_equal("µ", "µ")
      assert_special_chars_equal("µ123", "µ123")
      assert_special_chars_equal("321µ", "321µ")
      assert_special_chars_equal("321µ123", "321µ123")
      assert_special_chars_equal("0987654321µ", "XXX0987654321µ")
      assert_special_chars_equal(
        "0987654321µaaaaaµ12345678900987654321µ1234567890",
        "XXX0987654321µaaaaaµ1234567890XXXXXX0987654321µ1234567890XXX"
      )
    ensure
      # Always restore the original value, even if an assertion above failed.
      Magic.send(:remove_const, "CHUNK_SIZE") if Magic.const_defined?("CHUNK_SIZE")
      Magic.const_set("CHUNK_SIZE", default_chunk_size)
    end
    assert_equal(default_chunk_size, Magic::CHUNK_SIZE)
  end

  private

  # Asserts that Magic's private collect_special_characters, given +input+
  # wrapped in a StringIO, returns +expected_output+ compared as binary bytes.
  # The expected string is duplicated first so the caller's literal is never
  # re-encoded in place.
  def assert_special_chars_equal(expected_output, input)
    expected_bytes = expected_output.dup.force_encoding(Encoding::BINARY)
    actual = Magic.send(:collect_special_characters, StringIO.new(input))
    assert_equal(expected_bytes, actual)
  end

  # Opens the named fixture, passes the open IO to Magic.io_charset and returns
  # the result. The block form of File.open closes the handle afterwards.
  def io_charset_of(test_file_name)
    File.open(absolute_path(test_file_name)) { |io| Magic.io_charset(io) }
  end

  # Returns the absolute path of a fixture in the "files" directory beside this
  # test file. Resolved from __FILE__ rather than ENV["PWD"], so it does not
  # depend on that environment variable being set or current.
  def absolute_path(test_file_name)
    File.expand_path(File.join("files", test_file_name), File.dirname(__FILE__))
  end
end
|
metadata
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: libmagic
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.5.11
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Moses Hohman
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2021-06-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rdoc
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '4.0'
|
20
|
+
- - "<"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '7'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '4.0'
|
30
|
+
- - "<"
|
31
|
+
- !ruby/object:Gem::Version
|
32
|
+
version: '7'
|
33
|
+
- !ruby/object:Gem::Dependency
|
34
|
+
name: hoe
|
35
|
+
requirement: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - "~>"
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: '3.23'
|
40
|
+
type: :development
|
41
|
+
prerelease: false
|
42
|
+
version_requirements: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - "~>"
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: '3.23'
|
47
|
+
description: Ruby wrapper for the Unix file/libmagic utility, which can guess mime
|
48
|
+
types and character sets.
|
49
|
+
email:
|
50
|
+
- moses@moseshohman.com
|
51
|
+
executables: []
|
52
|
+
extensions: []
|
53
|
+
extra_rdoc_files:
|
54
|
+
- History.txt
|
55
|
+
- Manifest.txt
|
56
|
+
- README.txt
|
57
|
+
files:
|
58
|
+
- History.txt
|
59
|
+
- Manifest.txt
|
60
|
+
- README.txt
|
61
|
+
- Rakefile
|
62
|
+
- lib/custom-magic
|
63
|
+
- lib/custom-magic.mime
|
64
|
+
- lib/libmagic.rb
|
65
|
+
- libmagic.gemspec
|
66
|
+
- test/files/file_with_text_that_looked_like_ppm_image.csv
|
67
|
+
- test/files/huge_file_with_one_special_character.csv
|
68
|
+
- test/files/huge_file_with_one_special_character_at_the_end.csv
|
69
|
+
- test/files/iso-8859-1.txt
|
70
|
+
- test/files/macintosh.txt
|
71
|
+
- test/files/part_of_ki_file.csv
|
72
|
+
- test/files/us-ascii.txt
|
73
|
+
- test/files/utf-8.txt
|
74
|
+
- test/files/windows-1252.txt
|
75
|
+
- test/test_magic.rb
|
76
|
+
homepage: https://github.com/cdd/libmagic
|
77
|
+
licenses:
|
78
|
+
- MIT
|
79
|
+
metadata:
|
80
|
+
homepage_uri: https://github.com/cdd/libmagic
|
81
|
+
post_install_message:
|
82
|
+
rdoc_options:
|
83
|
+
- "--main"
|
84
|
+
- README.txt
|
85
|
+
require_paths:
|
86
|
+
- lib
|
87
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: '0'
|
92
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - ">="
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0'
|
97
|
+
requirements: []
|
98
|
+
rubygems_version: 3.0.3
|
99
|
+
signing_key:
|
100
|
+
specification_version: 4
|
101
|
+
summary: Ruby wrapper for the Unix file/libmagic utility, which can guess mime types
|
102
|
+
and character sets.
|
103
|
+
test_files: []
|