libmagic 0.5.11

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,3 @@
1
+ Just some plain old ASCII text.
2
+
3
+ And some more.
@@ -0,0 +1,3 @@
1
+ Lots of unicode content in sequence to check that we don't accidentally cut multibyte characters: 漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字漢字
2
+ Some unicode content like this: µM
3
+ And more just for fun: 漢字
@@ -0,0 +1,2 @@
1
+ MoleculeICKdComment
2
+ ABC-�Archive Mine Collaborate� is our trademark� said Barry on pages �
@@ -0,0 +1,213 @@
1
+ # coding: utf-8
2
+ require "rubygems"
3
+ require "lib/libmagic"
4
+ require "test/unit"
5
+ require "stringio"
6
+
7
# Tests for the Magic wrapper: MIME type and charset detection for strings,
# IO objects and files, plus the private collect_special_characters helper.
#
# Fixture files are read from the "files" directory that sits next to this
# test file.
class MagicTest < Test::Unit::TestCase
  # Needed by the gzip fixture tests; loaded before any method that uses Zlib.
  require "zlib"

  # Some versions of file return the semicolon before the charset and some
  # don't, so MIME type results are matched with a regex instead of compared.
  UTF8_PLAIN_TEXT = /text\/plain;? charset=utf-8/

  # The module-level methods Magic adds on top of what it gets from
  # FFI::Library must be exactly this list.
  def test_public_interface_is_limited
    expected = [
      :file_charset, :file_charset!, :file_mime_type,
      :io_charset, :string_charset, :string_mime_type
    ]
    own_methods = Magic.public_methods - Magic.instance_methods -
                  FFI::Library.methods - FFI::Library.instance_methods
    assert_equal(expected, own_methods.sort.map { |m| m.to_sym })
  end

  def test_file_mime_type_for_utf8_file
    assert_match(UTF8_PLAIN_TEXT, Magic.file_mime_type(absolute_path("utf-8.txt")))
  end

  def test_string_mime_type_for_utf8_text
    assert_match(UTF8_PLAIN_TEXT, Magic.string_mime_type("Some truly Unicode characters like: 불거기"))
  end

  def test_string_charset_for_ascii_text
    assert_equal("us-ascii", Magic.string_charset("Just ASCII"))
  end

  def test_string_charset_for_utf8_text
    assert_equal("utf-8", Magic.string_charset("Some truly Unicode characters like: 불거기"))
  end

  def test_string_charset_for_iso_8859_1_text
    assert_equal("iso-8859-1", Magic.string_charset("A\240B\240C"))
  end

  # One generated test per byte value in 128..159. The byte is appended raw and
  # the string is tagged UTF-8, the same encoding an octal escape such as
  # "\205" has in a string literal of this utf-8 source file.
  (128..159).each do |windows_char|
    define_method("test_string_charset_for_string_with_windows_char_#{windows_char}_returns_unknown") do
      text = ("over, and over, and over" + windows_char.chr).force_encoding(Encoding::UTF_8)
      assert_equal("unknown", Magic.string_charset(text))
    end
  end

  # although this is redundant, it's nice to document the one weird case we've found explicitly
  def test_string_charset_for_string_with_windows_ellipsis_returns_unknown
    # windows 1252 ellipsis is 133 = 0205
    # this weird case was found in a file Sylvia had a problem slurping
    assert_equal("unknown", Magic.string_charset("over, and over, and over\205"))
  end

  def test_string_charset_for_string_with_utf8_Angstrom_returns_utf8_not_unknown
    # GzipReader.open closes the reader (and the underlying file) when the block ends.
    string = Zlib::GzipReader.open(absolute_path("utf-8.csv.gz")) { |gz| gz.read }
    assert_equal("utf-8", Magic.string_charset(string))
  end

  def test_io_charset_for_ascii_file
    assert_equal("us-ascii", io_charset_of_fixture("us-ascii.txt"))
  end

  def test_io_charset_for_utf8_file
    assert_equal("utf-8", io_charset_of_fixture("utf-8.txt"))
  end

  def test_io_charset_for_gzipped_utf8_file
    Zlib::GzipReader.open(absolute_path("utf-8.csv.gz")) do |uncompressed_io|
      assert_equal("utf-8", Magic.io_charset(uncompressed_io))
    end
  end

  def test_io_charset_for_iso_8859_1_file
    assert_equal("iso-8859-1", io_charset_of_fixture("iso-8859-1.txt"))
  end

  def test_io_charset_for_windows_1252_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", io_charset_of_fixture("windows-1252.txt"))
  end

  def test_io_charset_for_macintosh_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", io_charset_of_fixture("macintosh.txt"))
  end

  def test_io_charset_for_csv_file_that_looked_like_ppm_image
    assert_equal("us-ascii", io_charset_of_fixture("file_with_text_that_looked_like_ppm_image.csv"))
  end

  def test_file_charset_for_ascii_file
    assert_equal("us-ascii", Magic.file_charset(absolute_path("us-ascii.txt")))
  end

  def test_file_charset_for_large_CSV_file_that_libmagic_thinks_is_pascal_sourcecode
    assert_equal("us-ascii", Magic.file_charset(absolute_path("part_of_ki_file.csv")))
  end

  def test_file_charset_for_utf8_file
    assert_equal("utf-8", Magic.file_charset(absolute_path("utf-8.txt")))
  end

  def test_file_charset_for_iso_8859_1_file
    assert_equal("iso-8859-1", Magic.file_charset(absolute_path("iso-8859-1.txt")))
  end

  def test_file_charset_for_windows_1252_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", Magic.file_charset(absolute_path("windows-1252.txt")))
  end

  def test_file_charset_for_macintosh_file
    # unfortunately, unknown means some kind of extended ascii
    assert_equal("unknown", Magic.file_charset(absolute_path("macintosh.txt")))
  end

  def test_file_charset_for_csv_file_that_looked_like_ppm_image
    assert_equal("us-ascii", Magic.file_charset(absolute_path("file_with_text_that_looked_like_ppm_image.csv")))
  end

  def test_file_charset_raises_if_file_does_not_exist
    # for this, we don't use assert_raise:
    # ruby 1.9 and 1.8 return different exceptions, so only the message is checked
    raised = nil
    begin
      Magic.file_charset("some file that does not exist.txt")
    rescue StandardError => e
      # StandardError rather than Exception, so signals and exits still propagate
      raised = e
    end
    assert_not_nil(raised, "Did not raise")
    assert_match(/(some file that does not exist.txt|NULL pointer)/i, raised.message)
  end

  def test_file_charset_bang_exhaustively_checks_file_contents
    t1 = Time.now
    assert_equal("iso-8859-1", Magic.file_charset!(absolute_path("huge_file_with_one_special_character.csv")))
    puts "took #{Time.now - t1} seconds"
  end

  def test_file_charset_bang_returns_correct_value_for_us_ascii_file
    assert_equal("us-ascii", Magic.file_charset!(absolute_path("us-ascii.txt")))
  end

  def test_file_charset_bang_returns_correct_value_for_windows_1252_file
    assert_equal("unknown", Magic.file_charset!(absolute_path("windows-1252.txt")))
  end

  def test_file_charset_bang_returns_correct_value_for_UTF8_file
    assert_equal("utf-8", Magic.file_charset!(absolute_path("utf-8.txt")))
  end

  # The non-bang variant reports us-ascii for this fixture, while the bang
  # variant reports iso-8859-1 for the same file.
  def test_file_charset_bang_handles_special_character_at_the_end_of_the_file
    fixture = absolute_path("huge_file_with_one_special_character_at_the_end.csv")
    assert_equal("us-ascii", Magic.file_charset(fixture))
    assert_equal("iso-8859-1", Magic.file_charset!(fixture))
  end

  def test_collect_special_characters_is_empty_when_there_are_no_special_characters
    assert_special_chars_equal("", "")
    assert_special_chars_equal("", "hello")
    assert_special_chars_equal("", "12345678901234567890")
  end

  def test_collect_special_characters_returns_characters_with_context
    assert_special_chars_equal("µ", "µ")
    assert_special_chars_equal("321µ123", "321µ123")
    assert_special_chars_equal("µ123", "µ123")
    assert_special_chars_equal("321µ", "321µ")
    assert_special_chars_equal("0987654321\xC21234567890", "0987654321\xC21234567890")
    assert_special_chars_equal("0987654321µ1234567890", "XXX0987654321µ1234567890XXX")
    assert_special_chars_equal("µ1234567890", "µ1234567890XXX")
    assert_special_chars_equal("0987654321µ", "XXX0987654321µ")
  end

  def test_collect_special_characters_does_not_duplicate_context
    assert_special_chars_equal("0987654321µaaaaaµ1234567890", "XXX0987654321µaaaaaµ1234567890XXX")
  end

  def test_collect_special_characters_works_with_multiple_characters
    assert_special_chars_equal(
      "0987654321µaaaaaµ12345678900987654321µ1234567890",
      "XXX0987654321µaaaaaµ1234567890XXXXXX0987654321µ1234567890XXX"
    )
  end

  # Temporarily shrinks Magic::CHUNK_SIZE to 2 and repeats the context checks,
  # then restores the default in the ensure clause even if an assertion fails.
  def test_collect_special_characters_works_when_reading_multiple_chunks_to_the_buffer
    default_chunk_size = 2 ** 15
    assert_equal(default_chunk_size, Magic::CHUNK_SIZE)
    begin
      # remove_const is private, hence send; removing first avoids the
      # "already initialized constant" warning from const_set
      Magic.send(:remove_const, "CHUNK_SIZE")
      Magic.const_set("CHUNK_SIZE", 2)
      assert_special_chars_equal("µ", "µ")
      assert_special_chars_equal("µ123", "µ123")
      assert_special_chars_equal("321µ", "321µ")
      assert_special_chars_equal("321µ123", "321µ123")
      assert_special_chars_equal("0987654321µ", "XXX0987654321µ")
      assert_special_chars_equal(
        "0987654321µaaaaaµ12345678900987654321µ1234567890",
        "XXX0987654321µaaaaaµ1234567890XXXXXX0987654321µ1234567890XXX"
      )
    ensure
      Magic.send(:remove_const, "CHUNK_SIZE") if Magic.const_defined?("CHUNK_SIZE")
      Magic.const_set("CHUNK_SIZE", default_chunk_size)
    end
    assert_equal(default_chunk_size, Magic::CHUNK_SIZE)
  end

  # Asserts that Magic's private collect_special_characters, fed +input+
  # through a StringIO, returns +expected_output+ as a binary-encoded string.
  # The expectation is re-tagged on a copy so the caller's string is untouched.
  def assert_special_chars_equal(expected_output, input)
    expected_bytes = expected_output.dup.force_encoding(Encoding::BINARY)
    assert_equal(expected_bytes, Magic.send(:collect_special_characters, StringIO.new(input)))
  end

  # Opens the named fixture, returns Magic.io_charset for it, and closes the
  # file handle when the block ends.
  def io_charset_of_fixture(test_file_name)
    File.open(absolute_path(test_file_name)) { |io| Magic.io_charset(io) }
  end

  # Absolute path of a fixture in the "files" directory beside this test file.
  # Anchored on __FILE__ so it does not depend on the PWD environment variable.
  def absolute_path(test_file_name)
    File.expand_path(File.join("files", test_file_name), File.dirname(__FILE__))
  end
end
metadata ADDED
@@ -0,0 +1,103 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: libmagic
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.5.11
5
+ platform: ruby
6
+ authors:
7
+ - Moses Hohman
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2021-06-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rdoc
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '4.0'
20
+ - - "<"
21
+ - !ruby/object:Gem::Version
22
+ version: '7'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '4.0'
30
+ - - "<"
31
+ - !ruby/object:Gem::Version
32
+ version: '7'
33
+ - !ruby/object:Gem::Dependency
34
+ name: hoe
35
+ requirement: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '3.23'
40
+ type: :development
41
+ prerelease: false
42
+ version_requirements: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.23'
47
+ description: Ruby wrapper for the Unix file/libmagic utility, which can guess mime
48
+ types and character sets.
49
+ email:
50
+ - moses@moseshohman.com
51
+ executables: []
52
+ extensions: []
53
+ extra_rdoc_files:
54
+ - History.txt
55
+ - Manifest.txt
56
+ - README.txt
57
+ files:
58
+ - History.txt
59
+ - Manifest.txt
60
+ - README.txt
61
+ - Rakefile
62
+ - lib/custom-magic
63
+ - lib/custom-magic.mime
64
+ - lib/libmagic.rb
65
+ - libmagic.gemspec
66
+ - test/files/file_with_text_that_looked_like_ppm_image.csv
67
+ - test/files/huge_file_with_one_special_character.csv
68
+ - test/files/huge_file_with_one_special_character_at_the_end.csv
69
+ - test/files/iso-8859-1.txt
70
+ - test/files/macintosh.txt
71
+ - test/files/part_of_ki_file.csv
72
+ - test/files/us-ascii.txt
73
+ - test/files/utf-8.txt
74
+ - test/files/windows-1252.txt
75
+ - test/test_magic.rb
76
+ homepage: https://github.com/cdd/libmagic
77
+ licenses:
78
+ - MIT
79
+ metadata:
80
+ homepage_uri: https://github.com/cdd/libmagic
81
+ post_install_message:
82
+ rdoc_options:
83
+ - "--main"
84
+ - README.txt
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - ">="
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubygems_version: 3.0.3
99
+ signing_key:
100
+ specification_version: 4
101
+ summary: Ruby wrapper for the Unix file/libmagic utility, which can guess mime types
102
+ and character sets.
103
+ test_files: []