fileinfo 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d118d43747ad299895b1f1d8cdf958617a98ebf6
4
+ data.tar.gz: 1bc3e0c4c74d6e308f7ca0aa8914407fade41039
5
+ SHA512:
6
+ metadata.gz: 8555f22f4c06c74e2e90aef4150b9637bd03030adc0ccc4e7e114f499ff801844574f59fd5d3cbe9eed8a83f7cc54a62ee1831aa1d6487f298c123054e47e779
7
+ data.tar.gz: 9a19e2014c9545ec433570177aa443b5312ad6c012128168603414913f1dd58be5392dca55b60d8b6a6e6badb2ce90a5678cec0b0531f86f6ca8d031106ab1f8
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Rafaël Blais Masson
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # FileInfo
2
+
3
+ FileInfo extracts encoding from strings and files using the [wonderful Unix `file` command](http://en.wikipedia.org/wiki/File_\(command\)).
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'fileinfo'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ ```sh
16
+ $ bundle
17
+ ```
18
+
19
+ Or install it yourself as:
20
+
21
+ ```sh
22
+ $ gem install fileinfo
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ Use `FileInfo.parse` with a string:
28
+
29
+ ```ruby
30
+ FileInfo.parse('foo bar baz').encoding # => #<Encoding:US-ASCII>
31
+ FileInfo.parse('föø bår bàz').encoding # => #<Encoding:UTF-8>
32
+ ```
33
+
34
+ Use `FileInfo.load` with a filename:
35
+
36
+ ```ruby
37
+ filename = '/Users/rafbm/Downloads/some_crap_coming_from_windows.csv'
38
+ FileInfo.load(filename).encoding # => #<Encoding:ISO-8859-1>
39
+ ```
40
+
41
+ ## Contributing
42
+
43
+ 1. Fork it
44
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
45
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
46
+ 4. Push to the branch (`git push origin my-new-feature`)
47
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
data/fileinfo.gemspec ADDED
@@ -0,0 +1,24 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'file_info/version'
5
+
6
# Packaging manifest for the fileinfo gem; the version is read from
# FileInfo::VERSION, loaded by the require above.
Gem::Specification.new do |spec|
  # The same sentence serves as both the summary and the description.
  blurb = "FileInfo extracts encoding from strings and files using the wonderful Unix `file` command."

  spec.name        = 'fileinfo'
  spec.version     = FileInfo::VERSION
  spec.authors     = ['Rafaël Blais Masson']
  spec.email       = ['rafael@heliom.ca']
  spec.description = blurb
  spec.summary     = blurb
  spec.homepage    = 'http://github.com/rafBM/fileinfo'
  spec.license     = 'MIT'

  # Package every file tracked by git; executables and test files are
  # selected from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Development-only dependencies: [name, optional version constraint].
  [
    ['bundler', '~> 1.3'],
    ['rake'],
    ['rspec', '~> 2.14']
  ].each { |dependency| spec.add_development_dependency(*dependency) }
end
data/lib/file_info.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'file_info/version'
2
+
3
+ require 'shellwords'
4
+ require 'tempfile'
5
+
6
# Detects the character encoding of files and strings by parsing the output
# of the Unix `file --mime` command.
#
#   FileInfo.parse('foo bar baz').encoding # => #<Encoding:US-ASCII>
#   FileInfo.load('/path/to/file.csv').encoding
class FileInfo
  # Captures the colon-free text that follows ": " at the end of a line of
  # `file` output (i.e. everything after the "<filename>: " prefix).
  STRING_REGEX = /: ([^:]+)$/
  # Captures the charset name reported by `file --mime`.
  ENCODING_REGEX = /charset=(\S+)/

  # @param output [String] raw output of `file --mime <path>`
  def initialize(output)
    @output = output
  end

  # Ruby Encoding matching the charset found in the `file` output.
  #
  # @return [Encoding]
  # @raise [ArgumentError] if the output contains no charset, or if Ruby does
  #   not know the reported charset name
  def encoding
    @encoding ||= ::Encoding.find(encoding_string)
  end

  # Inspects an existing file on disk.
  #
  # @param filename [String, #to_s] path of the file to inspect
  # @return [FileInfo]
  # @raise [ArgumentError] if the file does not exist
  def self.load(filename)
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    raise ArgumentError, "File '#{filename}' does not exist." unless File.exist?(filename)

    new(run_file_command(filename))
  end

  # Inspects in-memory content by writing it to a temporary file first.
  #
  # @param content [String] the bytes to inspect
  # @return [FileInfo]
  def self.parse(content)
    file = Tempfile.new('fileinfo')
    begin
      # Write the bytes untouched: no newline conversion or transcoding.
      file.binmode
      file.write(content)
      file.flush
      new(run_file_command(file.path))
    ensure
      # Close and delete the temp file even when writing or `file` fails.
      file.close!
    end
  end

  # Runs `file --mime` on +path+ and returns its output.
  # The argv form bypasses the shell, so no quoting is needed, and "--" stops
  # a path beginning with "-" from being read as an option.
  def self.run_file_command(path)
    IO.popen(['file', '--mime', '--', path.to_s], &:read)
  end
  private_class_method :run_file_command

  private

  # Description part of the `file` output, without the filename prefix.
  def string
    @string ||= begin
      match = @output.to_s.match(STRING_REGEX)
      raise ArgumentError, "Unexpected `file` output: #{@output.inspect}" unless match

      match[1].strip
    end
  end

  # Charset name as printed by `file`, e.g. "utf-8".
  def encoding_string
    @encoding_string ||= begin
      match = string.match(ENCODING_REGEX)
      raise ArgumentError, "No charset found in `file` output: #{@output.inspect}" unless match

      match[1]
    end
  end
end
data/lib/file_info/version.rb ADDED
@@ -0,0 +1,3 @@
1
class FileInfo
  # Gem version, read by the gemspec. Frozen so the constant cannot be
  # mutated in place at runtime.
  VERSION = '0.1.0'.freeze
end
data/lib/fileinfo.rb ADDED
@@ -0,0 +1 @@
1
+ require 'file_info'
data/spec/file_info_spec.rb ADDED
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
# Exercises FileInfo.load (filename input) and FileInfo.parse (string input)
# against the CSV fixtures.
describe FileInfo do
  let(:ascii_file)      { fixture('encoding_ascii.csv') }
  let(:isolatin_file)   { fixture('encoding_isolatin.csv') }
  let(:isowindows_file) { fixture('encoding_isowindows.csv') }
  let(:utf8_file)       { fixture('encoding_utf8.csv') }

  # Copies the UTF-8 fixture to +basename+ inside the fixtures directory,
  # yields the copy's path, and removes the copy even when the example fails.
  # Only the basename changes, so an underscore in a parent directory name
  # can never be rewritten by accident.
  def with_utf8_fixture_copy(basename)
    source = fixture('encoding_utf8.csv').path
    copy = File.join(File.dirname(source), basename)
    FileUtils.cp(source, copy)
    yield copy
  ensure
    FileUtils.rm_f(copy) if copy
  end

  describe '.load' do
    it 'extracts encoding from filename' do
      expect(FileInfo.load(ascii_file.path).encoding).to eq Encoding::US_ASCII
      expect(FileInfo.load(isolatin_file.path).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.load(isowindows_file.path).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.load(utf8_file.path).encoding).to eq Encoding::UTF_8
    end

    it 'extracts encoding from filename with space' do
      with_utf8_fixture_copy('encoding utf8.csv') do |new_filename|
        expect(FileInfo.load(new_filename).encoding).to eq Encoding::UTF_8
      end
    end

    it 'extracts encoding from filename with space and quote' do
      with_utf8_fixture_copy('encoding " utf8.csv') do |new_filename|
        expect(FileInfo.load(new_filename).encoding).to eq Encoding::UTF_8
      end
    end

    it 'raises ArgumentError if file does not exist' do
      expect { FileInfo.load('WRONG!!1') }.to raise_error ArgumentError
    end
  end

  describe '.parse' do
    it 'extracts encoding from string' do
      expect(FileInfo.parse(ascii_file.read).encoding).to eq Encoding::US_ASCII
      expect(FileInfo.parse(isolatin_file.read).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.parse(isowindows_file.read).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.parse(utf8_file.read).encoding).to eq Encoding::UTF_8
    end
  end
end
data/spec/fixtures/encoding_ascii.csv ADDED
@@ -0,0 +1,2 @@
1
+ Prenom,Nom
2
+ Rafael,Labbe
data/spec/fixtures/encoding_isolatin.csv ADDED
@@ -0,0 +1,2 @@
1
+ Pr�nom,Nom
2
+ Rafa�l,Labb�
data/spec/fixtures/encoding_isowindows.csv ADDED
@@ -0,0 +1,2 @@
1
+ Pr�nom,Nom
2
+ Rafa�l,Labb�
data/spec/fixtures/encoding_utf8.csv ADDED
@@ -0,0 +1,2 @@
1
+ Prénom,Nom
2
+ Rafaël,Labbé
data/spec/spec_helper.rb ADDED
@@ -0,0 +1,13 @@
1
+ require 'fileinfo'
2
+ require 'fileutils'
3
+
4
RSpec.configure do |config|
  config.order = 'random'

  # Helpers
  config.include Module.new {
    # Opens the named fixture from spec/fixtures and returns the File.
    # The path is resolved relative to this helper file rather than the
    # current working directory, so specs can be run from any directory.
    def fixture(filename)
      File.open(File.expand_path("../fixtures/#{filename}", __FILE__))
    end
  }
end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fileinfo
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Rafaël Blais Masson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '2.14'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '2.14'
55
+ description: FileInfo extracts encoding from strings and files using the wonderful
56
+ Unix `file` command.
57
+ email:
58
+ - rafael@heliom.ca
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - .gitignore
64
+ - .rspec
65
+ - Gemfile
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - fileinfo.gemspec
70
+ - lib/file_info.rb
71
+ - lib/file_info/version.rb
72
+ - lib/fileinfo.rb
73
+ - spec/file_info_spec.rb
74
+ - spec/fixtures/encoding_ascii.csv
75
+ - spec/fixtures/encoding_isolatin.csv
76
+ - spec/fixtures/encoding_isowindows.csv
77
+ - spec/fixtures/encoding_utf8.csv
78
+ - spec/spec_helper.rb
79
+ homepage: http://github.com/rafBM/fileinfo
80
+ licenses:
81
+ - MIT
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 2.0.3
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: FileInfo extracts encoding from strings and files using the wonderful Unix
103
+ `file` command.
104
+ test_files:
105
+ - spec/file_info_spec.rb
106
+ - spec/fixtures/encoding_ascii.csv
107
+ - spec/fixtures/encoding_isolatin.csv
108
+ - spec/fixtures/encoding_isowindows.csv
109
+ - spec/fixtures/encoding_utf8.csv
110
+ - spec/spec_helper.rb