fileinfo 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: d118d43747ad299895b1f1d8cdf958617a98ebf6
4
+ data.tar.gz: 1bc3e0c4c74d6e308f7ca0aa8914407fade41039
5
+ SHA512:
6
+ metadata.gz: 8555f22f4c06c74e2e90aef4150b9637bd03030adc0ccc4e7e114f499ff801844574f59fd5d3cbe9eed8a83f7cc54a62ee1831aa1d6487f298c123054e47e779
7
+ data.tar.gz: 9a19e2014c9545ec433570177aa443b5312ad6c012128168603414913f1dd58be5392dca55b60d8b6a6e6badb2ce90a5678cec0b0531f86f6ca8d031106ab1f8
data/.gitignore ADDED
@@ -0,0 +1,17 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .yardoc
6
+ Gemfile.lock
7
+ InstalledFiles
8
+ _yardoc
9
+ coverage
10
+ doc/
11
+ lib/bundler/man
12
+ pkg
13
+ rdoc
14
+ spec/reports
15
+ test/tmp
16
+ test/version_tmp
17
+ tmp
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --color
2
+ --format documentation
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Rafaël Blais Masson
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,47 @@
1
+ # FileInfo
2
+
3
+ FileInfo extracts encoding from strings and files using the [wonderful Unix `file` command](http://en.wikipedia.org/wiki/File_\(command\)).
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'fileinfo'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ ```sh
16
+ $ bundle
17
+ ```
18
+
19
+ Or install it yourself as:
20
+
21
+ ```sh
22
+ $ gem install fileinfo
23
+ ```
24
+
25
+ ## Usage
26
+
27
+ Use `FileInfo.parse` with a string:
28
+
29
+ ```ruby
30
+ FileInfo.parse('foo bar baz').encoding # => #<Encoding:US-ASCII>
31
+ FileInfo.parse('föø bår bàz').encoding # => #<Encoding:UTF-8>
32
+ ```
33
+
34
+ Use `FileInfo.load` with a filename:
35
+
36
+ ```ruby
37
+ filename = '/Users/rafbm/Downloads/some_crap_coming_from_windows.csv'
38
+ FileInfo.load(filename).encoding # => #<Encoding:ISO-8859-1>
39
+ ```
40
+
41
+ ## Contributing
42
+
43
+ 1. Fork it
44
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
45
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
46
+ 4. Push to the branch (`git push origin my-new-feature`)
47
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require 'bundler/gem_tasks'
data/fileinfo.gemspec ADDED
@@ -0,0 +1,24 @@
1
# coding: utf-8
# Gem specification for fileinfo. Puts the gem's own lib/ directory on the
# load path so the version constant can be read without installing the gem.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'file_info/version'

Gem::Specification.new do |spec|
  # The same sentence serves as both the short summary and the description.
  tagline = "FileInfo extracts encoding from strings and files using the wonderful Unix `file` command."

  # Identity
  spec.name     = 'fileinfo'
  spec.version  = FileInfo::VERSION
  spec.license  = 'MIT'
  spec.homepage = 'http://github.com/rafBM/fileinfo'

  # Authorship
  spec.authors = ['Rafaël Blais Masson']
  spec.email   = ['rafael@heliom.ca']

  # Human-readable blurbs
  spec.description = tagline
  spec.summary     = tagline

  # Packaged files are whatever git currently tracks; executables and test
  # files are derived from that list by path prefix.
  spec.files         = `git ls-files`.split($/)
  spec.executables   = spec.files.grep(%r{^bin/}) { |path| File.basename(path) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ['lib']

  # Development-only dependencies
  spec.add_development_dependency 'bundler', '~> 1.3'
  spec.add_development_dependency 'rake'
  spec.add_development_dependency 'rspec', '~> 2.14'
end
data/lib/file_info.rb ADDED
@@ -0,0 +1,43 @@
1
+ require 'file_info/version'
2
+
3
+ require 'shellwords'
4
+ require 'tempfile'
5
+
6
# Detects the character encoding of a file or of a string by shelling out to
# the Unix `file --mime` command and reading the `charset=` field it prints.
#
#   FileInfo.parse('foo bar baz').encoding       # => #<Encoding:US-ASCII>
#   FileInfo.load('/path/to/file.csv').encoding  # => #<Encoding:ISO-8859-1>
class FileInfo
  # Captures the text that follows a ": " and contains no further colon up to
  # the end of the line (the description part of `file` output, after the
  # filename).
  STRING_REGEX = /: ([^:]+)$/
  # Captures the value of the `charset=` field inside that description.
  ENCODING_REGEX = /charset=(\S+)/

  # @param output [String] raw output of `file --mime <path>`
  def initialize(output)
    @output = output
  end

  # Encoding named by the `charset=` field of the wrapped output (memoized).
  #
  # @return [Encoding]
  # @raise [ArgumentError] if the output carries no charset field, or if
  #   Encoding.find does not know the reported charset name
  def encoding
    @encoding ||= ::Encoding.find(encoding_string)
  end

  # Runs `file --mime` on an existing file.
  #
  # @param filename [String] path of the file to inspect
  # @return [FileInfo]
  # @raise [ArgumentError] if no file exists at +filename+
  def self.load(filename)
    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    raise ArgumentError, "File '#{filename}' does not exist." unless File.exist?(filename)

    new `file --mime #{Shellwords.escape(filename)}`
  end

  # Writes +content+ to a temporary file and runs `file --mime` on it.
  #
  # @param content [String] data whose encoding should be detected
  # @return [FileInfo]
  def self.parse(content)
    file = Tempfile.new('fileinfo')
    begin
      # Binary mode so the bytes reach the disk exactly as given.
      file.binmode
      file.write(content)
      # Push buffered bytes to disk before the external command reads them.
      file.flush
      # The temp directory may contain spaces or shell metacharacters.
      new `file --mime #{Shellwords.escape(file.path)}`
    ensure
      # Always remove the temp file, even if writing or the command raised.
      file.close
      file.unlink
    end
  end

  private

  # Description part of the `file` output (everything after the filename).
  def string
    @string ||= capture(@output, STRING_REGEX).strip
  end

  # Charset name exactly as printed by `file`, e.g. "utf-8".
  def encoding_string
    @encoding_string ||= capture(string, ENCODING_REGEX)
  end

  # Returns the first capture group of +regex+ in +text+, raising a
  # descriptive ArgumentError instead of failing on nil when nothing matches.
  def capture(text, regex)
    text[regex, 1] or
      raise ArgumentError, "Could not extract encoding from `file` output: #{@output.inspect}"
  end
end
@@ -0,0 +1,3 @@
1
class FileInfo
  # Released gem version. Frozen so the shared constant cannot be mutated in
  # place by code that reads it.
  VERSION = '0.1.0'.freeze
end
data/lib/fileinfo.rb ADDED
@@ -0,0 +1 @@
1
+ require 'file_info'
@@ -0,0 +1,50 @@
1
+ require 'spec_helper'
2
+
3
# Specs for FileInfo: encoding detection from a path (.load) and from string
# content (.parse), exercised against the CSV fixtures.
describe FileInfo do
  let(:ascii_file)      { fixture('encoding_ascii.csv') }
  let(:isolatin_file)   { fixture('encoding_isolatin.csv') }
  let(:isowindows_file) { fixture('encoding_isowindows.csv') }
  let(:utf8_file)       { fixture('encoding_utf8.csv') }

  # Copies the UTF-8 fixture next to itself under a name in which the first
  # underscore of the *basename* is replaced by +replacement+, yields the
  # copy's path, and always deletes the copy afterwards.
  #
  # Only the basename is rewritten: substituting on the full path would
  # corrupt the directory part whenever the checkout path contains "_".
  def with_renamed_utf8_copy(replacement)
    source = utf8_file.path
    copy = File.join(File.dirname(source), File.basename(source).sub('_', replacement))
    FileUtils.cp(source, copy)
    yield copy
  ensure
    # Runs even when the expectation fails, so no stray fixture is left behind.
    FileUtils.rm_f(copy) if copy
  end

  describe '.load' do
    it 'extracts encoding from filename' do
      expect(FileInfo.load(ascii_file.path).encoding).to eq Encoding::US_ASCII
      expect(FileInfo.load(isolatin_file.path).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.load(isowindows_file.path).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.load(utf8_file.path).encoding).to eq Encoding::UTF_8
    end

    it 'extracts encoding from filename with space' do
      with_renamed_utf8_copy(' ') do |new_filename|
        expect(FileInfo.load(new_filename).encoding).to eq Encoding::UTF_8
      end
    end

    it 'extracts encoding from filename with space and quote' do
      with_renamed_utf8_copy(' " ') do |new_filename|
        expect(FileInfo.load(new_filename).encoding).to eq Encoding::UTF_8
      end
    end

    it 'raises ArgumentError if file does not exist' do
      expect { FileInfo.load('WRONG!!1') }.to raise_error ArgumentError
    end
  end

  describe '.parse' do
    it 'extracts encoding from string' do
      expect(FileInfo.parse(ascii_file.read).encoding).to eq Encoding::US_ASCII
      expect(FileInfo.parse(isolatin_file.read).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.parse(isowindows_file.read).encoding).to eq Encoding::ISO_8859_1
      expect(FileInfo.parse(utf8_file.read).encoding).to eq Encoding::UTF_8
    end
  end
end
@@ -0,0 +1,2 @@
1
+ Prenom,Nom
2
+ Rafael,Labbe
@@ -0,0 +1,2 @@
1
+ Pr�nom,Nom
2
+ Rafa�l,Labb�
@@ -0,0 +1,2 @@
1
+ Pr�nom,Nom
2
+ Rafa�l,Labb�
@@ -0,0 +1,2 @@
1
+ Prénom,Nom
2
+ Rafaël,Labbé
@@ -0,0 +1,13 @@
1
+ require 'fileinfo'
2
+ require 'fileutils'
3
+
4
# Global RSpec configuration plus helpers shared by every example group.
RSpec.configure do |config|
  # Run examples in random order.
  config.order = 'random'

  # Helpers
  config.include Module.new {
    # Opens a fixture file for reading.
    #
    # The path is resolved relative to this helper file rather than the
    # current working directory, so the specs also work when rspec is started
    # from somewhere other than the project root.
    #
    # @param filename [String] name of a file under spec/fixtures
    # @return [File] an open handle on the fixture
    def fixture(filename)
      File.open(File.expand_path("../fixtures/#{filename}", __FILE__))
    end
  }
end
metadata ADDED
@@ -0,0 +1,110 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: fileinfo
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Rafaël Blais Masson
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-08 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ~>
18
+ - !ruby/object:Gem::Version
19
+ version: '1.3'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ~>
25
+ - !ruby/object:Gem::Version
26
+ version: '1.3'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ~>
46
+ - !ruby/object:Gem::Version
47
+ version: '2.14'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ~>
53
+ - !ruby/object:Gem::Version
54
+ version: '2.14'
55
+ description: FileInfo extracts encoding from strings and files using the wonderful
56
+ Unix `file` command.
57
+ email:
58
+ - rafael@heliom.ca
59
+ executables: []
60
+ extensions: []
61
+ extra_rdoc_files: []
62
+ files:
63
+ - .gitignore
64
+ - .rspec
65
+ - Gemfile
66
+ - LICENSE.txt
67
+ - README.md
68
+ - Rakefile
69
+ - fileinfo.gemspec
70
+ - lib/file_info.rb
71
+ - lib/file_info/version.rb
72
+ - lib/fileinfo.rb
73
+ - spec/file_info_spec.rb
74
+ - spec/fixtures/encoding_ascii.csv
75
+ - spec/fixtures/encoding_isolatin.csv
76
+ - spec/fixtures/encoding_isowindows.csv
77
+ - spec/fixtures/encoding_utf8.csv
78
+ - spec/spec_helper.rb
79
+ homepage: http://github.com/rafBM/fileinfo
80
+ licenses:
81
+ - MIT
82
+ metadata: {}
83
+ post_install_message:
84
+ rdoc_options: []
85
+ require_paths:
86
+ - lib
87
+ required_ruby_version: !ruby/object:Gem::Requirement
88
+ requirements:
89
+ - - '>='
90
+ - !ruby/object:Gem::Version
91
+ version: '0'
92
+ required_rubygems_version: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - '>='
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ requirements: []
98
+ rubyforge_project:
99
+ rubygems_version: 2.0.3
100
+ signing_key:
101
+ specification_version: 4
102
+ summary: FileInfo extracts encoding from strings and files using the wonderful Unix
103
+ `file` command.
104
+ test_files:
105
+ - spec/file_info_spec.rb
106
+ - spec/fixtures/encoding_ascii.csv
107
+ - spec/fixtures/encoding_isolatin.csv
108
+ - spec/fixtures/encoding_isowindows.csv
109
+ - spec/fixtures/encoding_utf8.csv
110
+ - spec/spec_helper.rb