encoda 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Zipme
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,62 @@
1
+ = Encoda
2
+
3
+ This is a simple file encoding converter
4
+
5
+ == Installation
6
+
7
+ sudo gem install encoda
8
+
9
+ == Getting Started
10
+
11
+ ==== Single file conversion
12
+
13
+ Encoda.convert do
14
+ to_path 'convert/to/path'
15
+ from_file 'path/to/your/file'
16
+ from_encoding 'GB2312'
17
+ to_encoding 'UTF8'
18
+ end
19
+
20
+ ==== Multiple files conversion
21
+
22
+ You can batch convert multiple files from a specific directory by specifying from_path
23
+
24
+ Encoda.convert do
25
+ from_path '/from/path'
26
+ to_path '/to/path'
27
+ to_encoding 'UTF8'
28
+ end
29
+
30
+ If you specify from_path, it will always do the batch conversion and ignore the from_file option.
31
+
32
+ As you can see, if you omit the from_encoding option, the Encoda will try to guess the file's original encoding.
33
+ The guessing feature is based on chardet library.
34
+
35
+ ==== Retrieve the conversion result
36
+
37
+ encoda = Encoda.convert do
38
+ from_path '/from/path'
39
+ to_path '/to/path'
40
+ to_encoding 'UTF8'
41
+ end
42
+
43
+ puts encoda.failed #=> ['failed_file1.txt', 'failed_file2.txt']
44
+ puts encoda.success #=> ['success_file1.txt', 'success_file3.txt', 'file4.srt']
45
+
46
+ == TODOs
47
+
48
+ Refactoring!!!
49
+
50
+ == Note on Patches/Pull Requests
51
+
52
+ * Fork the project.
53
+ * Make your feature addition or bug fix.
54
+ * Add tests for it. This is important so I don't break it in a
55
+ future version unintentionally.
56
+ * Commit, do not mess with rakefile, version, or history.
57
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
58
+ * Send me a pull request. Bonus points for topic branches.
59
+
60
+ == Copyright
61
+
62
+ Copyright (c) 2010 Zipme. See LICENSE for details.
@@ -0,0 +1,46 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
# Gem packaging tasks. Jeweler is optional: when it cannot be loaded the
# packaging tasks are simply not defined and a hint is printed instead.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "encoda"
    gem.summary = %Q{Encoding converter}
    gem.description = %Q{It's a simple file encoding converter }
    gem.email = "genkiwow@gmail.com"
    gem.homepage = "http://github.com/zipme/encoda"
    gem.authors = ["Zipme"]
    gem.add_dependency "chardet", ">=0.9.0"
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Spec tasks: `rake spec` runs every *_spec.rb under spec/.
require 'spec/rake/spectask'
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# `rake rcov` runs the same specs with rcov enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# check_dependencies is not defined anywhere in this Rakefile; it is expected
# to come from the jeweler tasks above. Only depend on it when it exists, so
# `rake spec` keeps working after the LoadError branch was taken.
if Rake::Task.task_defined?('check_dependencies')
  task :spec => :check_dependencies
end

task :default => :spec

# RDoc generation into ./rdoc, titled with the version read from VERSION.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # strip: the VERSION file ends with a newline that must not leak into the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "encoda #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,66 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for encoda 0.1.0.
#
# NOTE(review): the header of this file says it is generated by jeweler; any
# change made here should also be reflected in Jeweler::Tasks in the Rakefile,
# otherwise regenerating the gemspec will overwrite it.
Gem::Specification.new do |s|
  s.name = %q{encoda}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Zipme"]
  s.date = %q{2010-01-07}
  s.description = %q{It's a simple file encoding converter }
  s.email = %q{genkiwow@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "encoda.gemspec",
    "lib/encoda.rb",
    "spec/encoda_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb",
    "spec/test_files/conv_files/nedivx-tbl.big5.srt",
    "spec/test_files/conv_files/nedivx-tbl.eng.srt",
    "spec/test_files/conv_files/nedivx-tbl.gb.srt",
    "spec/test_files/conv_files/utf8.gb.srt",
    "spec/test_files/files/nedivx-tbl.big5.srt",
    "spec/test_files/files/nedivx-tbl.eng.srt",
    "spec/test_files/files/nedivx-tbl.gb.srt",
    "spec/test_files/files/utf8.gb.srt"
  ]
  s.homepage = %q{http://github.com/zipme/encoda}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Encoding converter}
  s.test_files = [
    "spec/encoda_spec.rb",
    "spec/spec_helper.rb"
  ]

  if s.respond_to? :specification_version
    s.specification_version = 3

    # Gem::RubyGemsVersion is not defined by newer RubyGems releases; prefer
    # Gem::VERSION and fall back to the old constant only when it is absent.
    rubygems_version = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(rubygems_version) >= Gem::Version.new('1.2.0')
      s.add_runtime_dependency(%q<chardet>, [">= 0.9.0"])
      s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
    else
      # RubyGems older than 1.2.0: no runtime/development distinction
      s.add_dependency(%q<chardet>, [">= 0.9.0"])
      s.add_dependency(%q<rspec>, [">= 1.2.9"])
    end
  else
    s.add_dependency(%q<chardet>, [">= 0.9.0"])
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
66
+
@@ -0,0 +1,141 @@
1
+ require 'rubygems'
2
+ require 'UniversalDetector'
3
+ require 'iconv'
4
+ require 'ftools'
5
+
6
# Converts text files from one character encoding to another.
#
# Two modes are supported:
# * batch mode  - when from_path is set, every "*.srt" file in from_path that
#   does not already exist in to_path is converted;
# * single mode - otherwise the file named by from_file is converted.
#
# Options are set through a small DSL evaluated by Encoda.convert:
#
#   encoda = Encoda.convert do
#     from_path '/from/path'
#     to_path   '/to/path'
#     to_encoding 'UTF8'
#   end
#   encoda.failed  #=> names of the files that could not be converted
#   encoda.success #=> names of the files that were converted or copied
#
# When from_encoding is not given, the encoding of each file is guessed with
# UniversalDetector (chardet).
class Encoda

  # Results of the last run. Batch mode records file basenames, single-file
  # mode records the from_file path as given.
  attr_reader :failed, :success

  def initialize
    @from_path = ""
    @to_path = ""
    @from_file = nil
    @to_file = ""
    @from_encoding = nil
    @to_encoding = "UTF8"
    @failed = []
    @success = []
  end

  # Each option method below works both as a DSL setter (with an argument)
  # and as a reader (without one), so the configured values can be read back.

  # Source directory for batch conversion.
  def from_path(from_path = nil)
    return @from_path if from_path.nil?
    @from_path = from_path
  end

  # Source file for single-file conversion.
  def from_file(from_file = nil)
    return @from_file if from_file.nil?
    @from_file = from_file
  end

  # Name of the converted file in single-file mode (only its basename is
  # used); defaults to "converted_<source basename>".
  def to_file(to_file = nil)
    return @to_file if to_file.nil?
    @to_file = to_file
  end

  # Destination directory. It must already exist.
  def to_path(to_path = nil)
    return @to_path if to_path.nil?
    @to_path = to_path
  end

  # Encoding of the source file(s). When left unset it is guessed per file.
  def from_encoding(from_enc = nil)
    return @from_encoding if from_enc.nil?
    @from_encoding = from_enc
  end

  # Target encoding, "UTF8" by default.
  def to_encoding(to_enc = nil)
    return @to_encoding if to_enc.nil?
    @to_encoding = to_enc
  end

  # Runs the conversion with the current options.
  # Raises RuntimeError when a required file or directory is missing.
  def run!
    #TODO: Add some validation against the attrs
    process!
  end

  #DSL API
  # Evaluates the block against a fresh Encoda (so the option methods can be
  # called bare), runs the conversion and returns the Encoda instance.
  def self.convert(&block)
    encoda = Encoda.new
    encoda.instance_eval(&block)
    encoda.run!
    encoda
  end

  #Conventional API
  # Not implemented yet; currently does nothing and returns nil.
  def convert(from, to, from_encoding, to_encoding)
    #TODO
  end

  private

  # Drives one conversion run and fills @success / @failed.
  def process!
    puts "convert begins!"
    # start from a clean slate so repeated runs do not accumulate results
    @failed = []
    @success = []
    check_file_or_dir @to_path
    if convert_from_path?
      check_file_or_dir @from_path
      unfinished_files.each do |filename|
        converted = convert_file(File.join(@from_path, filename), File.join(@to_path, filename))
        record filename, converted
      end
    else
      check_file_or_dir @from_file
      @to_file = "converted_#{File.basename(@from_file)}" if @to_file.to_s.strip.empty?
      converted = convert_file(@from_file, File.join(@to_path, File.basename(@to_file)))
      record @from_file, converted
    end
    puts "convert finished! #{@success.size} files have been converted, #{@failed.size} failed."
  end

  # Files each outcome under the matching result list.
  def record(name, converted)
    (converted ? @success : @failed) << name
  end

  # True when a source directory was given, i.e. batch mode.
  def convert_from_path?
    !@from_path.nil? && !@from_path.empty?
  end

  # Raises RuntimeError unless file_or_dir names an existing file or directory.
  def check_file_or_dir(file_or_dir)
    raise "File or directory: #{file_or_dir} cannot be found!" if ( file_or_dir.nil? || !File.exist?(file_or_dir) )
  end

  # Basenames of the "*.srt" files present in from_path but not yet in to_path.
  def unfinished_files
    orin_files = Dir.glob(File.join(@from_path, "*.srt")).map { |f| File.basename(f) }
    conv_files = Dir.glob(File.join(@to_path, "*.srt")).map { |f| File.basename(f) }
    orin_files - conv_files
  end

  # Converts from_file into to_file.
  #
  # Returns true on success and false when the conversion failed. When the
  # source is already in the target encoding the content is copied unchanged
  # into to_path under the source's own basename. Raises RuntimeError when
  # the source encoding is neither given nor guessable.
  def convert_file(from_file, to_file)
    content = File.open(from_file, 'rb') { |f| f.read }

    # Keep the per-file encoding in a local so a guess for one file never
    # leaks into the next one and a user-supplied encoding is never lost.
    source_encoding = @from_encoding || guess_encoding(content, from_file)
    raise "Not specifying from_encoding" if source_encoding.nil? || source_encoding.strip.empty?

    if same_encoding?(source_encoding, @to_encoding)
      File.open(File.join(@to_path, File.basename(from_file)), 'wb') { |cf| cf.write content }
      return true
    end

    begin
      converted = transcode(content, source_encoding)
      File.open(to_file, 'wb') { |cf| cf.write converted }
      true
    rescue StandardError => e
      # remove a partially written target, if one was created at all
      if File.exist?(to_file)
        puts "delete failed file"
        File.delete(to_file)
      end
      puts "#{from_file} converted failed. Msg:#{e.message}."
      false
    end
  end

  # Guesses the encoding from the first characters of content. Returns the
  # encoding name, or nil when the detector's confidence is not above 0.9.
  def guess_encoding(content, from_file)
    result = UniversalDetector::chardet(content[0..1000])
    puts "Guessing Result: #{result.inspect} of file #{from_file}"
    confidence = result["confidence"]
    result["encoding"] if confidence && confidence > 0.9
  end

  # Compares encoding names ignoring case and dashes ("utf-8" == "UTF8").
  def same_encoding?(left, right)
    left.upcase.delete("-") == right.upcase.delete("-")
  end

  # Converts content to the target encoding; when the declared or guessed
  # source encoding fails, BIG5 is tried once before giving up.
  def transcode(content, source_encoding)
    Iconv.conv(@to_encoding, source_encoding, content)
  rescue StandardError
    Iconv.conv(@to_encoding, "BIG5", content)
  end

end
@@ -0,0 +1,80 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
# Absolute locations of the spec fixtures.
# They are constants rather than values set in a before block because values
# set there do not work with the DSL APIs.
class TempFile
  fixtures = File.join(File.dirname(__FILE__), 'test_files')
  sources = File.join(fixtures, 'files')

  TO_PATH = File.expand_path(File.join(fixtures, 'conv_files'))
  FROM_PATH = File.expand_path(sources)
  FROM_FILE = File.expand_path(File.join(sources, 'nedivx-tbl.gb.srt'))
  UTF8_FILE = File.expand_path(File.join(sources, 'utf8.gb.srt'))
end
9
# Specs for the Encoda DSL: error reporting for missing inputs, single-file
# conversion and batch conversion of a directory.
describe "Encoda" do

  describe "Providing incorrect info" do
    it "should raise error when the provided from_file cannot be found" do
      lambda {
        Encoda.convert do
          to_path TempFile::TO_PATH
          from_file 'xxx_1.srt'
        end
      }.should raise_error(/xxx_1\.srt cannot be found!/)
    end

    it "should raise error when the provided from_path cannot be found" do
      lambda {
        Encoda.convert do
          to_path TempFile::TO_PATH
          from_path "xxx"
        end
      }.should raise_error(/xxx cannot be found!/)
    end

    it "should raise error when the provided to_path cannot be found" do
      lambda {
        Encoda.convert do
          to_path "to_path"
        end
      }.should raise_error(/to_path cannot be found!/)
    end
  end

  describe "Providing correct info" do

    # Converted files currently present in the output fixture directory.
    def conv_files
      Dir.glob(File.join(TempFile::TO_PATH, "*.srt"))
    end

    # Every example starts with an empty output directory.
    before(:each) do
      conv_files.each do |f|
        File.delete(f)
      end
    end

    describe "Convert single file" do
      it "should convert 1 file to default converted folder" do
        Encoda.convert do
          from_file TempFile::FROM_FILE
          to_path TempFile::TO_PATH
        end
        conv_files.should have(1).item
      end

      it "should bypass the convert and copy the file directly if from_encoding is the same as to_encoding" do
        Encoda.convert do
          from_file TempFile::UTF8_FILE
          to_path TempFile::TO_PATH
        end
        conv_files.should have(1).item
        # a straight copy keeps the source's own file name
        File.basename(conv_files[0]).should == File.basename(TempFile::UTF8_FILE)
      end
    end

    describe "Convert files under a specific folder" do
      it "should batch convert files when specifying from_path" do
        encoda = Encoda.convert do
          from_path TempFile::FROM_PATH
          to_path TempFile::TO_PATH
        end
        conv_files.should have(encoda.failed.size + encoda.success.size).items
      end
    end
  end

end