encoda 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
@@ -0,0 +1,21 @@
1
+ ## MAC OS
2
+ .DS_Store
3
+
4
+ ## TEXTMATE
5
+ *.tmproj
6
+ tmtags
7
+
8
+ ## EMACS
9
+ *~
10
+ \#*
11
+ .\#*
12
+
13
+ ## VIM
14
+ *.swp
15
+
16
+ ## PROJECT::GENERAL
17
+ coverage
18
+ rdoc
19
+ pkg
20
+
21
+ ## PROJECT::SPECIFIC
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Zipme
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,62 @@
1
+ = Encoda
2
+
3
+ This is a simple file encoding converter
4
+
5
+ == Installation
6
+
7
+ sudo gem install encoda
8
+
9
+ == Getting Started
10
+
11
+ ==== Single file conversion
12
+
13
+ Encoda.convert do
14
+ to_path 'convert/to/path'
15
+ from_file 'path/to/your/file'
16
+ from_encoding 'GB2312'
17
+ to_encoding 'UTF8'
18
+ end
19
+
20
+ ==== Multiple files conversion
21
+
22
+ You can batch convert multiple files from a specific directory by specifying from_path (currently only *.srt files in that directory are picked up)
23
+
24
+ Encoda.convert do
25
+ from_path '/from/path'
26
+ to_path '/to/path'
27
+ to_encoding 'UTF8'
28
+ end
29
+
30
+ If you specify from_path, it will always do the batch conversion and ignore the from_file option.
31
+
32
+ As you can see, if you omit the from_encoding option, Encoda will try to guess the file's original encoding.
33
+ The guessing feature is based on the chardet library.
34
+
35
+ ==== Retrieve the conversion result
36
+
37
+ encoda = Encoda.convert do
38
+ from_path '/from/path'
39
+ to_path '/to/path'
40
+ to_encoding 'UTF8'
41
+ end
42
+
43
+ puts encoda.failed #=> ['failed_file1.txt', 'failed_file2.txt']
44
+ puts encoda.success #=> ['success_file1.txt', 'success_file3.txt', 'file4.srt']
45
+
46
+ == TODOs
47
+
48
+ Refactoring!!!
49
+
50
+ == Note on Patches/Pull Requests
51
+
52
+ * Fork the project.
53
+ * Make your feature addition or bug fix.
54
+ * Add tests for it. This is important so I don't break it in a
55
+ future version unintentionally.
56
+ * Commit, do not mess with rakefile, version, or history.
57
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
58
+ * Send me a pull request. Bonus points for topic branches.
59
+
60
+ == Copyright
61
+
62
+ Copyright (c) 2010 Zipme. See LICENSE for details.
@@ -0,0 +1,46 @@
1
# Rake tasks for the encoda gem: packaging (Jeweler), specs and coverage
# (RSpec 1.x rake tasks), and RDoc generation.
require 'rubygems'
require 'rake'

# Packaging tasks. Jeweler is optional: when it cannot be loaded we print a
# hint and keep going so the remaining tasks are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "encoda"
    gem.summary = %Q{Encoding converter}
    gem.description = %Q{It's a simple file encoding converter }
    gem.email = "genkiwow@gmail.com"
    gem.homepage = "http://github.com/zipme/encoda"
    gem.authors = ["Zipme"]
    gem.add_dependency "chardet", ">=0.9.0"
    gem.add_development_dependency "rspec", ">= 1.2.9"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

require 'spec/rake/spectask'

# rake spec: run every *_spec.rb under spec/.
Spec::Rake::SpecTask.new(:spec) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.spec_files = FileList['spec/**/*_spec.rb']
end

# rake rcov: same specs, with rcov coverage enabled.
Spec::Rake::SpecTask.new(:rcov) do |spec|
  spec.libs << 'lib' << 'spec'
  spec.pattern = 'spec/**/*_spec.rb'
  spec.rcov = true
end

# check_dependencies is not defined in this Rakefile (presumably it comes
# from the Jeweler tasks above). Only depend on it when it exists; otherwise
# `rake spec` cannot resolve the prerequisite when Jeweler failed to load.
task :spec => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :spec

require 'rake/rdoctask'

# rake rdoc: build the API docs into rdoc/.
Rake::RDocTask.new do |rdoc|
  # strip: the VERSION file ends with a newline that would leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "encoda #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.1.0
@@ -0,0 +1,66 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{encoda}
8
+ s.version = "0.1.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Zipme"]
12
+ s.date = %q{2010-01-07}
13
+ s.description = %q{It's a simple file encoding converter }
14
+ s.email = %q{genkiwow@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".gitignore",
22
+ "LICENSE",
23
+ "README.rdoc",
24
+ "Rakefile",
25
+ "VERSION",
26
+ "encoda.gemspec",
27
+ "lib/encoda.rb",
28
+ "spec/encoda_spec.rb",
29
+ "spec/spec.opts",
30
+ "spec/spec_helper.rb",
31
+ "spec/test_files/conv_files/nedivx-tbl.big5.srt",
32
+ "spec/test_files/conv_files/nedivx-tbl.eng.srt",
33
+ "spec/test_files/conv_files/nedivx-tbl.gb.srt",
34
+ "spec/test_files/conv_files/utf8.gb.srt",
35
+ "spec/test_files/files/nedivx-tbl.big5.srt",
36
+ "spec/test_files/files/nedivx-tbl.eng.srt",
37
+ "spec/test_files/files/nedivx-tbl.gb.srt",
38
+ "spec/test_files/files/utf8.gb.srt"
39
+ ]
40
+ s.homepage = %q{http://github.com/zipme/encoda}
41
+ s.rdoc_options = ["--charset=UTF-8"]
42
+ s.require_paths = ["lib"]
43
+ s.rubygems_version = %q{1.3.5}
44
+ s.summary = %q{Encoding converter}
45
+ s.test_files = [
46
+ "spec/encoda_spec.rb",
47
+ "spec/spec_helper.rb"
48
+ ]
49
+
50
+ if s.respond_to? :specification_version then
51
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
52
+ s.specification_version = 3
53
+
54
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
55
+ s.add_runtime_dependency(%q<chardet>, [">= 0.9.0"])
56
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
57
+ else
58
+ s.add_dependency(%q<chardet>, [">= 0.9.0"])
59
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
60
+ end
61
+ else
62
+ s.add_dependency(%q<chardet>, [">= 0.9.0"])
63
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
64
+ end
65
+ end
66
+
@@ -0,0 +1,141 @@
1
+ require 'rubygems'
2
+ require 'UniversalDetector'
3
+ require 'iconv'
4
+ require 'ftools'
5
+
6
# Converts text files from one character encoding to another.
#
# A conversion is described with the block DSL of Encoda.convert: the block is
# instance_eval'd on a fresh Encoda, so +from_file+ / +from_path+, +to_path+,
# +to_file+, +from_encoding+ and +to_encoding+ are called as bare methods.
# When +from_path+ is set, the *.srt files of that directory that are not yet
# present in +to_path+ are converted; otherwise the single +from_file+ is.
# After the run, +failed+ and +success+ list the files that were attempted.
class Encoda

  # Sentinel for "called without an argument", so that each DSL method can
  # also be used as a plain reader (e.g. encoda.to_path).
  UNSET = Object.new.freeze

  attr_reader :failed, :success

  def initialize
    @from_path = ""
    @to_path = ""
    @from_file = nil
    @to_file = ""
    @from_encoding = nil
    @to_encoding = "UTF8"
    @failed = []
    @success = []
  end

  # DSL accessors: with an argument they store the value (and return it),
  # without one they return the current value.

  # Directory to batch-convert from.
  def from_path(from_path = UNSET)
    read_or_assign(:@from_path, from_path)
  end

  # Single source file (ignored when from_path is set).
  def from_file(from_file = UNSET)
    read_or_assign(:@from_file, from_file)
  end

  # Target file name for single-file conversion; only its basename is used.
  def to_file(to_file = UNSET)
    read_or_assign(:@to_file, to_file)
  end

  # Target directory; it must already exist.
  def to_path(to_path = UNSET)
    read_or_assign(:@to_path, to_path)
  end

  # Source encoding. When left unset it is guessed per file with chardet.
  def from_encoding(from_enc = UNSET)
    read_or_assign(:@from_encoding, from_enc)
  end

  # Target encoding (defaults to "UTF8").
  def to_encoding(to_enc = UNSET)
    read_or_assign(:@to_encoding, to_enc)
  end

  # Runs the configured conversion.
  # Raises RuntimeError when a configured file or directory does not exist.
  def run!
    #TODO: Add some validation against the attrs
    process!
  end

  #DSL API
  # Builds an Encoda, evaluates the block against it, runs the conversion and
  # returns the instance so the caller can inspect +failed+ / +success+.
  def self.convert(&block)
    encoda = Encoda.new
    encoda.instance_eval(&block)
    encoda.run!
    encoda
  end

  #Conventional API
  # Not implemented yet; currently does nothing and returns nil.
  def convert(from, to, from_encoding, to_encoding)
    #TODO
  end

  private

  # Shared body of the DSL accessors.
  def read_or_assign(ivar, value)
    return instance_variable_get(ivar) if UNSET.equal?(value)
    instance_variable_set(ivar, value)
  end

  def process!
    puts "convert begins!"
    check_file_or_dir @to_path
    attempted = convert_from_path? ? convert_directory : convert_single_file
    @success = attempted - @failed
    puts "convert finished! #{@success.size} files have been converted, #{@failed.size} failed."
  end

  # Batch mode: converts every pending file and returns the attempted names.
  def convert_directory
    create_directories
    unfinished_files.each do |filename|
      @failed << filename unless convert_file( "#{@from_path}/#{filename}", "#{@to_path}/#{filename}" )
    end
    unfinished_files
  end

  # Single-file mode: converts @from_file and returns it as the attempted list,
  # so that +success+ reflects this file rather than a directory listing.
  def convert_single_file
    check_file_or_dir @from_file
    @to_file = "converted_#{File.basename(@from_file)}" if @to_file.to_s.strip.empty?
    @failed << @from_file unless convert_file( @from_file, "#{@to_path}/#{File.basename(@to_file)}" )
    [@from_file]
  end

  def convert_from_path?
    # nil check first: calling empty? on nil would raise.
    !(@from_path.nil? || @from_path.empty?)
  end

  def create_directories
    check_file_or_dir @from_path
    Dir.mkdir @to_path unless File.exist? @to_path
  end

  def check_file_or_dir(file_or_dir)
    raise "File or directory: #{file_or_dir} cannot be found!" if ( file_or_dir.nil? || !File.exist?(file_or_dir) )
  end

  # Names of the *.srt files in from_path that have no counterpart in to_path.
  # Memoized, so the list reflects the state before the conversion started.
  def unfinished_files
    @unfinished_files ||= unfinished_files!
  end

  def unfinished_files!
    orin_files = Dir.glob(File.join(@from_path, "*.srt")).map { |f| File.basename(f) }
    conv_files = Dir.glob(File.join(@to_path, "*.srt")).map { |f| File.basename(f) }
    orin_files - conv_files
  end

  # Converts +from_file+ into +to_file+. Returns true on success and false
  # when the conversion failed (the partial target file is removed).
  # Raises RuntimeError when no source encoding is given and none could be
  # guessed with enough confidence.
  def convert_file(from_file, to_file)
    # Always read the content: it is needed for guessing AND for converting.
    content = File.open(from_file, 'r') { |f| f.read }

    # Keep the effective encoding local so a user-supplied from_encoding stays
    # valid for every file and a guessed one never leaks into the next file.
    source_encoding = @from_encoding || guess_encoding(content, from_file)
    raise "Not specifying from_encoding" if source_encoding.nil? || source_encoding.strip.empty?

    if same_encoding?(source_encoding, @to_encoding)
      # Already in the target encoding: copy as-is under the source's name.
      File.copy(from_file, "#{@to_path}/#{File.basename(from_file)}")
      return true
    end

    begin
      File.open(to_file, 'w') do |cf|
        converted =
          begin
            Iconv.conv(@to_encoding, source_encoding, content)
          rescue StandardError
            # Second attempt, assuming the file is BIG5.
            Iconv.conv(@to_encoding, "BIG5", content)
          end
        cf.write converted
      end
    rescue StandardError => e
      puts "delete failed file"
      File.delete(to_file) if File.exist?(to_file)
      puts "#{from_file} converted failed. Msg:#{e.message}."
      return false
    end

    true
  end

  # Guesses the encoding from the first part of +content+. Returns the
  # detected name only when the detector's confidence is above 0.9, else nil.
  def guess_encoding(content, from_file)
    result = UniversalDetector::chardet(content[0..1000])
    puts "Guessing Result: #{result.inspect} of file #{from_file}"
    result["encoding"] if result["confidence"] > 0.9
  end

  # Compares encoding names ignoring case and hyphens ("utf-8" == "UTF8").
  def same_encoding?(left, right)
    left.upcase.delete("-") == right.upcase.delete("-")
  end

end
@@ -0,0 +1,80 @@
1
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Fixture locations shared by the examples.
# These are constants rather than values set in a before block because,
# as the original note says, those do not work with the DSL API.
class TempFile
  TO_PATH = File.expand_path(File.dirname(__FILE__) + '/test_files/conv_files')
  FROM_PATH = File.expand_path(File.dirname(__FILE__) + '/test_files/files')
  FROM_FILE = File.expand_path(File.dirname(__FILE__) + '/test_files/files/nedivx-tbl.gb.srt')
  UTF8_FILE = File.expand_path(File.dirname(__FILE__) + '/test_files/files/utf8.gb.srt')
end

describe "Encoda" do

  describe "Providing incorrect info" do
    it "should raise error when the provided from_file cannot be found" do
      lambda {
        Encoda.convert do
          to_path TempFile::TO_PATH
          from_file 'xxx_1.srt'
        end
      }.should raise_error(/xxx_1\.srt cannot be found!/)
    end

    it "should raise error when the provided from_path cannot be found" do
      lambda {
        Encoda.convert do
          to_path TempFile::TO_PATH
          from_path "xxx"
        end
      }.should raise_error(/xxx cannot be found!/)
    end

    it "should raise error when the provided to_path cannot be found" do
      lambda {
        Encoda.convert do
          to_path "to_path"
        end
      }.should raise_error(/to_path cannot be found!/)
    end
  end

  describe "Providing correct info" do

    # Converted files currently present in the target folder.
    def conv_files
      Dir.glob(File.join(TempFile::TO_PATH, "*.srt"))
    end

    # Start every example from an empty target folder.
    before(:each) do
      conv_files.each do |f|
        File.delete(f)
      end
    end

    describe "Convert single file" do
      it "should convert 1 file to default converted folder" do
        Encoda.convert do
          from_file TempFile::FROM_FILE
          to_path TempFile::TO_PATH
        end
        conv_files.should have(1).item
      end

      it "should bypass the convert and copy the file directly if from_encoding is the same as to_encoding" do
        Encoda.convert do
          from_file TempFile::UTF8_FILE
          to_path TempFile::TO_PATH
        end
        conv_files.should have(1).item
        File.basename(conv_files[0]).should == File.basename(TempFile::UTF8_FILE)
      end
    end

    describe "Convert files under a specific folder" do
      it "should batch convert files when specifying from_path" do
        encoda = Encoda.convert do
          from_path TempFile::FROM_PATH
          to_path TempFile::TO_PATH
        end
        # The output of a failed conversion is deleted by the library, so only
        # the successfully converted files are expected in the target folder.
        conv_files.should have(encoda.success.size).items
      end
    end
  end

end