shoji 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.rvmrc ADDED
@@ -0,0 +1 @@
1
+ rvm use 1.9.3-p125@shoji --create
data/Gemfile ADDED
@@ -0,0 +1,17 @@
1
+ source "http://rubygems.org"
2
+ # Add dependencies required to use your gem here.
3
+ # Example:
4
+ # gem "activesupport", ">= 2.3.5"
5
+
6
+ # Add dependencies to develop your gem here.
7
+ # Include everything needed to run rake, tests, features, etc.
8
+ group :development do
9
+ gem "shoulda", ">= 0"
10
+ gem "rdoc", "~> 3.12"
11
+ gem "bundler", "~> 1.0.0"
12
+ gem "jeweler", "~> 1.8.3"
13
+ end
14
+
15
+ gem 'zipruby'
16
+ gem 'spreadsheet'
17
+ gem 'nokogiri'
data/Gemfile.lock ADDED
@@ -0,0 +1,35 @@
1
+ GEM
2
+ remote: http://rubygems.org/
3
+ specs:
4
+ git (1.2.5)
5
+ jeweler (1.8.3)
6
+ bundler (~> 1.0)
7
+ git (>= 1.2.5)
8
+ rake
9
+ rdoc
10
+ json (1.6.5)
11
+ nokogiri (1.5.2)
12
+ rake (0.9.2.2)
13
+ rdoc (3.12)
14
+ json (~> 1.4)
15
+ ruby-ole (1.2.11.3)
16
+ shoulda (3.0.1)
17
+ shoulda-context (~> 1.0.0)
18
+ shoulda-matchers (~> 1.0.0)
19
+ shoulda-context (1.0.0)
20
+ shoulda-matchers (1.0.0)
21
+ spreadsheet (0.6.8)
22
+ ruby-ole (>= 1.0)
23
+ zipruby (0.3.6)
24
+
25
+ PLATFORMS
26
+ ruby
27
+
28
+ DEPENDENCIES
29
+ bundler (~> 1.0.0)
30
+ jeweler (~> 1.8.3)
31
+ nokogiri
32
+ rdoc (~> 3.12)
33
+ shoulda
34
+ spreadsheet
35
+ zipruby
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2012 CHIKURA Shinsaku
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21
+
data/README.rdoc ADDED
@@ -0,0 +1,19 @@
1
+ = shoji
2
+
3
+ Shoji reads table-structured data from Excel (.xls), OpenDocument spreadsheet (.ods), CSV and TSV files.
4
+
5
+ == Contributing to shoji
6
+
7
+ * Check out the latest master to make sure the feature hasn't been implemented or the bug hasn't been fixed yet.
8
+ * Check out the issue tracker to make sure someone hasn't already requested it and/or contributed it.
9
+ * Fork the project.
10
+ * Start a feature/bugfix branch.
11
+ * Commit and push until you are happy with your contribution.
12
+ * Make sure to add tests for it. This is important so I don't break it in a future version unintentionally.
13
+ * Please try not to mess with the Rakefile, version, or history. If you want to have your own version, or it is otherwise necessary, that is fine, but please isolate it to its own commit so I can cherry-pick around it.
14
+
15
+ == Copyright
16
+
17
+ Copyright (c) 2012 CHIKURA Shinsaku. See LICENSE.txt for
18
+ further details.
19
+
data/Rakefile ADDED
@@ -0,0 +1,45 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'bundler'
5
+ begin
6
+ Bundler.setup(:default, :development)
7
+ rescue Bundler::BundlerError => e
8
+ $stderr.puts e.message
9
+ $stderr.puts "Run `bundle install` to install missing gems"
10
+ exit e.status_code
11
+ end
12
+ require 'rake'
13
+
14
+ require 'jeweler'
15
+ Jeweler::Tasks.new do |gem|
16
+ # gem is a Gem::Specification... see http://docs.rubygems.org/read/chapter/20 for more options
17
+ gem.name = "shoji"
18
+ gem.homepage = "http://github.com/chsh/shoji"
19
+ gem.license = "MIT"
20
+ gem.summary = %Q{Shoji is a package of reading various formats of table structured data file.}
21
+ gem.description = %Q{Shoji is a package of reading various formats of table structured data file.}
22
+ gem.email = "shinsaku@chikura.me"
23
+ gem.authors = ["CHIKURA Shinsaku"]
24
+ # dependencies defined in Gemfile
25
+ end
26
+ Jeweler::RubygemsDotOrgTasks.new
27
+
28
+ require 'rake/testtask'
29
+ Rake::TestTask.new(:test) do |test|
30
+ test.libs << 'lib' << 'test'
31
+ test.pattern = 'test/**/test_*.rb'
32
+ test.verbose = true
33
+ end
34
+
35
+ task :default => :test
36
+
37
+ require 'rdoc/task'
38
+ Rake::RDocTask.new do |rdoc|
39
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
40
+
41
+ rdoc.rdoc_dir = 'rdoc'
42
+ rdoc.title = "shoji #{version}"
43
+ rdoc.rdoc_files.include('README*')
44
+ rdoc.rdoc_files.include('lib/**/*.rb')
45
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.9
data/lib/shoji.rb ADDED
@@ -0,0 +1,4 @@
1
+
2
+ require 'shoji_main'
3
+
4
+
data/lib/shoji/base.rb ADDED
@@ -0,0 +1,33 @@
1
+
2
+
3
+ class Shoji::Base
4
+ include Enumerable
5
+ def self.foreach(filename, opts = {}, &block); raise NoMethodError.new; end
6
+ def self.valid_file?(filename, opts = {}); raise NoMethodError.new; end
7
+ def self.rows(filename, opts = {}); raise NoMethodError.new; end
8
+ def self.row_size(filename, opts = {}); raise NoMethodError.new; end
9
+ def self.valid_content?(content, opts = {})
10
+ tf = Tempfile.new("shoji-base.#{$$}.data")
11
+ tf.write content
12
+ tf.close
13
+ status = self.valid_file?(tf.path)
14
+ tf.close(true)
15
+ status
16
+ end
17
+
18
+ def each(&block)
19
+ self.class.foreach(@filename, {}, &block)
20
+ end
21
+ def self.foreach_hash(filename, opts = {}, &block)
22
+ header = nil
23
+ self.foreach(filename, opts) do |row|
24
+ if header
25
+ hash = Hash[*[header, row].transpose.flatten]
26
+ block.call(hash)
27
+ else
28
+ header = row.map(&:to_sym)
29
+ end
30
+ end
31
+ end
32
+
33
+ end
data/lib/shoji/csv.rb ADDED
@@ -0,0 +1,12 @@
1
+
2
+ require 'shoji/text_base'
3
+
4
+ class Shoji::CSV < Shoji::TextBase
5
+ def self.valid_file?(filename)
6
+ has_char? filename, ','
7
+ end
8
+ protected
9
+ def self.fastercsv_opts
10
+ {}
11
+ end
12
+ end
@@ -0,0 +1,76 @@
1
+ # Shoji
2
+
3
+ require 'spreadsheet'
4
+
5
+ require 'shoji/base'
6
+ class Shoji::Excel < Shoji::Base
7
+
8
+ require 'shoji/excel/reader'
9
+
10
+ READER = Shoji::Excel::Reader
11
+
12
+ def self.foreach(filename, opts = {}, &block)
13
+ raise 'Block must be exist.' unless block_given?
14
+ READER.new(filename).foreach(opts, &block)
15
+ end
16
+ def self.valid_file?(filename)
17
+ READER.valid_file? filename
18
+ end
19
+
20
+ def self.rows(filename, opts = {})
21
+ READER.new(filename).rows(opts)
22
+ end
23
+
24
+ def self.row_size(filename, opts = {})
25
+ READER.new(filename).row_size(opts)
26
+ end
27
+
28
+ def self.convert_to_hash(filename, opts = {})
29
+ opts_for_parse = opts.slice(:sheet_index)
30
+ opts_for_convert = opts.slice(:header)
31
+ rows = self.rows(filename, opts)
32
+ return {} if rows.size < 2
33
+ list = []
34
+ header = rows.shift
35
+ header = opts_for_convert[:header] if opts_for_convert[:header]
36
+ rows.each do |row|
37
+ list << make_hash(header, row)
38
+ end
39
+ list
40
+ end
41
+
42
+ def initialize(filename)
43
+ @filename = filename
44
+ end
45
+
46
+ def foreach(opts = {}, &block)
47
+ self.class.foreach(@filename, opts, &block)
48
+ end
49
+ def valid_file?
50
+ self.class.valid_file? @filename
51
+ end
52
+ def rows(opts = {})
53
+ self.class.rows(@filename, opts)
54
+ end
55
+ def row_size(opts = {})
56
+ self.class.row_size(@filename, opts)
57
+ end
58
+ def convert_to_hash(opts = {})
59
+ self.class.convert_to_hash(@filename, opts)
60
+ end
61
+
62
+ private
63
+ def self.process_rows(sheet, &block)
64
+ sheet.each do |row|
65
+ cells = cells_from_row(row)
66
+ block.call(cells)
67
+ end
68
+ end
69
+ def self.make_hash(header_columns, row_columns)
70
+ h = {}
71
+ header_columns.size.times do |i|
72
+ h[header_columns[i]] = row_columns[i]
73
+ end
74
+ h
75
+ end
76
+ end
@@ -0,0 +1,70 @@
1
+
2
+ require 'tempfile'
3
+ require 'spreadsheet'
4
+
5
+ class Shoji::Excel::Reader
6
+
7
+ def initialize(filename_or_content)
8
+ @filename_or_content = filename_or_content
9
+ end
10
+
11
+ def self.valid_file?(filename_or_content)
12
+ valid = true
13
+ begin
14
+ Spreadsheet.open(filename_or_content) do |workbook|
15
+ end
16
+ rescue
17
+ valid = false
18
+ end
19
+ valid
20
+ end
21
+
22
+ def rows(opts = {})
23
+ r = []
24
+ foreach(opts) do |row|
25
+ r << row
26
+ end
27
+ r
28
+ end
29
+
30
+ def row_size(opts = {})
31
+ return @row_size if @row_size
32
+ sheet_index = opts[:sheet_index] || 0
33
+ idx = 0
34
+ Spreadsheet.open(@filename_or_content) do |workbook|
35
+ num_sheets = workbook.worksheets.size
36
+ return [] if num_sheets == 0 || num_sheets <= sheet_index
37
+ worksheet = workbook.worksheet(sheet_index)
38
+ @row_size = worksheet.row_count
39
+ @row_size -= 1 if opts[:use_header]
40
+ end
41
+ @row_size
42
+ end
43
+
44
+ def foreach(opts = {}, &block)
45
+ sheet_index = opts[:sheet_index] || 0
46
+ Spreadsheet.open(@filename_or_content) do |workbook|
47
+ num_sheets = workbook.worksheets.size
48
+ return [] if num_sheets == 0 || num_sheets <= sheet_index
49
+ worksheet = workbook.worksheet(sheet_index)
50
+ process_rows(worksheet, opts, &block)
51
+ end
52
+ end
53
+
54
+ private
55
+ def process_rows(worksheet, opts = {}, &block)
56
+ max = opts[:limit]
57
+ idx = 0
58
+ idx -= 1 if opts[:use_header]
59
+ worksheet.each do |row|
60
+ cells = []
61
+ row.each do |c|
62
+ cells << c
63
+ end
64
+ block.call(cells)
65
+ idx += 1
66
+ break if max && max <= idx
67
+ end
68
+ end
69
+
70
+ end
data/lib/shoji/ods.rb ADDED
@@ -0,0 +1,75 @@
1
+
2
+ require 'shoji/base'
3
+
4
+ class Shoji::ODS < Shoji::Base
5
+
6
+ require 'shoji/ods/reader'
7
+ READER = Shoji::ODS::Reader
8
+
9
+ def self.foreach(filename, opts = {}, &block)
10
+ raise 'Block must be exist.' unless block_given?
11
+ READER.open(filename, opts[:sheet]) do |row|
12
+ block.call(row)
13
+ end
14
+ end
15
+ def self.valid_file?(filename)
16
+ READER.valid_file? filename
17
+ end
18
+
19
+ def self.rows(filename, opts = {})
20
+ READER.new(filename).rows(opts)
21
+ end
22
+
23
+ def self.row_size(filename, opts = {})
24
+ READER.new(filename).row_size(opts)
25
+ end
26
+
27
+ def self.convert_to_hash(filename, opts = {})
28
+ opts_for_parse = opts.slice(:sheet_index)
29
+ opts_for_convert = opts.slice(:header)
30
+ rows = self.rows(filename, opts)
31
+ return {} if rows.size < 2
32
+ list = []
33
+ header = rows.shift
34
+ header = opts_for_convert[:header] if opts_for_convert[:header]
35
+ rows.each do |row|
36
+ list << make_hash(header, row)
37
+ end
38
+ list
39
+ end
40
+
41
+ def initialize(filename)
42
+ @filename = filename
43
+ end
44
+
45
+ def foreach(opts = {}, &block)
46
+ self.class.foreach(@filename, opts, &block)
47
+ end
48
+ def valid_file?
49
+ self.class.valid_file? @filename
50
+ end
51
+ def rows(opts = {})
52
+ self.class.rows(@filename, opts)
53
+ end
54
+ def row_size(opts = {})
55
+ self.class.row_size(@filename, opts)
56
+ end
57
+ def convert_to_hash(opts = {})
58
+ self.class.convert_to_hash(@filename, opts)
59
+ end
60
+
61
+ private
62
+ def self.process_rows(sheet, &block)
63
+ sheet.each do |row|
64
+ cells = cells_from_row(row)
65
+ block.call(cells)
66
+ end
67
+ end
68
+ def self.make_hash(header_columns, row_columns)
69
+ h = {}
70
+ header_columns.size.times do |i|
71
+ h[header_columns[i]] = row_columns[i]
72
+ end
73
+ h
74
+ end
75
+ end
@@ -0,0 +1,119 @@
1
+ require 'zipruby'
2
+ require 'nokogiri'
3
+
4
+ class Shoji::ODS::Reader
5
+
6
+ attr_accessor :skip_empty_row
7
+
8
+ def self.open(filename, sheet_name = nil, &block)
9
+ reader = new(filename)
10
+ reader.skip_empty_row = false
11
+ reader.process_book(sheet_name, &block)
12
+ end
13
+
14
+ def initialize(filename)
15
+ @filename = filename
16
+ end
17
+
18
+ def process_book(sheet_name = nil, &block)
19
+ docbytes = read_from_zip_content_xml(@filename)
20
+
21
+ doc = Nokogiri::XML(docbytes)
22
+ path = "//table:table"
23
+ path += "[@table:name='#{sheet_name}']" if sheet_name
24
+ ws = doc.at_xpath path
25
+ process_sheet(ws, &block)
26
+ end
27
+
28
+ def valid_file?
29
+ valid = true
30
+ begin
31
+ read_from_zip_content_xml(@filename, true)
32
+ rescue
33
+ valid = false
34
+ end
35
+ valid
36
+ end
37
+ def rows(opts = {})
38
+ result = []
39
+ process_book(opts[:sheet]) do |row|
40
+ result << row
41
+ end
42
+ result
43
+ end
44
+ def self.valid_file?(filename)
45
+ new(filename).valid_file?
46
+ end
47
+
48
+ def process_sheet(sheet, &block)
49
+ sheet.xpath('table:table-row').each do |row|
50
+ rowreps = row['table:number-rows-repeated'] || '1'
51
+ rowreps = rowreps.to_i
52
+ process_row(rowreps, row, &block)
53
+ end
54
+ end
55
+
56
+ def process_row(rowreps, row, &block)
57
+ cols = []
58
+ index = 0
59
+ has_value = false
60
+ row.xpath('table:table-cell').each do |cell|
61
+ tv = typed_value cell
62
+ if tv && tv != ''
63
+ cols[index] = tv
64
+ has_value = true
65
+ else
66
+ cols[index] = ''
67
+ end
68
+ colreps = cell['number-columns-repeated']
69
+ if colreps
70
+ colreps.to_i.times do |num|
71
+ cols[index + num] = cols[index]
72
+ end
73
+ index = index + colreps.to_i
74
+ else
75
+ index = index + 1
76
+ end
77
+ end
78
+ cols = regulate_trailing_blank_cols(cols)
79
+ rowreps.times do |num|
80
+ if has_value
81
+ block.call(cols)
82
+ elsif !skip_empty_row
83
+ block.call(cols)
84
+ end
85
+ end
86
+ end
87
+
88
+ private
89
+ def typed_value(cell)
90
+ case cell['value-type']
91
+ when nil then nil
92
+ when 'date' then Date.parse cell['date-value']
93
+ when 'currency', 'float' then cell['value'].to_f
94
+ else cell.text
95
+ end
96
+ end
97
+
98
+ def read_from_zip_content_xml(filename, verify_only = false)
99
+ raise "File:#{filename} doesn't exist." unless File.exist? filename
100
+ docbytes = nil
101
+ Zip::Archive.open(filename) do |ar|
102
+ raise "content.xml doesn't exist in #{filename}" unless ar.get_stat 'content.xml' # raise unless exist.
103
+ unless verify_only
104
+ f = ar.fopen('content.xml')
105
+ docbytes = f.read
106
+ f.close
107
+ end
108
+ end
109
+ docbytes
110
+ end
111
+
112
+ def regulate_trailing_blank_cols(cols)
113
+ while cols.size > 0 && (cols[cols.size-1] == nil || cols[cols.size-1] == '')
114
+ cols.pop
115
+ end
116
+ cols
117
+ end
118
+
119
+ end
@@ -0,0 +1,58 @@
1
+ require 'csv'
2
+ require 'shoji/base'
3
+ require 'shoji/utf8_file'
4
+
5
+ class Shoji::TextBase < Shoji::Base
6
+ ENCMAP = Hash.new('n').merge({
7
+ 'UTF-8' => 'u',
8
+ 'SHIFT-JIS' => 's',
9
+ 'EUC-JP' => 'e',
10
+ 'CP932' => 's'
11
+ })
12
+ def self.foreach(filename, opts = {}, &block)
13
+ Shoji::UTF8File.convert filename do |path|
14
+ limit = opts[:limit].to_i
15
+ index = 0
16
+ CSV.foreach(path, fastercsv_opts) do |row|
17
+ block.call(row)
18
+ index += 1
19
+ break if (limit > 0 && limit <= index)
20
+ end
21
+ end
22
+ end
23
+ def self.valid_file?(filename, opts = {})
24
+ Shoji::UTF8File.convert filename do |path|
25
+ end
26
+ end
27
+ def self.rows(filename, opts = {})
28
+ rows = []
29
+ self.foreach(filename, opts) do |row|
30
+ rows << row
31
+ end
32
+ rows
33
+ end
34
+ def self.row_size(filename, opts = {})
35
+ enc = ENCMAP[Shoji::UTF8File.guess_encoding(filename)]
36
+ index = 0
37
+ CSV.foreach(filename, fastercsv_opts.merge({:encoding => enc})) do |row|
38
+ index += 1
39
+ end
40
+ index
41
+ end
42
+ protected
43
+ def self.fastercsv_opts; raise NoMethodError.new; end
44
+ def self.first_line(filename)
45
+ line = nil
46
+ Shoji::UTF8File.convert filename do |path|
47
+ File.foreach(path) do |l|
48
+ line = l
49
+ break
50
+ end
51
+ end
52
+ line
53
+ end
54
+ def self.has_char?(filename, char)
55
+ first_line(filename).include? char
56
+ end
57
+
58
+ end
data/lib/shoji/tsv.rb ADDED
@@ -0,0 +1,12 @@
1
+
2
+ require 'shoji/text_base'
3
+
4
+ class Shoji::TSV < Shoji::TextBase
5
+ def self.valid_file?(filename)
6
+ has_char? filename, "\t"
7
+ end
8
+ protected
9
+ def self.fastercsv_opts
10
+ { :col_sep => "\t" }
11
+ end
12
+ end
@@ -0,0 +1,77 @@
1
+ require 'tempfile'
2
+ require 'nkf'
3
+
4
+ class Shoji::UTF8File
5
+ def self.convert(filename, &block)
6
+ encoding = guess_encoding(filename)
7
+ raise "Couldn't detect encoding" unless encoding
8
+ fp = make_instance(filename, encoding)
9
+ if block_given?
10
+ begin
11
+ yield(fp.path)
12
+ ensure
13
+ fp.delete
14
+ end
15
+ else
16
+ fp
17
+ end
18
+ end
19
+ attr_reader :tempfile
20
+ def initialize(source, type)
21
+ @filename = nil; @tempfile = nil
22
+ case type
23
+ when :filename then @filename = source
24
+ when :tempfile then @tempfile = source
25
+ else raise "Unexpected type=#{type}"
26
+ end
27
+ end
28
+ def path
29
+ return @filename if @filename
30
+ @tempfile.path
31
+ end
32
+ def delete
33
+ return false unless @tempfile
34
+ @tempfile.close(true)
35
+ @tempfile = nil
36
+ true
37
+ end
38
+ def self.guess_encoding(filename)
39
+ NKF2ICONV[NKF.guess(read_lines(filename, 3))]
40
+ end
41
+ private
42
+ NKF2ICONV = {
43
+ NKF::UTF8 => 'UTF-8',
44
+ NKF::SJIS => 'SJIS',
45
+ NKF::EUC => 'EUC-JP',
46
+ NKF::JIS => 'ISO-2022-JP',
47
+ NKF::ASCII => 'UTF-8'
48
+ }
49
+ def self.winfile?(filename)
50
+ line = read_lines(filename, 1)
51
+ if line =~ /\r\n$/
52
+ true
53
+ else
54
+ false
55
+ end
56
+ end
57
+ def self.make_instance(filename, encoding)
58
+ return new(filename, :filename) if encoding == 'UTF-8'
59
+ if winfile?(filename) && encoding == 'SJIS'
60
+ encoding = 'CP932'
61
+ end
62
+ tf = Tempfile.new('file-path')
63
+ tf.write File.open(filename, "r:#{encoding}:UTF-8").read
64
+ tf.close
65
+ new(tf, :tempfile)
66
+ end
67
+ def self.read_lines(filename, max = 1)
68
+ lines = []
69
+ index = 0
70
+ File.foreach(filename, encoding: 'BINARY') do |line|
71
+ lines << line
72
+ index += 1
73
+ break if index >= max
74
+ end
75
+ lines.join('')
76
+ end
77
+ end
data/lib/shoji_main.rb ADDED
@@ -0,0 +1,105 @@
1
+
2
+ # meta class of excel, ods, csv and tsv processor.
3
+
4
+
5
+ class Shoji
6
+
7
+ autoload :Excel, 'shoji/excel'
8
+ autoload :CSV, 'shoji/csv'
9
+ autoload :TSV, 'shoji/tsv'
10
+ autoload :ODS, 'shoji/ods'
11
+
12
+ class_eval do
13
+ [:foreach, :foreach_hash, :valid_file?, :valid_content?, :rows, :row_size].each do |meth|
14
+ eval <<EOL
15
+ def self.#{meth}(*args, &block)
16
+ klass = class_from_params(*args, &block)
17
+ klass.send(:#{meth}, *args, &block)
18
+ end
19
+ EOL
20
+ end
21
+ end
22
+
23
+ def initialize(filename, opts = {}, &block)
24
+ @filename = filename
25
+ @opts = opts
26
+ if block_given?
27
+ yield(self)
28
+ end
29
+ end
30
+ def foreach(opts = {}, &block)
31
+ self.clcass.foreach(@filename, opts, &block)
32
+ end
33
+ def valid_file?(opts = {})
34
+ self.class.valid_file?(@filename, opts)
35
+ end
36
+ def valid_content?(opts = {})
37
+ self.class.valid_content?(@filename, opts)
38
+ end
39
+
40
+ def row_size(opts = {})
41
+ self.class.row_size(@filename, opts)
42
+ end
43
+ def rows(opts = {})
44
+ self.class.rows(@filename, opts)
45
+ end
46
+
47
+ private
48
+ def self.detect_class_from_filename_or_content(filename)
49
+ klass = detect_class_from_filename(filename)
50
+ return klass if klass
51
+ detect_class_from_content(filename)
52
+ end
53
+ def self.detect_class_from_filename(filename)
54
+ @@ext2class ||= build_ext2class
55
+ @@ext2class[File.extname(filename).upcase]
56
+ end
57
+ def self.build_ext2class
58
+ {
59
+ '.XLS' => Shoji::Excel,
60
+ '.CSV' => Shoji::CSV,
61
+ '.TSV' => Shoji::TSV,
62
+ '.ODS' => Shoji::ODS
63
+ }
64
+ end
65
+ def self.detect_class_from_content(filename)
66
+ if binary_file? filename
67
+ # Try to check valid xls.
68
+ return Shoji::Excel if Shoji::Excel.valid_file? filename
69
+ else
70
+ line = first_line(filename)
71
+ case line
72
+ when /\t/ then Shoji::TSV
73
+ when /,/ then Shoji::CSV
74
+ else
75
+ nil
76
+ end
77
+ end
78
+ end
79
+ def self.class_from_params(*args)
80
+ filename = args[0]
81
+ opts = args[1] || {}
82
+ case opts[:type]
83
+ when nil, :auto then detect_class_from_filename_or_content(filename)
84
+ when :excel, :xls then Shoji::Excel
85
+ when :csv then Shoji::CSV
86
+ when :tsv, :tabtext, :tab_text, :tab then Shoji::TSV
87
+ else "Unexpected type value=#{opts[:type]}"
88
+ end
89
+ end
90
+ def self.binary_file?(filename)
91
+ buf = nil
92
+ File.open(filename, 'rb') do |f|
93
+ buf = f.read(256)
94
+ end
95
+ buf.index("\0") ? true : false
96
+ end
97
+ def self.first_line(filename)
98
+ line = nil
99
+ File.foreach(filename, encoding: 'BINARY') do |l|
100
+ line = l
101
+ break
102
+ end
103
+ line
104
+ end
105
+ end
data/shoji.gemspec ADDED
@@ -0,0 +1,87 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = "shoji"
8
+ s.version = "0.0.9"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["CHIKURA Shinsaku"]
12
+ s.date = "2012-03-19"
13
+ s.description = "Shoji is a package of reading various formats of table structured data file."
14
+ s.email = "shinsaku@chikura.me"
15
+ s.extra_rdoc_files = [
16
+ "LICENSE.txt",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".document",
21
+ ".rvmrc",
22
+ "Gemfile",
23
+ "Gemfile.lock",
24
+ "LICENSE.txt",
25
+ "README.rdoc",
26
+ "Rakefile",
27
+ "VERSION",
28
+ "lib/shoji.rb",
29
+ "lib/shoji/base.rb",
30
+ "lib/shoji/csv.rb",
31
+ "lib/shoji/excel.rb",
32
+ "lib/shoji/excel/reader.rb",
33
+ "lib/shoji/ods.rb",
34
+ "lib/shoji/ods/reader.rb",
35
+ "lib/shoji/text_base.rb",
36
+ "lib/shoji/tsv.rb",
37
+ "lib/shoji/utf8_file.rb",
38
+ "lib/shoji_main.rb",
39
+ "shoji.gemspec",
40
+ "test/files/test-reps.ods",
41
+ "test/files/test01.csv",
42
+ "test/files/test01.ods",
43
+ "test/files/test01.tsv",
44
+ "test/files/test01.xls",
45
+ "test/files/testcsv.data",
46
+ "test/files/testtsv.data",
47
+ "test/files/testxls.data",
48
+ "test/helper.rb",
49
+ "test/test_shoji.rb"
50
+ ]
51
+ s.homepage = "http://github.com/chsh/shoji"
52
+ s.licenses = ["MIT"]
53
+ s.require_paths = ["lib"]
54
+ s.rubygems_version = "1.8.10"
55
+ s.summary = "Shoji is a package of reading various formats of table structured data file."
56
+
57
+ if s.respond_to? :specification_version then
58
+ s.specification_version = 3
59
+
60
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
61
+ s.add_runtime_dependency(%q<zipruby>, [">= 0"])
62
+ s.add_runtime_dependency(%q<spreadsheet>, [">= 0"])
63
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
64
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
65
+ s.add_development_dependency(%q<rdoc>, ["~> 3.12"])
66
+ s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
67
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.3"])
68
+ else
69
+ s.add_dependency(%q<zipruby>, [">= 0"])
70
+ s.add_dependency(%q<spreadsheet>, [">= 0"])
71
+ s.add_dependency(%q<nokogiri>, [">= 0"])
72
+ s.add_dependency(%q<shoulda>, [">= 0"])
73
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
74
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
75
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
76
+ end
77
+ else
78
+ s.add_dependency(%q<zipruby>, [">= 0"])
79
+ s.add_dependency(%q<spreadsheet>, [">= 0"])
80
+ s.add_dependency(%q<nokogiri>, [">= 0"])
81
+ s.add_dependency(%q<shoulda>, [">= 0"])
82
+ s.add_dependency(%q<rdoc>, ["~> 3.12"])
83
+ s.add_dependency(%q<bundler>, ["~> 1.0.0"])
84
+ s.add_dependency(%q<jeweler>, ["~> 1.8.3"])
85
+ end
86
+ end
87
+
Binary file
@@ -0,0 +1,2 @@
1
+ "�����","abc","�قւ�"
2
+ 123,"������",8��20��
Binary file
@@ -0,0 +1 @@
1
+ ������ abc �ۤؤ�
Binary file
@@ -0,0 +1,2 @@
1
+ "�����","abc","�قւ�"
2
+ 123,"������",8��20��
@@ -0,0 +1 @@
1
+ ������ abc �ۤؤ�
Binary file
data/test/helper.rb ADDED
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'shoji'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,109 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'helper'
3
+
4
+ FILEPATH = File.dirname(__FILE__) + "/files"
5
+
6
+ class TestShoji < Test::Unit::TestCase
7
+
8
+ should "excel: load all rows" do
9
+ reader = Shoji::Excel::Reader.new("#{FILEPATH}/test01.xls")
10
+ rows = reader.rows
11
+ assert_equal [Date.parse('2009/2/1'), Date.parse('1998/2/1'),
12
+ Date.parse('2008/3/1'), DateTime.parse('1899/12/30 14:30')], rows[0]
13
+ assert_equal ["アルファ", "alpha", 300, 123.456], rows[1]
14
+ end
15
+
16
+ should "excel: process foreach row" do
17
+ reader = Shoji::Excel::Reader.new("#{FILEPATH}/test01.xls")
18
+ first = true
19
+ reader.foreach do |cells|
20
+ if first
21
+ assert_equal [Date.parse('2009/2/1'), Date.parse('1998/2/1'),
22
+ Date.parse('2008/3/1'), DateTime.parse('1899/12/30 14:30')], cells
23
+ first = false
24
+ next
25
+ end
26
+ assert_equal ["アルファ", "alpha", 300, 123.456], cells
27
+ end
28
+ end
29
+
30
+ should "openoffice calc: load all rows" do
31
+ reader = Shoji::ODS::Reader.new("#{FILEPATH}/test01.ods")
32
+ rows = reader.rows
33
+ assert_equal [123, "abc", Date.parse('2009/12/13'), 'あいう'], rows[0]
34
+ assert_equal ["13:40", 35.22, '#ab', 'çons'], rows[1]
35
+ end
36
+
37
+ should "openoffice calc: can handle repeated cells correctly." do
38
+ reader = Shoji::ODS::Reader.new("#{FILEPATH}/test-reps.ods")
39
+ rows = reader.rows
40
+ assert_equal [Date.parse('2009/12/6'), '', '', 3, 5, 1], rows[1]
41
+ assert_equal [98, 5, 5, 5, 1, 1, 2, 3], rows[2]
42
+ end
43
+
44
+ should "csv: load all rows" do
45
+ rows = Shoji::CSV.rows("#{FILEPATH}/test01.csv")
46
+ assert_equal 2, rows.size
47
+ rows = Shoji::CSV.rows("#{FILEPATH}/test01.csv", :limit => 1)
48
+ assert_equal 1, rows.size
49
+ assert_equal ['いろは', 'abc', 'ほへと'], rows[0]
50
+ end
51
+
52
+ should "csv: process foreach row" do
53
+ first = true
54
+ Shoji::TSV.foreach "#{FILEPATH}/test01.tsv" do |cells|
55
+ if first
56
+ assert_equal ['いろは', 'abc', 'ほへと'], cells
57
+ first = false
58
+ next
59
+ end
60
+ assert_equal ["123", "あいう", "8月20日"], cells
61
+ end
62
+ end
63
+
64
+ should "tsv: load all rows" do
65
+ rows = Shoji::TSV.rows("#{FILEPATH}/test01.tsv")
66
+ assert_equal 2, rows.size
67
+ rows = Shoji::TSV.rows("#{FILEPATH}/test01.tsv", :limit => 1)
68
+ assert_equal 1, rows.size
69
+
70
+ end
71
+
72
+ should "tsv: process foreach row" do
73
+ first = true
74
+ Shoji::TSV.foreach "#{FILEPATH}/test01.tsv" do |cells|
75
+ if first
76
+ assert_equal ['いろは', 'abc', 'ほへと'], cells
77
+ first = false
78
+ next
79
+ end
80
+ assert_equal ["123", "あいう", "8月20日"], cells
81
+ end
82
+ end
83
+
84
+ should "autodetect: load all rows" do
85
+ rows = Shoji.rows("#{FILEPATH}/testxls.data")
86
+ assert_equal 2, rows.size
87
+ assert_equal [Date.parse('2009/2/1'), Date.parse('1998/2/1'),
88
+ Date.parse('2008/3/1'), DateTime.parse('1899/12/30 14:30')], rows[0]
89
+ rows = Shoji.rows("#{FILEPATH}/testcsv.data")
90
+ assert_equal 2, rows.size
91
+ assert_equal ['いろは', 'abc', 'ほへと'], rows[0]
92
+ rows = Shoji.rows("#{FILEPATH}/testtsv.data")
93
+ assert_equal 2, rows.size
94
+ assert_equal ['いろは', 'abc', 'ほへと'], rows[0]
95
+
96
+ end
97
+ should "can handle various types of file." do
98
+ rows = Shoji.rows("#{FILEPATH}/test01.xls")
99
+ assert_equal [Date.parse('2009/2/1'), Date.parse('1998/2/1'),
100
+ Date.parse('2008/3/1'), DateTime.parse('1899/12/30 14:30')], rows[0]
101
+ rows = Shoji.rows("#{FILEPATH}/test01.ods")
102
+ assert_equal [123, "abc", Date.parse('2009/12/13'), 'あいう'], rows[0]
103
+ rows = Shoji.rows("#{FILEPATH}/test01.csv")
104
+ rows = Shoji::CSV.rows("#{FILEPATH}/test01.csv")
105
+ assert_equal ['いろは', 'abc', 'ほへと'], rows[0]
106
+ rows = Shoji.rows("#{FILEPATH}/test01.tsv")
107
+ assert_equal ['いろは', 'abc', 'ほへと'], rows[0]
108
+ end
109
+ end
metadata ADDED
@@ -0,0 +1,158 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: shoji
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.9
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - CHIKURA Shinsaku
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-03-19 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: zipruby
16
+ requirement: &70317314442440 !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: *70317314442440
25
+ - !ruby/object:Gem::Dependency
26
+ name: spreadsheet
27
+ requirement: &70317314441500 !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: '0'
33
+ type: :runtime
34
+ prerelease: false
35
+ version_requirements: *70317314441500
36
+ - !ruby/object:Gem::Dependency
37
+ name: nokogiri
38
+ requirement: &70317314441000 !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ! '>='
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: *70317314441000
47
+ - !ruby/object:Gem::Dependency
48
+ name: shoulda
49
+ requirement: &70317314440300 !ruby/object:Gem::Requirement
50
+ none: false
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ type: :development
56
+ prerelease: false
57
+ version_requirements: *70317314440300
58
+ - !ruby/object:Gem::Dependency
59
+ name: rdoc
60
+ requirement: &70317314439520 !ruby/object:Gem::Requirement
61
+ none: false
62
+ requirements:
63
+ - - ~>
64
+ - !ruby/object:Gem::Version
65
+ version: '3.12'
66
+ type: :development
67
+ prerelease: false
68
+ version_requirements: *70317314439520
69
+ - !ruby/object:Gem::Dependency
70
+ name: bundler
71
+ requirement: &70317314438740 !ruby/object:Gem::Requirement
72
+ none: false
73
+ requirements:
74
+ - - ~>
75
+ - !ruby/object:Gem::Version
76
+ version: 1.0.0
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: *70317314438740
80
+ - !ruby/object:Gem::Dependency
81
+ name: jeweler
82
+ requirement: &70317314454140 !ruby/object:Gem::Requirement
83
+ none: false
84
+ requirements:
85
+ - - ~>
86
+ - !ruby/object:Gem::Version
87
+ version: 1.8.3
88
+ type: :development
89
+ prerelease: false
90
+ version_requirements: *70317314454140
91
+ description: Shoji is a package of reading various formats of table structured data
92
+ file.
93
+ email: shinsaku@chikura.me
94
+ executables: []
95
+ extensions: []
96
+ extra_rdoc_files:
97
+ - LICENSE.txt
98
+ - README.rdoc
99
+ files:
100
+ - .document
101
+ - .rvmrc
102
+ - Gemfile
103
+ - Gemfile.lock
104
+ - LICENSE.txt
105
+ - README.rdoc
106
+ - Rakefile
107
+ - VERSION
108
+ - lib/shoji.rb
109
+ - lib/shoji/base.rb
110
+ - lib/shoji/csv.rb
111
+ - lib/shoji/excel.rb
112
+ - lib/shoji/excel/reader.rb
113
+ - lib/shoji/ods.rb
114
+ - lib/shoji/ods/reader.rb
115
+ - lib/shoji/text_base.rb
116
+ - lib/shoji/tsv.rb
117
+ - lib/shoji/utf8_file.rb
118
+ - lib/shoji_main.rb
119
+ - shoji.gemspec
120
+ - test/files/test-reps.ods
121
+ - test/files/test01.csv
122
+ - test/files/test01.ods
123
+ - test/files/test01.tsv
124
+ - test/files/test01.xls
125
+ - test/files/testcsv.data
126
+ - test/files/testtsv.data
127
+ - test/files/testxls.data
128
+ - test/helper.rb
129
+ - test/test_shoji.rb
130
+ homepage: http://github.com/chsh/shoji
131
+ licenses:
132
+ - MIT
133
+ post_install_message:
134
+ rdoc_options: []
135
+ require_paths:
136
+ - lib
137
+ required_ruby_version: !ruby/object:Gem::Requirement
138
+ none: false
139
+ requirements:
140
+ - - ! '>='
141
+ - !ruby/object:Gem::Version
142
+ version: '0'
143
+ segments:
144
+ - 0
145
+ hash: -2476244410315930325
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ none: false
148
+ requirements:
149
+ - - ! '>='
150
+ - !ruby/object:Gem::Version
151
+ version: '0'
152
+ requirements: []
153
+ rubyforge_project:
154
+ rubygems_version: 1.8.10
155
+ signing_key:
156
+ specification_version: 3
157
+ summary: Shoji is a package of reading various formats of table structured data file.
158
+ test_files: []