excel2csv 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,3 @@
1
1
  module Excel2CSV
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
data/lib/excel2csv.rb CHANGED
@@ -7,6 +7,7 @@ module Excel2CSV
7
7
 
8
8
  class Info
9
9
  attr_accessor :sheets
10
+ attr_accessor :previews
10
11
  attr_accessor :tmp_dir
11
12
  attr_accessor :working_dir
12
13
 
@@ -17,12 +18,21 @@ module Excel2CSV
17
18
  end
18
19
 
19
20
  def read
20
- @sheets = Dir["#{@working_dir}/*.csv"].map do |file|
21
- {path: file}
21
+ Dir["#{@working_dir}/*.csv"].map do |file|
22
+ name = File.basename(file)
23
+ m = /(?<sheet>\d+)-(?<rows>\d+)(-of-(?<total_rows>\d+))?/.match(name)
24
+ next if !m
25
+ total_rows = (m[:total_rows] || m[:rows]).to_i
26
+ preview_rows = m[:rows].to_i
27
+ if name =~ /preview/
28
+ @previews << {path: file, total_rows:total_rows, rows:preview_rows}
29
+ else
30
+ @sheets << {path: file, total_rows:total_rows, rows:total_rows}
31
+ end
22
32
  end
23
33
  end
24
34
 
25
- def close
35
+ def clean
26
36
  FileUtils.remove_entry_secure(@tmp_dir, true) if @tmp_dir
27
37
  end
28
38
 
@@ -32,13 +42,14 @@ module Excel2CSV
32
42
  @working_dir = working_dir
33
43
  @tmp_dir = tmp_dir
34
44
  @sheets = []
45
+ @previews = []
35
46
  end
36
47
 
37
48
  end
38
49
 
39
50
  def foreach(path, options = {}, &block)
40
51
  convert(path, options) do |info|
41
- CSV.foreach(info.sheets.first[:path], options, &block)
52
+ CSV.foreach(path_to_sheet(info, options), clean_options(options), &block)
42
53
  end
43
54
  end
44
55
 
@@ -46,30 +57,83 @@ module Excel2CSV
46
57
 
47
58
  def read(path, options = {})
48
59
  convert(path, options) do |info|
49
- CSV.read(info.sheets.first[:path], options)
60
+ CSV.read(path_to_sheet(info, options), clean_options(options))
50
61
  end
51
62
  end
52
63
 
53
64
  module_function :read
54
65
 
55
66
  def convert(path, options = {})
67
+ info = options[:info]
68
+ if info && Dir.exists?(info.working_dir)
69
+ return block_given? ? yield(info) : info
70
+ end
56
71
  begin
57
- tmp_dir = Dir.mktmpdir
58
- dest_folder = options[:dest_folder] || tmp_dir
59
- java_options = options[:java_options] || "-Dfile.encoding=utf8 -Xms512m -Xmx512m -XX:MaxPermSize=256m"
60
- jar_path = File.join(File.dirname(__FILE__), "excel2csv.jar")
61
- `java #{java_options} -jar #{jar_path} #{path} #{dest_folder}`
62
- info = Info.read dest_folder, tmp_dir
72
+ info = create_cvs_files(path, options)
63
73
  if block_given?
64
74
  yield info
65
75
  else
66
76
  info
67
77
  end
68
78
  ensure
69
- info.close if block_given?
79
+ info.clean if block_given? && info
70
80
  end
71
81
  end
72
82
 
73
83
  module_function :convert
74
84
 
85
+ def path_to_sheet(info, options = {})
86
+ if options[:preview]
87
+ options.delete(:encoding) # all previews are in utf-8
88
+ collection = info.previews
89
+ else
90
+ collection = info.sheets
91
+ end
92
+ index = (idx = options[:index]) ? idx : 0
93
+ collection[index][:path]
94
+ end
95
+
96
+ module_function :path_to_sheet
97
+
98
+ def create_cvs_files(path, options)
99
+ tmp_dir = Dir.mktmpdir
100
+ dest_folder = options[:dest_folder] || tmp_dir
101
+ limit = options[:rows_limit]
102
+
103
+ if path =~ /\.csv/
104
+ total_rows = 0
105
+ preview_rows = []
106
+ opts = clean_options(options)
107
+ CSV.foreach(path, opts) do |row|
108
+ if limit && total_rows <= limit
109
+ preview_rows << row
110
+ end
111
+ total_rows += 1
112
+ end
113
+ FileUtils.cp path, "#{dest_folder}/1-#{total_rows}.csv"
114
+ if limit
115
+ CSV.open("#{dest_folder}/1-#{limit}-of-#{total_rows}-preview.csv", "wb") do |csv|
116
+ preview_rows.each {|row| csv << row}
117
+ end
118
+ end
119
+ else
120
+ java_options = options[:java_options] || "-Dfile.encoding=utf8 -Xms512m -Xmx512m -XX:MaxPermSize=256m"
121
+ rows_limit = limit ? "-r #{limit}" : ""
122
+ jar_path = File.join(File.dirname(__FILE__), "excel2csv.jar")
123
+ `java #{java_options} -jar #{jar_path} #{rows_limit} #{path} #{dest_folder}`
124
+ end
125
+
126
+ Info.read(dest_folder, tmp_dir)
127
+ end
128
+
129
+ module_function :create_cvs_files
130
+
131
+ def clean_options options
132
+ options.dup.delete_if do |key, value|
133
+ [:dest_folder, :java_options, :preview, :index, :path, :rows_limit, :info].include?(key)
134
+ end
135
+ end
136
+
137
+ module_function :clean_options
138
+
75
139
  end
@@ -1,27 +1,31 @@
1
1
  #encoding: utf-8
2
+
2
3
  require 'excel2csv'
3
4
 
4
5
  describe Excel2CSV do
5
6
 
6
7
  let(:excel) {Excel2CSV}
8
+ let(:csv_basic_types) {"spec/fixtures/basic_types.csv"}
9
+ let(:xls_basic_types) {"spec/fixtures/basic_types.xls"}
10
+ let(:xlsx_basic_types) {"spec/fixtures/basic_types.xlsx"}
7
11
 
8
12
  it "reads xls files" do
9
- data = excel.read "spec/fixtures/basic_types.xls"
13
+ data = excel.read xls_basic_types
10
14
  data[0].should == ["1.00", "2011-12-23 21:00:00 UTC(+0000)", "Hello"]
11
15
  data[1].should == ["2.00", "2011-12-24 21:00:00 UTC(+0000)", "Привет"]
12
16
  data[2].should == ["3.00", "2011-12-25 21:00:00 UTC(+0000)", 'Привет, "я excel!"']
13
17
  end
14
18
 
15
19
  it "reads xlsx files" do
16
- data = excel.read "spec/fixtures/basic_types.xlsx"
20
+ data = excel.read xlsx_basic_types
17
21
  data[0].should == ["1.00", "2011-12-23 21:00:00 UTC(+0000)", "Hello"]
18
22
  data[1].should == ["2.00", "2011-12-24 21:00:00 UTC(+0000)", "Привет"]
19
23
  data[2].should == ["3.00", "2011-12-25 21:00:00 UTC(+0000)", 'Привет, "я excel!"']
20
24
  end
21
25
 
22
- it "iterates rows" do
26
+ it "iterates rows like CSV lib" do
23
27
  count = 0
24
- excel.foreach "spec/fixtures/basic_types.xls" do |row|
28
+ excel.foreach xls_basic_types do |row|
25
29
  row.length.should == 3
26
30
  count += 1
27
31
  end
@@ -30,7 +34,7 @@ describe Excel2CSV do
30
34
 
31
35
  it "removes tmp dir after work" do
32
36
  tmp_dir = nil
33
- excel.convert "spec/fixtures/basic_types.xlsx" do |info|
37
+ excel.convert xlsx_basic_types do |info|
34
38
  # puts IO.read(info.sheets.first[:path])
35
39
  tmp_dir = info.tmp_dir
36
40
  end
@@ -38,4 +42,56 @@ describe Excel2CSV do
38
42
  Dir.exists?(tmp_dir).should == false
39
43
  end
40
44
 
45
+ it "converts once if info is passed" do
46
+ info = excel.convert xlsx_basic_types
47
+ info.sheets.length.should == 1
48
+ info.previews.length.should == 0
49
+ info.should == excel.convert("spec/fixtures/basic_types.xlsx", info:info)
50
+ end
51
+
52
+ it "regenerate csv files if working_dir is removed" do
53
+ info = excel.convert xlsx_basic_types
54
+ info.clean
55
+ info.should_not == excel.convert(xlsx_basic_types, info:info)
56
+ end
57
+
58
+ it "generates preview csv files with rows limit" do
59
+ info = excel.convert xls_basic_types, rows_limit:1
60
+ info.sheets.length.should == 1
61
+ info.previews.length.should == 1
62
+
63
+ info.sheets.first[:total_rows].should == 3
64
+ info.previews.first[:total_rows].should == 3
65
+
66
+ info.sheets.first[:rows].should == 3
67
+ info.previews.first[:rows].should == 1
68
+ end
69
+
70
+ it "reads previews" do
71
+ data = excel.read(xls_basic_types, rows_limit:1, preview:true, index:0)
72
+ data.length.should == 1
73
+ data[0].should == ["1.00", "2011-12-23 21:00:00 UTC(+0000)", "Hello"]
74
+ end
75
+
76
+ it "reads csv files" do
77
+ data = excel.read(csv_basic_types, encoding:'windows-1251:utf-8')
78
+ data[0].should == ["1.00","12/24/11 12:00 AM","Hello"]
79
+ data[1].should == ["2.00","12/25/11 12:00 AM","Привет"]
80
+ data[2].should == ["3.00","12/26/11 12:00 AM",'Привет, "я excel!"']
81
+ end
82
+
83
+ it "reads csv files with preview" do
84
+ data = excel.read(csv_basic_types,
85
+ encoding: 'windows-1251:utf-8',
86
+ rows_limit: 2,
87
+ preview: true
88
+ )
89
+ data[0].should == ["1.00","12/24/11 12:00 AM","Hello"]
90
+ data[1].should == ["2.00","12/25/11 12:00 AM","Привет"]
91
+ end
92
+
93
+ # Date, Boolean, String, [Phone, Percent, Email, Gender, Url]
94
+ # Несколько телефонов,
95
+
96
+
41
97
  end
@@ -0,0 +1,3 @@
1
+ 1.00,12/24/11 12:00 AM,Hello
2
+ 2.00,12/25/11 12:00 AM,Привет
3
+ 3.00,12/26/11 12:00 AM,"Привет, ""я excel!"""
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: excel2csv
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-09-15 00:00:00.000000000 Z
12
+ date: 2011-09-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rspec
16
- requirement: &70366534136320 !ruby/object:Gem::Requirement
16
+ requirement: &70270996778580 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - <=
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '2.6'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70366534136320
24
+ version_requirements: *70270996778580
25
25
  description: gem for converting Excel files to csv
26
26
  email:
27
27
  - yury.korolev@gmail.com
@@ -38,6 +38,7 @@ files:
38
38
  - lib/excel2csv.rb
39
39
  - lib/excel2csv/version.rb
40
40
  - spec/excel2csv_spec.rb
41
+ - spec/fixtures/basic_types.csv
41
42
  - spec/fixtures/basic_types.xls
42
43
  - spec/fixtures/basic_types.xlsx
43
44
  - spec/spec_helper.rb
@@ -67,6 +68,7 @@ specification_version: 3
67
68
  summary: extract excel worksheets to csv files
68
69
  test_files:
69
70
  - spec/excel2csv_spec.rb
71
+ - spec/fixtures/basic_types.csv
70
72
  - spec/fixtures/basic_types.xls
71
73
  - spec/fixtures/basic_types.xlsx
72
74
  - spec/spec_helper.rb