parseexcel_mod 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,4 @@
1
# Namespace carrying the gem's release metadata.
module ParseexcelMod
  # Version string of this gem release.
  VERSION = '0.0.1'
end
4
+
@@ -0,0 +1,74 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # Workbook -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
24
+
25
+ require 'parseexcel_mod/worksheet'
26
+
27
module Spreadsheet
  module ParseExcel
    # An annotation is its text (a String) plus the name of its author.
    class Annotation < String
      attr_accessor :author
    end

    # Container for everything collected from one workbook: its worksheets,
    # packaged strings, annotations and cell formats.
    class Workbook
      attr_accessor :biffversion, :version, :flg_1904
      attr_writer :format

      # Starts with empty worksheet, string, annotation and format lists.
      def initialize
        @worksheets = []
        @pkg_strs = []
        @annotations = []
        @formats = []
      end

      # Appends an annotation; returns the annotation list.
      def add_annotation(pkg_str)
        @annotations.push(pkg_str)
      end

      # Delegates to the workbook-level format object assigned via #format=.
      def add_text_format(idx, fmt_str)
        @format.add_text_format(idx, fmt_str)
      end

      # Appends a cell format; returns the format list.
      def add_cell_format(format)
        @formats.push(format)
      end

      # Appends a packaged string; returns the string list.
      def add_pkg_str(pkg_str)
        @pkg_strs.push(pkg_str)
      end

      # Returns the annotation stored at position +idx+, or nil.
      def annotation(idx)
        @annotations.at(idx)
      end

      # Without an argument returns the workbook-level format object;
      # with an index returns the cell format stored at that position.
      def format(idx = nil)
        idx.nil? ? @format : @formats.at(idx)
      end

      # Returns the packaged string stored at position +idx+, or nil.
      def pkg_str(idx)
        @pkg_strs.at(idx)
      end

      # Number of worksheet slots (the size of the internal list).
      def sheet_count
        @worksheets.size
      end

      # Looks up a worksheet.
      #
      # idx      - an Integer position, or a sheet name to search for.
      # encoding - passed through to Worksheet#name when searching by name.
      #
      # With an Integer the sheet at that position is returned and created on
      # first access. Otherwise the first sheet whose name equals +idx+ is
      # returned, or nil when there is none.
      def worksheet(idx, encoding = nil)
        if idx.is_a?(Integer)
          @worksheets[idx] ||= Worksheet.new
        else
          # Creating sheets at non-consecutive positions leaves nil slots in
          # the list; skip them instead of calling #name on nil.
          @worksheets.find { |sheet| sheet && sheet.name(encoding) == idx }
        end
      end
    end
  end
end
@@ -0,0 +1,170 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # Worksheet -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
24
+
25
+ require 'parseexcel_mod/olestorage'
26
+ require 'iconv'
27
+
28
module Spreadsheet
  module ParseExcel
    # One sheet of a workbook: a sparse two-dimensional grid of cells
    # (an array of row arrays) plus a set of page-setup attributes.
    # Enumerable iterates over the row arrays.
    class Worksheet
      include Enumerable

      attr_accessor :default_row_height, :resolution, :v_resolution, :paper,
                    :scale, :page_start, :fit_width, :fit_height, :header_margin,
                    :footer_margin, :copies, :left_to_right, :no_pls, :no_color, :draft,
                    :notes, :no_orient, :use_page, :landscape, :sheet_version, :sheet_type,
                    :header, :footer, :page_fit
      # The reader is defined explicitly below because it accepts an
      # optional target encoding.
      attr_writer :name

      # A single cell value together with its formatting references.
      class Cell
        attr_accessor :value, :kind, :numeric, :code, :book, :format_no,
                      :format, :rich, :encoding, :annotation

        # params - Hash of attribute name => value. Keys without a matching
        #          public writer are ignored. The source encoding defaults to
        #          'UTF-16LE' unless :encoding is given.
        def initialize(params = { :value => '', :kind => :blank, :numeric => false })
          @encoding = 'UTF-16LE'
          params.each do |key, val|
            setter = "#{key}="
            send(setter, val) if respond_to?(setter)
          end
        end

        # The date part of #datetime.
        def date
          datetime.date
        end

        # Interprets the cell value as a serial date: the integer part counts
        # days, the fractional part is the time of day. Counting starts in
        # 1900, or in 1904 when the owning book has flg_1904 set.
        #
        # Returns an OLE::DateTime built from year, month, day, hour, minute,
        # second and millisecond.
        def datetime
          days = @value.to_i
          fraction = @value.to_f - days

          # 1. Resolve the whole-day count into year / month / day.
          year = 1900
          if @book.flg_1904
            year = 1904
            days += 1 # Start from Jan 1st
          end
          ydays = year_days(year)
          while days > ydays
            days -= ydays
            year += 1
            ydays = year_days(year)
          end
          month = 1
          1.upto(12) do |mon|
            month = mon
            mdays = month_days(mon, year)
            break if days <= mdays
            days -= mdays
          end

          # 2. Resolve the day fraction into hour / minute / second / msec.
          # Half a millisecond (expressed in days) is added first so that the
          # truncations below round to the nearest millisecond.
          fraction += (0.0005 / 86400.0)
          fraction *= 24.0
          hour = fraction.to_i
          fraction -= hour
          fraction *= 60.0
          min = fraction.to_i
          fraction -= min
          fraction *= 60.0
          sec = fraction.to_i
          fraction -= sec
          fraction *= 1000.0
          msec = fraction.to_i

          OLE::DateTime.new(year, month, days, hour, min, sec, msec)
        end

        # The value converted with #to_i.
        def to_i
          @value.to_i
        end

        # The value converted with #to_f.
        def to_f
          @value.to_f
        end

        # Returns the value as a String.
        #
        # target_encoding - when given, the value is converted from the
        #                   cell's encoding to this one with Iconv; if that
        #                   fails, the conversion is retried from 'ascii' on
        #                   the stringified value.
        def to_s(target_encoding = nil)
          return @value.to_s unless target_encoding

          begin
            Iconv.new(target_encoding, @encoding).iconv(@value)
          rescue
            Iconv.new(target_encoding, 'ascii').iconv(@value.to_s)
          end
        end

        # Asks the cell's format for the cell type; nil without a format.
        def type
          @format.cell_type(self) if @format
        end

        private

        # Days in +month+ of +year+; February 1900 is counted with 29 days.
        def month_days(month, year)
          return 29 if year == 1900 && month == 2

          OLE::DateTime.month_days(month, year)
        end

        # Days in +year+; 1900 is counted with 366 days.
        def year_days(year)
          year == 1900 ? 366 : OLE::DateTime.year_days(year)
        end
      end

      # Plain value holder for a packaged string and its flags.
      class PkgString
        attr_reader :text, :unicode, :rich, :ext

        def initialize(text, unicode, rich, ext)
          @text = text
          @unicode = unicode
          @rich = rich
          @ext = ext
        end
      end

      # Starts with an empty grid and no recorded row heights.
      def initialize
        @cells = []
        @row_heights = []
      end

      # Stores +cell+ at (row, col) unless that position is already occupied,
      # widens the recorded dimensions, and returns the cell now stored there.
      def add_cell(row, col, cell)
        (@cells[row] ||= [])[col] ||= cell
        set_dimensions(row, col)
        @cells[row][col]
      end

      # Returns the cell at (row, col), creating a default Cell on demand.
      def cell(row, col)
        (@cells[row] ||= [])[col] ||= Cell.new
      end

      # Yields each row array, starting at row index +skip+.
      #
      # The caller's block is forwarded explicitly; without that the methods
      # mixed in from Enumerable have nothing to iterate. A +skip+ beyond the
      # last row yields nothing (the range slice is nil in that case).
      def each(skip = 0, &block)
        rows = @cells[skip..-1] || []
        rows.each(&block)
      end

      # Returns the sheet name; when +target_encoding+ is given the name is
      # converted to it from 'UTF-16LE' with Iconv.
      def name(target_encoding = nil)
        return @name unless target_encoding

        Iconv.new(target_encoding, 'UTF-16LE').iconv(@name.to_s)
      end

      # Number of row slots in the grid.
      def num_rows
        @cells.size
      end

      # Returns the row array at index +row+, creating it on demand.
      def row(row)
        @cells[row] ||= []
      end

      # Widens the recorded min/max row and column bounds so that they
      # include +row+ and the column span scol..ecol.
      def set_dimensions(row, scol, ecol = scol)
        @min_row = [row, @min_row || row].min
        @max_row = [row, @max_row || row].max
        @min_col = [scol, @min_col || scol].min
        @max_col = [ecol, @max_col || ecol].max
      end

      # Records the height of the given row.
      def set_row_height(row, height)
        @row_heights[row] = height
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'parseexcel_mod/version'
5
+
6
# Packaging metadata for the parseexcel_mod gem.
#
# NOTE(review): the license is declared as "MIT" here, while the headers of
# the library sources in this gem state the GNU Lesser General Public License
# (version 2.1 or later) -- confirm which license actually applies.
Gem::Specification.new do |gem|
  gem.name        = 'parseexcel_mod'
  gem.version     = ParseexcelMod::VERSION
  gem.authors     = ['t0pep0']
  gem.email       = ['t0pep0.gentoo@gmail.com']
  gem.description = 'ParseExcel with utf8'
  gem.summary     = 'Parse excel'
  gem.homepage    = 'https://github.com/t0pep0/parseexcel_mod'
  gem.license     = 'MIT'

  # Package every file tracked by git; executables are whatever lives under
  # bin/, test files whatever lives under test/, spec/ or features/.
  tracked = `git ls-files`.split($/)
  gem.files         = tracked
  gem.executables   = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.3'
  gem.add_development_dependency 'rake'
end
Binary file
data/test/data/bar.xls ADDED
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
data/test/data/foo.xls ADDED
Binary file
Binary file
data/test/data/nil.xls ADDED
File without changes
Binary file
Binary file
Binary file
data/test/suite.rb ADDED
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ # TestSuite -- spreadsheet -- 02.06.2003 -- hwyss@ywesee.com
3
+
4
# Put this directory and the sibling lib directory on the load path, then
# load every test_*.rb file found next to this suite.
suite_dir = File.dirname(__FILE__)
$: << File.expand_path(suite_dir)
$: << File.expand_path('../lib', suite_dir)

Dir.foreach(suite_dir) do |file|
  require file if file =~ /^test_.*\.rb$/
end
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # TestFormat -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
24
+
25
+ $: << File.expand_path("../lib", File.dirname(__FILE__))
26
+
27
+ require 'test/unit'
28
+ require 'parseexcel/format'
29
+
30
# Test-only reopening of the library's Format class: adds a writer for
# the +index+ attribute.
module Spreadsheet
  module ParseExcel
    class Format
      attr_writer(:index)
    end
  end
end
37
# Minimal stand-in for a cell: exposes only the +numeric+ flag, which is all
# the format tests set on it.
class StubFormatCell
  attr_accessor(:numeric)
end
40
+
41
# Unit tests for Spreadsheet::ParseExcel::Format.
#
# NOTE(review): this file loads 'parseexcel/format', whereas the library
# files of this gem require 'parseexcel_mod/...' -- confirm the require path
# resolves against this gem's lib directory.
class TestFormat < Test::Unit::TestCase
  def setup
    @format = Spreadsheet::ParseExcel::Format.new
  end

  # The reported cell type follows the cell's numeric flag and the format
  # index; each step below mutates state the next assertion depends on.
  def test_cell_type
    stub = StubFormatCell.new
    assert_equal(:text, @format.cell_type(stub))

    stub.numeric = true
    assert_equal(:numeric, @format.cell_type(stub))

    @format.fmt_idx = 0x12
    assert_equal(:date, @format.cell_type(stub))

    @format.fmt_idx = 0
    assert_equal(:numeric, @format.cell_type(stub))

    # A registered 'General' text format must still report :numeric.
    @format.add_text_format(0x46, 'General')
    @format.fmt_idx = 0x46
    assert_equal(:numeric, @format.cell_type(stub))
  end

  # text_format returns 'foo' for plain, :_native_ and :ucs2 input alike.
  def test_text_format
    assert_equal('foo', @format.text_format('foo'))
    assert_equal('foo', @format.text_format('foo', :_native_))
    assert_equal('foo', @format.text_format("\x00f\x00o\x00o", :ucs2))
  end
end
@@ -0,0 +1,180 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # TestOLEReader -- Spreadsheet::ParseExcel -- 05.06.2003 -- hwyss@ywesee.com
24
+
25
+ $: << File.expand_path("../lib", File.dirname(__FILE__))
26
+
27
+ require 'test/unit'
28
+ require 'parseexcel/olestorage'
29
+
30
# Test-only reopening of the library classes: makes Storage#get_header
# callable from the tests and lets them replace a Header's bbd_info.
module OLE
  class Storage
    public(:get_header)

    class Header
      attr_writer(:bbd_info)
    end
  end
end
38
+
39
# Tests for the class-level helpers of OLE::Storage and the OLE module.
class TestOLEStorageClass < Test::Unit::TestCase
  def test_is_normal_block
    assert(OLE::Storage.is_normal_block?(0))
    assert(OLE::Storage.is_normal_block?(0xFF))
    assert(!OLE::Storage.is_normal_block?(0xFFFFFFFC))
  end

  # Building a Header from the data/nil.xls fixture must raise
  # OLE::UnknownFormatError.
  def test_invalid_header
    filename = File.expand_path('data/nil.xls', File.dirname(__FILE__))
    # Block form: the handle is closed even when the assertion fails.
    File.open(filename) do |file|
      assert_raises(OLE::UnknownFormatError) do
        OLE::Storage::Header.new(file)
      end
    end
  end

  # asc2ucs is expected to follow every character with a NUL byte.
  def test_asc2ucs
    expected = "W\000o\000r\000k\000b\000o\000o\000k\000"
    assert_equal(expected, OLE.asc2ucs('Workbook'))
    expected = "R\000o\000o\000t\000 \000E\000n\000t\000r\000y\000"
    assert_equal(expected, OLE.asc2ucs('Root Entry'))
  end
end
60
# Tests for the calendar helpers of OLE::DateTime.
class TestOLEDateTime < Test::Unit::TestCase
  # year_days must agree with the reference leap-year rule below and with
  # the literal expectations.
  def test_year_days
    [2000, 1999, 1900, 1980].each do |year|
      assert_equal(perl_year_days(year), OLE::DateTime.year_days(year))
    end
    assert_equal(366, OLE::DateTime.year_days(2000))
    assert_equal(365, OLE::DateTime.year_days(1999))
    assert_equal(365, OLE::DateTime.year_days(1900))
    assert_equal(366, OLE::DateTime.year_days(1980))
  end

  def test_month_days
    assert_equal(31, OLE::DateTime.month_days(8, 1975))
    assert_equal(30, OLE::DateTime.month_days(6, 2003))
    assert_equal(29, OLE::DateTime.month_days(2, 2000))
    assert_equal(29, OLE::DateTime.month_days(2, 1980))
    assert_equal(28, OLE::DateTime.month_days(2, 1900))
  end

  def test_parse
    # how?
  end

  def test_date
    stamp = OLE::DateTime.new(2002, 4, 19)
    assert_equal(Date.new(2002, 4, 19), stamp.date)
  end

  # helper methods

  # Reference day count: 366 for leap years, 365 otherwise.
  def perl_year_days(year)
    if perl_leap_year?(year)
      366
    else
      365
    end
  end

  # Reference leap-year rule: divisible by 4 and (not by 100, or by 400).
  # The result is only truthy/falsy, not a strict boolean.
  def perl_leap_year?(year)
    (year % 4).zero? && ((year % 100).nonzero? || (year % 400).zero?)
  end
end
94
# Tests for OLE::Storage, run against the data/foo.xls fixture.
class TestOLEStorage < Test::Unit::TestCase
  def setup
    @datadir = File.expand_path('data', File.dirname(__FILE__))
    @filename = File.expand_path('foo.xls', @datadir)
    @ole = OLE::Storage.new(@filename)
  end

  # Header fields expected for the fixture file.
  def test_get_header
    hdr = @ole.get_header
    assert_equal(1, hdr.bdb_count)
    assert_equal(512, hdr.big_block_size)
    assert_equal(0, hdr.extra_bbd_count)
    assert_equal(4294967294, hdr.extra_bbd_start)
    assert_equal(11, hdr.root_start)
    assert_equal(1, hdr.sbd_count)
    assert_equal(2, hdr.sbd_start)
    assert_equal(64, hdr.small_block_size)
  end

  # Searching by exact names finds the first two entries. Lowercase names
  # find nothing unless the second argument is true.
  def test_search_pps
    expected = [0, 1].map { |n| @ole.header.get_nth_pps(n) }

    exact = ['Root Entry', 'Workbook'].map { |label| OLE.asc2ucs(label) }
    assert_equal(expected, @ole.search_pps(exact))

    lowercase = ['root entry', 'workbook'].map { |label| OLE.asc2ucs(label) }
    assert_equal([], @ole.search_pps(lowercase))
    assert_equal(expected, @ole.search_pps(lowercase, true))
  end

  # Input that is not in the expected format must be rejected.
  def test_unknown_format
    assert_raises(OLE::UnknownFormatError) do
      OLE::Storage.new(StringIO.new('12345678'))
    end
  end
end
135
# Tests for OLE::Storage::Header, run against the data/foo.xls fixture.
class TestOLEStorageHeader < Test::Unit::TestCase
  def setup
    @filename = File.expand_path('data/foo.xls', File.dirname(__FILE__))
    @file = OLE::Storage.new(@filename)
    @header = @file.get_header
  end

  # With a sparse bbd_info table, block 2 (which has no entry) is expected
  # to be followed by block 3.
  def test_get_next_block_no
    @header.bbd_info = { 1 => 2, 3 => 5 }
    assert_equal(2, @header.get_next_block_no(1))
    assert_equal(3, @header.get_next_block_no(2))
    assert_equal(5, @header.get_next_block_no(3))
  end

  # Following the same table for several steps from different start blocks.
  def test_get_nth_block_no
    @header.bbd_info = { 1 => 2, 3 => 5 }
    assert_equal(2, @header.get_nth_block_no(1, 1))
    assert_equal(3, @header.get_nth_block_no(1, 2))
    assert_equal(5, @header.get_nth_block_no(1, 3))
    assert_equal(3, @header.get_nth_block_no(2, 1))
    assert_equal(5, @header.get_nth_block_no(2, 2))
    assert_equal(5, @header.get_nth_block_no(3, 1))
  end

  # Entry 0 is the root and entry 1 a file; the sizes belong to the fixture.
  def test_get_nth_pps
    root = @header.get_nth_pps(0)
    assert_instance_of(OLE::Storage::PPS::Root, root)
    file = @header.get_nth_pps(1)
    assert_instance_of(OLE::Storage::PPS::File, file)
    assert_equal(1, root.dir_pps)
    # -1 packed and unpacked as an unsigned 32-bit little-endian value.
    unsigned_minus_one = [-1].pack('V').unpack('V').first
    assert_equal(unsigned_minus_one, root.next_pps)
    assert_equal(unsigned_minus_one, root.prev_pps)
    assert_equal(3904, root.data.size)
    assert_equal(3335, file.data.size)
  end

  def test_sb_start
    assert_equal(3, @header.sb_start)
  end

  def test_sb_size
    assert_equal(3904, @header.sb_size)
  end
end