parseexcel 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # Workbook -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
24
+
25
+ require 'parseexcel/worksheet'
26
+
27
+ module Spreadsheet
28
+ module ParseExcel
29
# A note attached to a cell: the object itself is the note text (it is a
# String) and it additionally carries the name of its author.
class Annotation < String
  # Author of the note; nil until one has been assigned.
  attr_reader :author
  attr_writer :author
end
32
# Holds the worksheets, packaged strings, annotations and cell formats of a
# workbook, plus a single shared format object (see #format).
class Workbook
  attr_accessor :biffversion, :version, :flg_1904
  attr_writer :format

  # Starts out with no sheets, strings, annotations or cell formats.
  def initialize
    @worksheets = []
    @pkg_strs = []
    @annotations = []
    @formats = []
  end

  # Appends an annotation; returns the annotation list.
  def add_annotation(pkg_str)
    @annotations << pkg_str
  end

  # Forwards a text format registration to the object set via #format=.
  def add_text_format(idx, fmt_str)
    @format.add_text_format(idx, fmt_str)
  end

  # Appends a cell format; returns the cell format list.
  def add_cell_format(format)
    @formats << format
  end

  # Appends a packaged string; returns the packaged string list.
  def add_pkg_str(pkg_str)
    @pkg_strs << pkg_str
  end

  # Annotation stored at position +idx+, or nil when there is none.
  def annotation(idx)
    @annotations.fetch(idx, nil)
  end

  # Without an argument: the object set via #format=.
  # With an index: the cell format stored at that position (or nil).
  def format(idx=nil)
    return @format if idx.nil?
    @formats.at(idx)
  end

  # Packaged string stored at position +idx+, or nil when there is none.
  def pkg_str(idx)
    @pkg_strs.fetch(idx, nil)
  end

  # Length of the worksheet list.
  def sheet_count
    @worksheets.length
  end

  # Worksheet at position +idx+; a new one is created and stored there
  # the first time that position is requested.
  def worksheet(idx)
    existing = @worksheets[idx]
    return existing if existing
    @worksheets[idx] = Worksheet.new
  end
end
69
+ end
70
+ end
@@ -0,0 +1,152 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # Worksheet -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
24
+
25
+ require 'parseexcel/olestorage'
26
+ require 'iconv'
27
+
28
+ module Spreadsheet
29
+ module ParseExcel
30
# One sheet of a workbook. Cells are kept in @cells, an array of rows where
# each row is an array indexed by column; rows and cells that were never
# touched are nil. The accessors below hold page-setup values.
class Worksheet
  include Enumerable
  attr_accessor :default_row_height, :resolution, :v_resolution, :paper,
    :scale, :page_start, :fit_width, :fit_height, :header_margin,
    :footer_margin, :copies, :left_to_right, :no_pls, :no_color, :draft,
    :notes, :no_orient, :use_page, :landscape, :sheet_version, :sheet_type,
    :header, :footer, :page_fit

  # A single cell: its raw value plus the metadata needed to interpret it.
  class Cell
    attr_accessor :value, :kind, :numeric, :code, :book, :format_no,
      :format, :rich, :encoding, :annotation

    # +params+ maps attribute names to initial values; keys without a
    # matching writer are ignored. The encoding defaults to 'UTF-16LE'
    # unless +params+ overrides it.
    def initialize(params={:value=>'',:kind=>:blank,:numeric=>false})
      @encoding = 'UTF-16LE'
      params.each { |key, val|
        mthd = key.to_s + '='
        self.send(mthd, val) if self.respond_to?(mthd)
      }
    end

    # Date part of #datetime.
    def date
      datetime.date
    end

    # Interprets the value as a serial day number with a fractional
    # time-of-day part and returns it as an OLE::DateTime. Counting starts
    # in 1900, or in 1904 when the owning book has flg_1904 set.
    def datetime
      date = @value.to_i
      time = @value.to_f - date
      # 1. Calc Days
      year = 1900
      if @book.flg_1904
        year = 1904
        date += 1 # Start from Jan 1st
      end
      ydays = year_days(year)
      while date > ydays
        date -= ydays
        year += 1
        ydays = year_days(year)
      end
      # The month must be assigned explicitly: since Ruby 1.9 a block
      # parameter never writes to an outer local of the same name, so
      # iterating with |month| would leave the result stuck at 1.
      month = 1
      1.upto(12) { |candidate|
        month = candidate
        mdays = month_days(candidate, year)
        break if date <= mdays
        date -= mdays
      }
      # 2. Calc Time
      day = date
      # Add half a millisecond (expressed in days) before truncating, so
      # values a hair below a boundary are not cut down to the unit before.
      time += (0.0005 / 86400.0)
      time *= 24.0
      hour = time.to_i
      time -= hour
      time *= 60.0
      min = time.to_i
      time -= min
      time *= 60.0
      sec = time.to_i
      time -= sec
      time *= 1000.0
      msec = time.to_i
      OLE::DateTime.new(year, month, day, hour, min, sec, msec)
    end

    def to_i
      @value.to_i
    end

    def to_f
      @value.to_f
    end

    # String form of the value. With +target_encoding+ the text is
    # converted from the cell's encoding to that encoding. Iconv is used
    # when it is loaded; it is no longer part of the standard library since
    # Ruby 2.0, so String#encode serves as the fallback.
    def to_s(target_encoding=nil)
      text = @value.to_s
      return text unless target_encoding
      if defined?(Iconv)
        Iconv.new(target_encoding, @encoding).iconv(text)
      else
        # dup: force_encoding retags in place and must not touch @value.
        text.dup.force_encoding(@encoding).encode(target_encoding)
      end
    end

    # Cell type as reported by the cell's format; nil without a format.
    def type
      @format.cell_type(self) if @format
    end

    private

    # Days in +month+ of +year+; February 1900 is counted with 29 days.
    # NOTE(review): presumably mirrors Excel treating 1900 as a leap
    # year -- confirm.
    def month_days(month, year)
      if year == 1900 && month == 2
        29
      else
        OLE::DateTime::month_days(month, year)
      end
    end

    # Days in +year+; 1900 is counted with 366 days (see #month_days).
    def year_days(year)
      (year == 1900) ? 366 : OLE::DateTime::year_days(year)
    end
  end

  # Plain value holder for a packaged string and its three flags/extras.
  class PkgString
    attr_reader :text, :unicode, :rich, :ext

    def initialize(text, unicode, rich, ext)
      @text, @unicode, @rich, @ext = text, unicode, rich, ext
    end
  end

  def initialize
    @cells = []
    @row_heights = []
  end

  # Stores +cell+ at (row, col) unless that position is already occupied,
  # widens the recorded dimensions, and returns the cell now at (row, col).
  def add_cell(row, col, cell)
    (@cells[row] ||= [])[col] ||= cell
    self.set_dimensions(row, col)
    @cells[row][col]
  end

  # Cell at (row, col); a default Cell is created and stored on first access.
  def cell(row, col)
    (@cells[row] ||= [])[col] ||= Cell.new
  end

  # Yields each row (an array of cells, or nil for an untouched row),
  # starting at row index +skip+. Skipping past the last row yields nothing;
  # Array#[] returns nil for such a range, hence the empty-array fallback.
  def each(skip=0, &block)
    (@cells[skip..-1] || []).each(&block)
  end

  # Row at index +row+; an empty row is created and stored on first access.
  def row(row)
    @cells[row] ||= []
  end

  # Grows the recorded min/max row and column so that they include +row+
  # and the column span scol..ecol.
  def set_dimensions(row, scol, ecol=scol)
    @min_row = [row, @min_row || row].min
    @max_row = [row, @max_row || row].max
    @min_col = [scol, @min_col || scol].min
    @max_col = [ecol, @max_col || ecol].max
  end

  def set_row_height(row, height)
    @row_heights[row] = height
  end
end
151
+ end
152
+ end
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
Binary file
File without changes
Binary file
@@ -0,0 +1,9 @@
1
+ #!/usr/bin/env ruby
2
+ # TestSuite -- spreadsheet -- 02.06.2003 -- hwyss@ywesee.com
3
+
4
# Put this directory and the sibling lib directory on the load path, then
# require every file in this directory whose name looks like test_*.rb.
here = File.dirname(__FILE__)
$:.push(File.expand_path(here))
$:.push(File.expand_path('../lib', here))

Dir.entries(here).each do |entry|
  next unless /^test_.*\.rb$/o.match(entry)
  require entry
end
@@ -0,0 +1,63 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # TestFormat -- Spreadsheet::ParseExcel -- 10.06.2003 -- hwyss@ywesee.com
24
+
25
+ $: << File.expand_path("../lib", File.dirname(__FILE__))
26
+
27
+ require 'test/unit'
28
+ require 'parseexcel/format'
29
+
30
module Spreadsheet
  module ParseExcel
    # Reopened for the tests: gives Format a writer for @index.
    class Format
      def index=(value)
        @index = value
      end
    end
  end
end
37
# Bare-bones cell stand-in for the Format tests: it only carries the
# numeric flag.
class StubFormatCell
  attr_reader :numeric
  attr_writer :numeric
end
40
+
41
# Unit tests for Spreadsheet::ParseExcel::Format.
class TestFormat < Test::Unit::TestCase
  def setup
    @format = Spreadsheet::ParseExcel::Format.new
  end

  # cell_type depends on the cell's numeric flag and on the format's
  # fmt_idx; the steps below run in order on the same format object.
  def test_cell_type
    cell = StubFormatCell.new
    assert_equal(:text, @format.cell_type(cell))
    cell.numeric = true
    assert_equal(:numeric, @format.cell_type(cell))
    [[0x12, :date], [0, :numeric]].each { |fmt_idx, expected|
      @format.fmt_idx = fmt_idx
      assert_equal(expected, @format.cell_type(cell))
    }
    # A text format registered as 'General' is expected to stay numeric.
    @format.add_text_format(0x46, 'General')
    @format.fmt_idx = 0x46
    assert_equal(:numeric, @format.cell_type(cell))
  end

  # text_format should give 'foo' for a plain string, for the :_native_
  # code, and for the zero-padded two-byte form with :ucs2.
  def test_text_format
    plain = @format.text_format('foo')
    assert_equal('foo', plain)
    native = @format.text_format('foo', :_native_)
    assert_equal('foo', native)
    wide = @format.text_format("\x00f\x00o\x00o", :ucs2)
    assert_equal('foo', wide)
  end
end
@@ -0,0 +1,175 @@
1
+ #!/usr/bin/env ruby
2
+ #
3
+ # Spreadsheet::ParseExcel -- Extract Data from an Excel File
4
+ # Copyright (C) 2003 ywesee -- intellectual capital connected
5
+ #
6
+ # This library is free software; you can redistribute it and/or
7
+ # modify it under the terms of the GNU Lesser General Public
8
+ # License as published by the Free Software Foundation; either
9
+ # version 2.1 of the License, or (at your option) any later version.
10
+ #
11
+ # This library is distributed in the hope that it will be useful,
12
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
13
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14
+ # Lesser General Public License for more details.
15
+ #
16
+ # You should have received a copy of the GNU Lesser General Public
17
+ # License along with this library; if not, write to the Free Software
18
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19
+ #
20
+ # ywesee - intellectual capital connected, Winterthurerstrasse 52, CH-8006 Zürich, Switzerland
21
+ # hwyss@ywesee.com
22
+ #
23
+ # TestOLEReader -- Spreadsheet::ParseExcel -- 05.06.2003 -- hwyss@ywesee.com
24
+
25
+ $: << File.expand_path("../lib", File.dirname(__FILE__))
26
+
27
+ require 'test/unit'
28
+ require 'parseexcel/olestorage'
29
+
30
# Test-only reopening of the OLE classes: exposes Storage#get_header
# publicly and lets tests replace a Header's @bbd_info.
module OLE
  class Storage < File
    public(:get_header)

    class Header
      def bbd_info=(value)
        @bbd_info = value
      end
    end
  end
end
38
+
39
# Tests for the class-level helpers of OLE::Storage and the OLE module.
class TestOLEStorageClass < Test::Unit::TestCase
  def test_is_normal_block
    assert(OLE::Storage.is_normal_block?(0))
    assert(OLE::Storage.is_normal_block?(0xFF))
    assert(!OLE::Storage.is_normal_block?(0xFFFFFFFC))
  end

  # Building a Header from data/nil.xls must raise UnknownFormatError.
  def test_invalid_header
    filename = File.expand_path('data/nil.xls', File.dirname(__FILE__))
    # Block form: the handle is closed even when the assertion fails.
    File.open(filename) { |file|
      assert_raises(OLE::UnknownFormatError) {
        OLE::Storage::Header.new(file)
      }
    }
  end

  # asc2ucs is expected to follow every character with a zero byte.
  def test_asc2ucs
    expected = "W\000o\000r\000k\000b\000o\000o\000k\000"
    assert_equal(expected, OLE.asc2ucs('Workbook'))
    expected = "R\000o\000o\000t\000 \000E\000n\000t\000r\000y\000"
    assert_equal(expected, OLE.asc2ucs('Root Entry'))
  end
end
60
# Tests for the calendar helpers of OLE::DateTime.
class TestOLEDateTime < Test::Unit::TestCase
  # year_days must agree with the reference leap-year rule below and with
  # the literal day counts.
  def test_year_days
    [2000, 1999, 1900, 1980].each { |year|
      assert_equal(perl_year_days(year), OLE::DateTime.year_days(year))
    }
    [[366, 2000], [365, 1999], [365, 1900], [366, 1980]].each { |days, year|
      assert_equal(days, OLE::DateTime.year_days(year))
    }
  end

  def test_month_days
    [
      [31, 8, 1975],
      [30, 6, 2003],
      [29, 2, 2000],
      [29, 2, 1980],
      [28, 2, 1900],
    ].each { |days, month, year|
      assert_equal(days, OLE::DateTime.month_days(month, year))
    }
  end

  def test_parse
    # how?
  end

  def test_date
    expected = Date.new(2002, 4, 19)
    assert_equal(expected, OLE::DateTime.new(2002,4,19).date)
  end

  # helper methods

  # Reference day count for +year+, based on perl_leap_year?.
  def perl_year_days(year)
    return 366 if perl_leap_year?(year)
    365
  end

  # Reference leap-year rule: divisible by 4, and either not divisible by
  # 100 or divisible by 400. The result is truthy/falsy, not strictly
  # true/false.
  def perl_leap_year?(iYear)
    return false unless (iYear % 4) == 0
    (iYear % 100).nonzero? || (iYear % 400) == 0
  end
end
94
# Tests for OLE::Storage run against the data/foo.xls fixture.
class TestOLEStorage < Test::Unit::TestCase
  def setup
    @datadir = File.expand_path('data', File.dirname(__FILE__))
    @filename = File.expand_path('foo.xls', @datadir)
    @ole = OLE::Storage.new(@filename)
  end

  # OLE::Storage is a File subclass, so every setup holds a file handle;
  # release it after each test instead of waiting for garbage collection.
  def teardown
    @ole.close if @ole && !@ole.closed?
  end

  # Header values expected for the fixture file.
  def test_get_header
    header = @ole.get_header
    assert_equal(1, header.bdb_count)
    assert_equal(512, header.big_block_size)
    assert_equal(0, header.extra_bbd_count)
    assert_equal(4294967294, header.extra_bbd_start)
    assert_equal(11, header.root_start)
    assert_equal(1, header.sbd_count)
    assert_equal(2, header.sbd_start)
    assert_equal(64, header.small_block_size)
  end

  # search_pps must find the entries by exact name, find nothing for
  # lowercase names, and find them again when the second argument is true.
  def test_search_pps
    expected = [
      @ole.header.get_nth_pps(0),
      @ole.header.get_nth_pps(1),
    ]
    result = @ole.search_pps([
      OLE.asc2ucs('Root Entry'),
      OLE.asc2ucs('Workbook'),
    ])
    assert_equal(expected, result)
    lowercase = [
      OLE.asc2ucs('root entry'),
      OLE.asc2ucs('workbook'),
    ]
    assert_equal([], @ole.search_pps(lowercase))
    result = @ole.search_pps(lowercase, true)
    assert_equal(expected, result)
  end
end
130
# Tests for OLE::Storage::Header run against the data/foo.xls fixture.
class TestOLEStorageHeader < Test::Unit::TestCase
  def setup
    @filename = File.expand_path('data/foo.xls', File.dirname(__FILE__))
    @file = OLE::Storage.new(@filename)
    @header = @file.get_header
  end

  # OLE::Storage is a File subclass, so every setup holds a file handle;
  # release it after each test instead of waiting for garbage collection.
  def teardown
    @file.close if @file && !@file.closed?
  end

  # With the stub table, blocks 1 and 3 map to 2 and 5; block 2 has no
  # entry and is expected to resolve to 3.
  def test_get_next_block_no
    @header.bbd_info = stub_bbd_info
    assert_equal(2, @header.get_next_block_no(1))
    assert_equal(3, @header.get_next_block_no(2))
    assert_equal(5, @header.get_next_block_no(3))
  end

  def test_get_nth_block_no
    @header.bbd_info = stub_bbd_info
    assert_equal(2, @header.get_nth_block_no(1,1))
    assert_equal(3, @header.get_nth_block_no(1,2))
    assert_equal(5, @header.get_nth_block_no(1,3))
    assert_equal(3, @header.get_nth_block_no(2,1))
    assert_equal(5, @header.get_nth_block_no(2,2))
    assert_equal(5, @header.get_nth_block_no(3,1))
  end

  def test_get_nth_pps
    root = @header.get_nth_pps(0)
    assert_instance_of(OLE::Storage::PPS::Root, root)
    file = @header.get_nth_pps(1)
    assert_instance_of(OLE::Storage::PPS::File, file)
    assert_equal(1, root.dir_pps)
    # -1 packed and unpacked as an unsigned 32-bit value (0xFFFFFFFF).
    min_1, = [-1].pack('V').unpack('V')
    assert_equal(min_1, root.next_pps)
    assert_equal(min_1, root.prev_pps)
    assert_equal(3904, root.data.size)
    assert_equal(3335, file.data.size)
  end

  def test_sb_start
    assert_equal(3, @header.sb_start)
  end

  def test_sb_size
    assert_equal(3904, @header.sb_size)
  end

  private

  # Fresh block table shared by the block-number tests.
  def stub_bbd_info
    {
      1 => 2,
      3 => 5,
    }
  end
end