read_xls 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +52 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/read_xls.rb +49 -0
- data/lib/read_xls/evaluator/blank.rb +9 -0
- data/lib/read_xls/evaluator/boolean.rb +15 -0
- data/lib/read_xls/evaluator/extended_format.rb +20 -0
- data/lib/read_xls/evaluator/format_number.rb +47 -0
- data/lib/read_xls/evaluator/formula.rb +33 -0
- data/lib/read_xls/evaluator/number.rb +24 -0
- data/lib/read_xls/evaluator/rk_number.rb +38 -0
- data/lib/read_xls/evaluator/row.rb +26 -0
- data/lib/read_xls/evaluator/sst_string.rb +16 -0
- data/lib/read_xls/record_handler.rb +262 -0
- data/lib/read_xls/record_handler/base.rb +19 -0
- data/lib/read_xls/record_handler/blank.rb +11 -0
- data/lib/read_xls/record_handler/bof.rb +9 -0
- data/lib/read_xls/record_handler/boolerr.rb +11 -0
- data/lib/read_xls/record_handler/boundsheet.rb +45 -0
- data/lib/read_xls/record_handler/format.rb +20 -0
- data/lib/read_xls/record_handler/formula.rb +17 -0
- data/lib/read_xls/record_handler/label_sst.rb +15 -0
- data/lib/read_xls/record_handler/mul_rk.rb +30 -0
- data/lib/read_xls/record_handler/not_implemented.rb +11 -0
- data/lib/read_xls/record_handler/number.rb +18 -0
- data/lib/read_xls/record_handler/rk.rb +23 -0
- data/lib/read_xls/record_handler/row.rb +10 -0
- data/lib/read_xls/record_handler/skip.rb +8 -0
- data/lib/read_xls/record_handler/sst.rb +36 -0
- data/lib/read_xls/record_handler/string.rb +13 -0
- data/lib/read_xls/record_handler/xf.rb +19 -0
- data/lib/read_xls/spreadsheet.rb +60 -0
- data/lib/read_xls/type/extended_format.rb +25 -0
- data/lib/read_xls/version.rb +3 -0
- data/lib/read_xls/workbook.rb +11 -0
- data/lib/read_xls/workbook/shared_string_table.rb +15 -0
- data/lib/read_xls/workbook/worksheet.rb +11 -0
- data/lib/read_xls/workbook/worksheet_builder.rb +44 -0
- data/lib/read_xls/workbook_builder.rb +96 -0
- data/read_xls.gemspec +27 -0
- metadata +147 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler whose #call never succeeds: it reports the record number it was
    # asked to handle by raising.
    # NOTE(review): `record_number` is presumably supplied by Base — confirm.
    class NotImplemented < ::ReadXls::RecordHandler::Base
      # Error raised by #call.
      class RecordHandlerNotImplementedError < StandardError; end

      # @raise [RecordHandlerNotImplementedError] always
      def call
        message = "there is no implementation for #{record_number}"
        raise RecordHandlerNotImplementedError, message
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Decodes a record made of three little-endian 16-bit words (row, column,
    # XF index) followed by an 8-byte little-endian double, and hands the
    # resulting number cell to the builder.
    class Number < ::ReadXls::RecordHandler::Base
      # @return whatever builder.add_column_to_row returns
      def call
        header  = record_data.byteslice(0, 6)
        payload = record_data.byteslice(6, 8)

        row, column, xf_index = header.unpack("v3")
        number = payload.unpack("E")[0]

        cell = ::ReadXls::Evaluator::Number.new(builder, number, xf_index)
        builder.add_column_to_row(row, column, cell)
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Decodes a record made of three little-endian 16-bit words (row, column,
    # XF index) followed by a 32-bit little-endian value. The raw 32 bits are
    # passed on undecoded; Evaluator::RkNumber is responsible for interpreting
    # them.
    class Rk < ::ReadXls::RecordHandler::Base
      # @return whatever builder.add_column_to_row returns
      def call
        header = record_data.byteslice(0, 6)
        row, column, xf_index = header.unpack("v3")

        rk_bits, = record_data.byteslice(6, 4).unpack("V")

        cell = ::ReadXls::Evaluator::RkNumber.new(builder, rk_bits, xf_index)
        builder.add_column_to_row(row, column, cell)
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Reads the first three little-endian 16-bit words of the record (row
    # number, first column, last column) and registers a row evaluator for
    # that row number with the builder.
    class Row < ::ReadXls::RecordHandler::Base
      # @return whatever builder.add_row returns
      def call
        index, first_column, last_column = record_data.unpack("v3")
        evaluator = ::ReadXls::Evaluator::Row.new(index, first_column, last_column)

        builder.add_row(index, evaluator)
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Parses the shared string table record and stores the decoded table on
    # the builder.
    #
    # Every entry starts with a 3-byte header: a 16-bit character count and a
    # flags byte. Depending on the flags, the header is followed by a 16-bit
    # formatting-run count and/or a 32-bit extension size; the character data
    # comes next and is itself followed by the formatting runs and the
    # extension block, which are skipped.
    #
    # NOTE(review): only the bytes of this record are read; strings that spill
    # over into following CONTINUE records are not handled here — confirm
    # whether the caller merges those records first.
    class Sst < ::ReadXls::RecordHandler::Base
      F_HIGH_BYTE         = 0x01 # characters are stored as 2 bytes each
      F_EXT_ST            = 0x04 # a 4-byte extension size follows the header
      F_RICH_ST           = 0x08 # a 2-byte formatting-run count follows the header
      STRING_BEGIN_OFFSET = 3    # size of the per-string header (count + flags)
      DATA_OFFSET         = 8    # first string starts after the two 32-bit counts
      STRING_COUNT_OFFSET = 4    # offset of the 32-bit count that is iterated over
      RUN_COUNT_SIZE      = 2
      EXT_SIZE_SIZE       = 4
      FORMAT_RUN_SIZE     = 4    # bytes per formatting run

      # @return [ReadXls::Workbook::SharedStringTable] the table assigned to
      #   builder.sst
      def call
        string_count = record_data
          .byteslice(STRING_COUNT_OFFSET, 4)
          .unpack("V")
          .first

        string_data     = record_data.byteslice(DATA_OFFSET..-1)
        string_position = 0

        strings = string_count.times.map do
          char_count, grbit = string_data
            .byteslice(string_position, STRING_BEGIN_OFFSET)
            .unpack("vC")

          string_begin = string_position + STRING_BEGIN_OFFSET

          # Optional header fields; when present they sit between the flags
          # byte and the character data.
          run_count = 0
          if (grbit & F_RICH_ST) != 0
            run_count = string_data.byteslice(string_begin, RUN_COUNT_SIZE).unpack("v").first
            string_begin += RUN_COUNT_SIZE
          end

          ext_size = 0
          if (grbit & F_EXT_ST) != 0
            ext_size = string_data.byteslice(string_begin, EXT_SIZE_SIZE).unpack("V").first
            string_begin += EXT_SIZE_SIZE
          end

          char_byte_size = (grbit & F_HIGH_BYTE) == 0 ? 1 : 2
          string_length  = char_count * char_byte_size

          # The next entry starts after the characters plus the trailing
          # formatting runs and extension data of this one.
          string_position = string_begin + string_length +
                            run_count * FORMAT_RUN_SIZE + ext_size

          string_data.byteslice(string_begin, string_length)
        end

        # The table is addressed by position, so entries must keep their
        # original index; de-duplicating here would shift every string that
        # follows a repeated one.
        builder.sst = ::ReadXls::Workbook::SharedStringTable.new(strings)
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Reads the string stored in the record (16-bit character count, flags
    # byte, character data) and queues it on the builder as a formula string.
    class String < ::ReadXls::RecordHandler::Base
      # Flag bit telling whether characters take two bytes each; same bit the
      # Sst handler tests.
      F_HIGH_BYTE = 0x01

      # @return whatever builder.add_formula_string returns
      def call
        char_length, grbit = record_data.byteslice(0, 3).unpack("vC")

        # Test only the high-byte bit: other bits of the flags byte say
        # nothing about the character width.
        char_byte_size = (grbit & F_HIGH_BYTE) == 0 ? 1 : 2

        string = record_data.byteslice(3, char_byte_size * char_length)
        builder.add_formula_string(string)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Reads the 16-bit little-endian format index stored at byte offset 2 of
    # the record and registers a matching extended-format evaluator with the
    # builder.
    class Xf < ::ReadXls::RecordHandler::Base
      # @return whatever builder.add_extended_format returns
      def call
        format_index, = record_data.byteslice(2, 2).unpack("v")

        extended_format = ::ReadXls::Evaluator::ExtendedFormat.new(
          :builder      => builder,
          :format_index => format_index
        )

        builder.add_extended_format(extended_format)
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module ReadXls
  # Reads the "Workbook" stream of an OLE compound file and turns its records
  # into a workbook via WorkbookBuilder and the record handlers.
  class Spreadsheet
    # Raised when the stream ends where a record header or body is expected.
    ParsingFailedError = Class.new(StandardError)

    # Size in bytes of the record-number and record-length fields.
    BYTE_LENGTH = 2

    attr_accessor :biff, :position, :workbook

    # Opens the file at +xls_file_path+ and parses it.
    #
    # The storage is opened read-only: nothing here writes to it, so a
    # writable mode would only demand write permission on the file.
    # NOTE(review): ruby-ole appears to flush a writable storage back to disk
    # on close — confirm against the ruby-ole documentation.
    #
    # @param xls_file_path [String] path handed to Ole::Storage.open
    # @return [ReadXls::Spreadsheet]
    def self.parse(xls_file_path)
      new(
        Ole::Storage.open(xls_file_path, "rb")
      )
    end

    # Reads the "Workbook" stream from +ole+ and parses it immediately.
    # +ole+ is closed before returning, whether or not parsing succeeded.
    #
    # @param ole an object answering +file.read("Workbook")+ and +close+
    def initialize(ole)
      self.position = 0
      self.biff     = ole.file.read("Workbook")
      self.workbook = parse_workbook
    ensure
      ole.close
    end

    # @return the worksheets of the parsed workbook
    def sheets
      workbook.worksheets
    end

    # Walks the stream record by record from the current position, passing
    # each record to ::ReadXls::RecordHandler, and stops at the first record
    # whose number equals RecordHandler::EOF.
    #
    # @return the object produced by WorkbookBuilder#build
    # @raise [ParsingFailedError] if the stream ends before the EOF record
    def parse_workbook
      workbook_builder = WorkbookBuilder.new(biff)

      loop do
        record_number = read_byte
        break if record_number == ::ReadXls::RecordHandler::EOF

        record_length = read_byte
        record_data   = read_data(record_length)

        ::ReadXls::RecordHandler.call(
          record_number,
          workbook_builder,
          biff,
          record_data
        )
      end

      workbook_builder.build
    end

    # Returns the next +bytes+ bytes of the stream and advances the position
    # past them.
    #
    # @param bytes [Integer] number of bytes to read
    # @return [String]
    # @raise [ParsingFailedError] if fewer than +bytes+ bytes remain
    def read_data(bytes)
      val = biff.byteslice(position, bytes)
      self.position += bytes

      if val.nil? || val.bytesize < bytes
        raise ParsingFailedError, "expected #{bytes} bytes of record data, got #{val ? val.bytesize : 0}"
      end

      val
    end

    # Reads the next little-endian 16-bit unsigned value (despite the name, a
    # two-byte word) and advances the position past it.
    #
    # @return [Integer]
    # @raise [ParsingFailedError] if fewer than two bytes remain
    def read_byte
      # byteslice returns nil once the position is past the end of the
      # stream; treat that like any other missing value.
      word = biff.byteslice(position, BYTE_LENGTH)
      self.position += BYTE_LENGTH

      val = word && word.unpack("v").first
      val || raise(ParsingFailedError, "expected to get value, got nil")
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ReadXls
  module Type
    # Wraps a number-format string and classifies it.
    class ExtendedFormat
      attr_accessor :format_string

      # Pattern => type. A format whose pattern characters match is reported
      # as that type.
      FORMAT_MATCHERS = {
        /[YMDymd]/ => :date
      }.freeze

      # Parts of a format string that are not pattern characters and must not
      # take part in classification:
      #   "text"   quoted literal text
      #   [Red]    bracketed sections (colors, locales, conditions); the
      #            bracketed h/m/s codes such as [h] and [mm] are kept
      #   \x       a backslash-escaped literal character
      #   _x / *x  spacing and fill directives, which consume the next character
      LITERAL_SECTIONS = /"[^"]*"|\[(?![hms]+\])[^\]]*\]|\\.|[_*]./i

      # @param options [Hash] must contain :format_string
      # @raise [KeyError] if :format_string is missing
      def initialize(options)
        self.format_string = options.fetch(:format_string)
      end

      # Classifies the format string using FORMAT_MATCHERS after literal
      # sections have been removed, so that e.g. the "d" of "[Red]" does not
      # turn a currency format into a date.
      #
      # @return [Symbol, nil] the matched type (currently only :date), or nil
      #   when nothing matches or the format string is nil
      # @raise [RuntimeError] if more than one matcher applies
      def format_type
        pattern = format_string.to_s.gsub(LITERAL_SECTIONS, "")
        matched_types = FORMAT_MATCHERS.select { |matcher, _| pattern =~ matcher }

        if matched_types.length > 1
          raise "got more than one match, expected only one matched format type"
        end

        matched_types.values.first
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module ReadXls
  class Workbook
    # Collects the rows and formula strings of one worksheet while records
    # are being read, then builds the Worksheet from them.
    class WorksheetBuilder
      attr_accessor :rows, :formula_strings, :sst, :formats, :extended_formats

      # Starts with no rows and no queued formula strings.
      def initialize
        @rows            = []
        @formula_strings = []
      end

      # Stores +row+ at position +row_index+.
      def add_row(row_index, row)
        rows[row_index] = row
      end

      # Adds +value+ at +column_index+ of the row stored at +row_index+.
      #
      # @raise [RuntimeError] if no row has been stored at +row_index+
      def add_column_to_row(row_index, column_index, value)
        target = rows[row_index]
        raise "could not find row" unless target

        target.add_column(column_index, value)
      end

      # Appends +string+ to the queue of formula strings.
      def add_formula_string(string)
        formula_strings << string
      end

      # @return [ReadXls::Workbook::Worksheet] worksheet holding the
      #   evaluated rows
      def build
        ::ReadXls::Workbook::Worksheet.new(:rows => build_rows)
      end

      # Removes and returns the oldest queued formula string (nil when the
      # queue is empty).
      def next_formula_string!
        formula_strings.shift
      end

      private

      # Fills every gap in +rows+ with an empty row evaluator, then evaluates
      # all rows in order.
      def build_rows
        rows.each_index do |row_index|
          rows[row_index] ||= ::ReadXls::Evaluator::Row.new(row_index, 0, 0)
        end

        rows.map { |row| row.evaluate }
      end
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module ReadXls
  # Collects what the record handlers extract from the workbook stream
  # (formats, extended formats, shared strings, worksheet builders) and
  # assembles the final Workbook.
  class WorkbookBuilder
    attr_accessor :biff, :worksheet_builders, :sst, :formats, :extended_formats

    # Number of built-in format ids (23 through 36) that have no default
    # string in the table below.
    UNASSIGNED_BUILTIN_FORMATS = 14

    # @param biff the raw workbook stream; only stored here
    def initialize(biff)
      self.biff               = biff
      self.worksheet_builders = []
      self.formats            = default_formats
      self.extended_formats   = []
    end

    # Registers a worksheet builder; worksheets are built in registration order.
    def add_worksheet_builder(worksheet_builder)
      self.worksheet_builders.push(worksheet_builder)
    end

    # Stores +format_string+ under +format_index+, replacing any default
    # stored at that index.
    def add_format(format_index, format_string)
      self.formats[format_index] = format_string
    end

    # Appends an extended format; its position in the list is its index.
    def add_extended_format(extended_format)
      self.extended_formats.push(extended_format)
    end

    # @return [ReadXls::Workbook] workbook with formats, extended formats and
    #   worksheets filled in
    # @raise [RuntimeError] if there are worksheets but no shared string table
    def build
      workbook = ::ReadXls::Workbook.new
      workbook.formats          = build_formats
      workbook.extended_formats = build_extended_formats
      workbook.worksheets       = build_worksheets
      workbook
    end

    private

    # Memoized copy of the format table.
    def build_formats
      @_formats ||= formats.dup
    end

    # Memoized list of evaluated extended formats.
    def build_extended_formats
      @_extended_formats ||= extended_formats.map(&:evaluate)
    end

    # Hands the shared tables to every worksheet builder and builds it.
    def build_worksheets
      worksheet_builders.map do |worksheet_builder|
        raise "no sst found!" if sst.nil?

        worksheet_builder.sst              = sst
        worksheet_builder.formats          = build_formats
        worksheet_builder.extended_formats = build_extended_formats

        worksheet_builder.build
      end
    end

    # Built-in number formats, positioned so that the array index equals the
    # built-in format id. Ids 0-22 and 37-49 have default strings; the ids in
    # between are left nil so that the second group lands on its proper
    # indexes instead of directly after id 22.
    #
    # @return [Array<String, nil>] a fresh 50-element array
    def default_formats
      ids_0_to_22 = [
        "General",
        "0",
        "0.00",
        "#,##0",
        "#,##0.00",
        "$#,##0_);($#,##0)",
        "$#,##0_);[Red]($#,##0)",
        "$#,##0.00_);($#,##0.00)",
        "$#,##0.00_);[Red]($#,##0.00)",
        "0%",
        "0.00%",
        "0.00E+00",
        "# ?/?",
        "# ??/??",
        "M/D/YY",
        "D-MMM-YY",
        "D-MMM",
        "MMM-YY",
        "h:mm AM/PM",
        "h:mm:ss AM/PM",
        "h:mm",
        "h:mm:ss",
        "M/D/YY h:mm"
      ]

      ids_37_to_49 = [
        "_(#,##0_);(#,##0)",
        "_(#,##0_);[Red](#,##0)",
        "_(#,##0.00_);(#,##0.00)",
        "_(#,##0.00_);[Red](#,##0.00)",
        '_($* #,##0_);_($* (#,##0);_($* "-"_);_(@_)',
        '_(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)',
        '_($* #,##0.00_);_($* (#,##0.00);_($* "-"??_);_(@_)',
        '_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)',
        "mm:ss",
        "[h]:mm:ss",
        "mm:ss.0",
        "##0.0E+0",
        "@"
      ]

      ids_0_to_22 + Array.new(UNASSIGNED_BUILTIN_FORMATS) + ids_37_to_49
    end
  end
end
|
data/read_xls.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for read_xls.

# Make the gem's own lib directory loadable so the version constant can be read.
lib_dir = File.expand_path("lib", File.dirname(__FILE__))
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "read_xls/version"

Gem::Specification.new do |gem|
  gem.name    = "read_xls"
  gem.version = ReadXls::VERSION
  gem.authors = ["P2Binvestor"]
  gem.email   = ["techadmin@p2bi.com"]

  gem.summary     = "Parse XLS files."
  gem.description = ""
  gem.homepage    = "http://github.com/p2bi/read_xls"
  gem.license     = "MIT"

  # Ship every file tracked by git except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.reject { |path| path =~ %r{^(test|spec|features)/} }

  # Executables are the tracked files under exe/, listed by base name.
  gem.bindir        = "exe"
  gem.executables   = gem.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_dependency "ruby-ole"

  gem.add_development_dependency "bundler", "~> 1.10"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec"
end
|