read_xls 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +9 -0
- data/.rspec +2 -0
- data/.travis.yml +4 -0
- data/CODE_OF_CONDUCT.md +13 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +52 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +7 -0
- data/lib/read_xls.rb +49 -0
- data/lib/read_xls/evaluator/blank.rb +9 -0
- data/lib/read_xls/evaluator/boolean.rb +15 -0
- data/lib/read_xls/evaluator/extended_format.rb +20 -0
- data/lib/read_xls/evaluator/format_number.rb +47 -0
- data/lib/read_xls/evaluator/formula.rb +33 -0
- data/lib/read_xls/evaluator/number.rb +24 -0
- data/lib/read_xls/evaluator/rk_number.rb +38 -0
- data/lib/read_xls/evaluator/row.rb +26 -0
- data/lib/read_xls/evaluator/sst_string.rb +16 -0
- data/lib/read_xls/record_handler.rb +262 -0
- data/lib/read_xls/record_handler/base.rb +19 -0
- data/lib/read_xls/record_handler/blank.rb +11 -0
- data/lib/read_xls/record_handler/bof.rb +9 -0
- data/lib/read_xls/record_handler/boolerr.rb +11 -0
- data/lib/read_xls/record_handler/boundsheet.rb +45 -0
- data/lib/read_xls/record_handler/format.rb +20 -0
- data/lib/read_xls/record_handler/formula.rb +17 -0
- data/lib/read_xls/record_handler/label_sst.rb +15 -0
- data/lib/read_xls/record_handler/mul_rk.rb +30 -0
- data/lib/read_xls/record_handler/not_implemented.rb +11 -0
- data/lib/read_xls/record_handler/number.rb +18 -0
- data/lib/read_xls/record_handler/rk.rb +23 -0
- data/lib/read_xls/record_handler/row.rb +10 -0
- data/lib/read_xls/record_handler/skip.rb +8 -0
- data/lib/read_xls/record_handler/sst.rb +36 -0
- data/lib/read_xls/record_handler/string.rb +13 -0
- data/lib/read_xls/record_handler/xf.rb +19 -0
- data/lib/read_xls/spreadsheet.rb +60 -0
- data/lib/read_xls/type/extended_format.rb +25 -0
- data/lib/read_xls/version.rb +3 -0
- data/lib/read_xls/workbook.rb +11 -0
- data/lib/read_xls/workbook/shared_string_table.rb +15 -0
- data/lib/read_xls/workbook/worksheet.rb +11 -0
- data/lib/read_xls/workbook/worksheet_builder.rb +44 -0
- data/lib/read_xls/workbook_builder.rb +96 -0
- data/read_xls.gemspec +27 -0
- metadata +147 -0
@@ -0,0 +1,11 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler whose only job is to fail: calling it reports that the record
    # number being processed has no implementation.
    class NotImplemented < ::ReadXls::RecordHandler::Base
      # Error raised by #call.
      class RecordHandlerNotImplementedError < StandardError; end

      # @raise [RecordHandlerNotImplementedError] always; the message embeds
      #   `record_number` (not defined in this class — presumably supplied by
      #   the base handler).
      def call
        message = "there is no implementation for #{record_number}"
        raise RecordHandlerNotImplementedError, message
      end
    end
  end
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler for a record carrying a floating point cell value.
    #
    # Layout read from `record_data`:
    #   bytes 0..5  - three 16-bit little-endian integers: row, column, XF index
    #   bytes 6..13 - one little-endian double precision float
    class Number < ::ReadXls::RecordHandler::Base
      # Decodes the cell and registers it with the builder.
      #
      # @return whatever `builder.add_column_to_row` returns
      def call
        header = record_data.byteslice(0, 6)
        row, column, xf_index = header.unpack("v3")

        payload = record_data.byteslice(6, 8)
        number, = payload.unpack("E")

        cell = ::ReadXls::Evaluator::Number.new(builder, number, xf_index)
        builder.add_column_to_row(row, column, cell)
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler for a record carrying a cell value packed into 32 bits.
    #
    # Layout read from `record_data`:
    #   bytes 0..5 - three 16-bit little-endian integers: row, column, XF index
    #   bytes 6..9 - one 32-bit little-endian unsigned integer (the raw bits;
    #                decoding is left to Evaluator::RkNumber)
    class Rk < ::ReadXls::RecordHandler::Base
      # Extracts the cell position and raw bits, then registers the cell with
      # the builder.
      #
      # @return whatever `builder.add_column_to_row` returns
      def call
        header = record_data.byteslice(0, 6)
        row, column, xf_index = header.unpack("v3")

        packed = record_data.byteslice(6, 4)
        rk_bits, = packed.unpack("V")

        cell = ::ReadXls::Evaluator::RkNumber.new(builder, rk_bits, xf_index)
        builder.add_column_to_row(row, column, cell)
      end
    end
  end
end
|
@@ -0,0 +1,10 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler for a row description record. The first three 16-bit
    # little-endian integers of `record_data` are read as the row number and
    # the first/last column values.
    class Row < ::ReadXls::RecordHandler::Base
      # Builds a row evaluator and stores it in the builder under its row
      # number.
      #
      # @return whatever `builder.add_row` returns
      def call
        # "v3" always yields exactly three entries (nil-padded on short input).
        fields = record_data.unpack("v3")
        evaluator = ::ReadXls::Evaluator::Row.new(*fields)

        builder.add_row(fields.first, evaluator)
      end
    end
  end
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler for the shared string table (SST) record.
    #
    # Layout read from `record_data`:
    #   bytes 4..7 - 32-bit little-endian number of strings stored in the table
    #   bytes 8..  - the strings, back to back
    #
    # Each string is laid out as:
    #   2 bytes  character count
    #   1 byte   option flags (see the F_* constants)
    #   2 bytes  formatting run count   - only when F_RICH_ST is set
    #   4 bytes  extended data size     - only when F_EXT_ST is set
    #   n bytes  characters (1 or 2 bytes each, depending on F_HIGH_BYTE)
    #   4 bytes  per formatting run     - only when F_RICH_ST is set
    #   m bytes  extended data          - only when F_EXT_ST is set
    #
    # The extracted strings are the raw character bytes; no decoding is done
    # here.
    #
    # NOTE(review): string data that continues in following records is not
    # stitched together by this handler — confirm whether the caller does it.
    class Sst < ::ReadXls::RecordHandler::Base
      F_HIGH_BYTE         = 0x01 # characters are stored as 2 bytes each
      F_EXT_ST            = 0x04 # string carries an extended data block
      F_RICH_ST           = 0x08 # string carries formatting runs
      STRING_BEGIN_OFFSET = 3    # size of the fixed per-string header
      DATA_OFFSET         = 8    # where the first string starts in the record
      STRING_COUNT_OFFSET = 4    # where the string count lives in the record
      RUN_COUNT_SIZE      = 2    # size of the optional run count field
      EXT_LENGTH_SIZE     = 4    # size of the optional extended size field
      RUN_BYTE_SIZE       = 4    # size of one formatting run

      # Parses every string of the table and stores the resulting
      # SharedStringTable on the builder.
      #
      # Entries are kept in file order and are not de-duplicated: two entries
      # may share the same characters (e.g. they differ only in formatting),
      # and dropping one would shift the position of every entry after it.
      #
      # @return [::ReadXls::Workbook::SharedStringTable] the assigned table
      def call
        string_count = record_data
          .byteslice(STRING_COUNT_OFFSET, 4)
          .unpack("V")
          .first

        string_data = record_data.byteslice(DATA_OFFSET..-1)
        string_position = 0

        strings = string_count.times.map do
          char_count, grbit = string_data
            .byteslice(string_position, STRING_BEGIN_OFFSET)
            .unpack("vC")
          string_position += STRING_BEGIN_OFFSET

          # The optional header fields sit between the flags and the
          # characters, so they must be consumed before slicing the text.
          run_count = 0
          if (grbit & F_RICH_ST) != 0
            run_count = string_data
              .byteslice(string_position, RUN_COUNT_SIZE)
              .unpack("v")
              .first
            string_position += RUN_COUNT_SIZE
          end

          ext_length = 0
          if (grbit & F_EXT_ST) != 0
            ext_length = string_data
              .byteslice(string_position, EXT_LENGTH_SIZE)
              .unpack("V")
              .first
            string_position += EXT_LENGTH_SIZE
          end

          char_byte_size = (grbit & F_HIGH_BYTE) == 0 ? 1 : 2
          string_length = char_count * char_byte_size
          string = string_data.byteslice(string_position, string_length)

          # Skip the characters plus the trailing run / extended blocks so the
          # next iteration starts on the next string header.
          string_position += string_length + (run_count * RUN_BYTE_SIZE) + ext_length

          string
        end

        builder.sst = ::ReadXls::Workbook::SharedStringTable.new(strings)
      end
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler for a record holding the string result of a formula.
    #
    # Layout read from `record_data`:
    #   bytes 0..1 - 16-bit little-endian character count
    #   byte  2    - option flags; bit 0 (F_HIGH_BYTE) selects 2-byte characters
    #   bytes 3..  - the characters
    class String < ::ReadXls::RecordHandler::Base
      F_HIGH_BYTE = 0x01 # characters are stored as 2 bytes each

      # Extracts the raw character bytes and queues them on the builder.
      #
      # Only bit 0 of the flags byte decides the character width (the same
      # test the SST handler applies); other bits must not switch the width.
      #
      # @return whatever `builder.add_formula_string` returns
      def call
        char_length, grbit = record_data.byteslice(0, 3).unpack("vC")
        char_byte_size = (grbit & F_HIGH_BYTE) == 0 ? 1 : 2

        string = record_data.byteslice(3, char_byte_size * char_length)
        builder.add_formula_string(string)
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module ReadXls
  module RecordHandler
    # Handler for an extended format (XF) record. Only the 16-bit
    # little-endian format index stored at byte offset 2 is used.
    class Xf < ::ReadXls::RecordHandler::Base
      # Wraps the format index in an extended format evaluator and appends it
      # to the builder.
      #
      # @return whatever `builder.add_extended_format` returns
      def call
        format_index, = record_data.byteslice(2, 2).unpack("v")

        extended_format = ::ReadXls::Evaluator::ExtendedFormat.new(
          builder: builder,
          format_index: format_index
        )

        builder.add_extended_format(extended_format)
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module ReadXls
  # Entry point of the library: reads the "Workbook" stream of an OLE
  # document and turns its records into a workbook.
  #
  # The stream is consumed as a sequence of records, each made of a 16-bit
  # record number, a 16-bit payload length and the payload itself. Parsing
  # stops at the first record whose number equals RecordHandler::EOF.
  class Spreadsheet
    # Raised when the stream ends before an expected value could be read.
    ParsingFailedError = Class.new(StandardError)

    # Size in bytes of the record number and record length fields.
    BYTE_LENGTH = 2

    attr_accessor :biff, :position, :workbook

    # Opens the file at `xls_file_path` and parses it.
    #
    # The file is opened read-only: parsing never writes, and a read-only
    # handle also works for files the process is not allowed to modify.
    #
    # @param xls_file_path [String] path of the .xls file
    # @return [Spreadsheet]
    def self.parse(xls_file_path)
      new(
        Ole::Storage.open(xls_file_path, "rb")
      )
    end

    # Reads the whole "Workbook" stream into memory and parses it. The OLE
    # storage is closed afterwards, whether parsing succeeded or not.
    #
    # @param ole an opened OLE storage responding to `file.read` and `close`
    def initialize(ole)
      self.position = 0
      self.biff = ole.file.read("Workbook")
      self.workbook = parse_workbook
    ensure
      ole.close
    end

    # @return the worksheets of the parsed workbook
    def sheets
      workbook.worksheets
    end

    # Walks the records from the current position, dispatching each one to
    # RecordHandler until the EOF record is met.
    #
    # @return the workbook produced by the WorkbookBuilder
    # @raise [ParsingFailedError] when the stream ends prematurely
    def parse_workbook
      workbook_builder = WorkbookBuilder.new(biff)

      loop do
        record_number = read_byte
        break if record_number == ::ReadXls::RecordHandler::EOF

        record_length = read_byte
        record_data = read_data(record_length)

        ::ReadXls::RecordHandler.call(
          record_number,
          workbook_builder,
          biff,
          record_data
        )
      end

      workbook_builder.build
    end

    # Returns the next `bytes` bytes of the stream and advances the position.
    #
    # Slicing is done by byte (not by character) so the result does not
    # depend on the encoding tagged on the stream.
    #
    # @param bytes [Integer] number of bytes to read
    # @return [String]
    # @raise [ParsingFailedError] when fewer than `bytes` bytes are left
    def read_data(bytes)
      val = biff.byteslice(position, bytes)
      available = val ? val.bytesize : 0

      if available < bytes
        raise ParsingFailedError,
              "expected #{bytes} bytes at offset #{position}, got #{available}"
      end

      self.position += bytes
      val
    end

    # Reads the next 16-bit little-endian unsigned integer (despite the
    # method name, BYTE_LENGTH bytes are consumed) and advances the position.
    #
    # @return [Integer]
    # @raise [ParsingFailedError] when the stream holds no complete value at
    #   the current position
    def read_byte
      chunk = biff.byteslice(position, BYTE_LENGTH)
      self.position += BYTE_LENGTH

      # `chunk` is nil when the position lies beyond the end of the stream.
      val = chunk && chunk.unpack("v").first
      val || raise(ParsingFailedError, "expected to get value, got nil")
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module ReadXls
  module Type
    # Classifies a number format string (e.g. "M/D/YY" or "0.00") by the kind
    # of value it displays.
    class ExtendedFormat
      attr_accessor :format_string

      # Pattern => type. A format is of the given type when the pattern
      # matches its placeholder characters.
      FORMAT_MATCHERS = {
        /[YMDymd]/ => :date
      }.freeze

      # Parts of a format string that are not placeholders and therefore must
      # not take part in the classification:
      #   "..."   quoted literal text
      #   [...]   colours, conditions and locale tags such as [Red] or [$-409]
      #           (brackets holding only h/m/s letters, e.g. [h], are elapsed
      #           time placeholders and are kept)
      #   \x _x *x  escaped, padding and fill characters
      NON_PLACEHOLDER_PARTS = /"[^"]*"|\[(?![HhMmSs]+\])[^\]]*\]|[\\_*]./

      # @param options [Hash] must contain :format_string
      # @raise [KeyError] when :format_string is missing
      def initialize(options)
        self.format_string = options.fetch(:format_string)
      end

      # @return [Symbol, nil] the matched type (:date), or nil when no
      #   matcher applies (including a nil format string)
      # @raise [RuntimeError] when more than one matcher applies
      def format_type
        # Without this clean-up the "d" of "[Red]" or a letter inside quoted
        # text would turn plain numeric formats into dates.
        placeholders = format_string.to_s.gsub(NON_PLACEHOLDER_PARTS, "")
        matched_types = FORMAT_MATCHERS.select { |matcher, _| placeholders =~ matcher }

        if matched_types.length > 1
          raise "got more than one match, expected only one matched format type"
        end

        matched_types.values.first
      end
    end
  end
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module ReadXls
  class Workbook
    # Collects the rows and formula strings of one worksheet while its
    # records are processed, then builds the Worksheet from them.
    class WorksheetBuilder
      attr_accessor :rows, :formula_strings, :sst, :formats, :extended_formats

      def initialize
        self.rows = []
        self.formula_strings = []
      end

      # Stores `row` at position `row_index`.
      #
      # @return the stored row
      def add_row(row_index, row)
        rows[row_index] = row
      end

      # Adds `value` as column `column_index` of an already registered row.
      #
      # @raise [RuntimeError] when no row is stored at `row_index`
      def add_column_to_row(row_index, column_index, value)
        target = rows[row_index]
        raise "could not find row" unless target

        target.add_column(column_index, value)
      end

      # Appends a formula result string to the queue.
      #
      # @return [Array] the queue
      def add_formula_string(string)
        formula_strings << string
      end

      # @return [::ReadXls::Workbook::Worksheet] worksheet holding the
      #   evaluated rows
      def build
        ::ReadXls::Workbook::Worksheet.new(rows: build_rows)
      end

      # Removes and returns the oldest queued formula string (nil when the
      # queue is empty).
      def next_formula_string!
        formula_strings.shift
      end

      private

      # Fills every gap in `rows` with an empty row evaluator, then evaluates
      # all rows in order.
      def build_rows
        rows.each_index do |index|
          rows[index] ||= ::ReadXls::Evaluator::Row.new(index, 0, 0)
        end

        rows.map { |row| row.evaluate }
      end
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module ReadXls
  # Collects workbook-level data (number formats, extended formats, the
  # shared string table and the worksheet builders) while records are
  # processed, then builds the Workbook.
  class WorkbookBuilder
    attr_accessor :biff, :worksheet_builders, :sst, :formats, :extended_formats

    # Number of reserved built-in format slots (indices 23..36) that have no
    # fixed format string.
    RESERVED_FORMAT_SLOTS = 14

    # @param biff the raw workbook stream, kept for handlers that need it
    def initialize(biff)
      self.biff = biff
      self.worksheet_builders = []
      self.formats = default_formats
      self.extended_formats = []
    end

    # @return [Array] the registered worksheet builders
    def add_worksheet_builder(worksheet_builder)
      self.worksheet_builders.push(worksheet_builder)
    end

    # Registers (or overrides) the format string stored at `format_index`.
    def add_format(format_index, format_string)
      self.formats[format_index] = format_string
    end

    # @return [Array] the registered extended formats
    def add_extended_format(extended_format)
      self.extended_formats.push(extended_format)
    end

    # @return [::ReadXls::Workbook] workbook with formats, extended formats
    #   and worksheets filled in
    # @raise [RuntimeError] when a worksheet exists but no shared string
    #   table was set
    def build
      workbook = ::ReadXls::Workbook.new
      workbook.formats = build_formats
      workbook.extended_formats = build_extended_formats
      workbook.worksheets = build_worksheets
      workbook
    end

    private

    # Snapshot of the format table, taken once and then reused.
    def build_formats
      @_formats ||= formats.dup
    end

    # Evaluated extended formats, computed once and then reused.
    def build_extended_formats
      @_extended_formats ||= extended_formats.map(&:evaluate)
    end

    # Hands the shared data to every worksheet builder and builds it.
    def build_worksheets
      worksheet_builders.map do |worksheet_builder|
        raise "no sst found!" if sst.nil?

        worksheet_builder.sst = sst
        worksheet_builder.formats = build_formats
        worksheet_builder.extended_formats = build_extended_formats

        worksheet_builder.build
      end
    end

    # Built-in number formats, positioned so that the array index equals the
    # built-in format index: 0..22, then the reserved slots 23..36 (nil),
    # then 37..49.
    def default_formats
      leading = [
        "General",                        #  0
        "0",                              #  1
        "0.00",                           #  2
        "#,##0",                          #  3
        "#,##0.00",                       #  4
        "$#,##0_);($#,##0)",              #  5
        "$#,##0_);[Red]($#,##0)",         #  6
        "$#,##0.00_);($#,##0.00)",        #  7
        "$#,##0.00_);[Red]($#,##0.00)",   #  8
        "0%",                             #  9
        "0.00%",                          # 10
        "0.00E+00",                       # 11
        "# ?/?",                          # 12
        "# ??/??",                        # 13
        "M/D/YY",                         # 14
        "D-MMM-YY",                       # 15
        "D-MMM",                          # 16
        "MMM-YY",                         # 17
        "h:mm AM/PM",                     # 18
        "h:mm:ss AM/PM",                  # 19
        "h:mm",                           # 20
        "h:mm:ss",                        # 21
        "M/D/YY h:mm"                     # 22
      ]

      trailing = [
        "_(#,##0_);(#,##0)",                                   # 37
        "_(#,##0_);[Red](#,##0)",                              # 38
        "_(#,##0.00_);(#,##0.00)",                             # 39
        "_(#,##0.00_);[Red](#,##0.00)",                        # 40
        '_(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)',             # 41
        '_($* #,##0_);_($* (#,##0);_($* "-"_);_(@_)',          # 42
        '_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)',     # 43
        '_($* #,##0.00_);_($* (#,##0.00);_($* "-"??_);_(@_)',  # 44
        "mm:ss",                                               # 45
        "[h]:mm:ss",                                           # 46
        "mm:ss.0",                                             # 47
        "##0.0E+0",                                            # 48
        "@"                                                    # 49
      ]

      leading + Array.new(RESERVED_FORMAT_SLOTS) + trailing
    end
  end
end
|
data/read_xls.gemspec
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for read_xls.

# Make lib/ loadable so the version constant can be required below.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require "read_xls/version"

Gem::Specification.new do |spec|
  spec.name    = "read_xls"
  spec.version = ReadXls::VERSION
  spec.authors = ["P2Binvestor"]
  spec.email   = ["techadmin@p2bi.com"]

  spec.summary     = "Parse XLS files."
  spec.description = ""
  spec.homepage    = "http://github.com/p2bi/read_xls"
  spec.license     = "MIT"

  # Package every git-tracked file except those under test/, spec/ or
  # features/.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }

  # Executables are the packaged files found under exe/.
  spec.bindir        = "exe"
  spec.executables   = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency "ruby-ole"

  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
end
|