read_xls 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +9 -0
  3. data/.rspec +2 -0
  4. data/.travis.yml +4 -0
  5. data/CODE_OF_CONDUCT.md +13 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE +21 -0
  8. data/README.md +52 -0
  9. data/Rakefile +6 -0
  10. data/bin/console +14 -0
  11. data/bin/setup +7 -0
  12. data/lib/read_xls.rb +49 -0
  13. data/lib/read_xls/evaluator/blank.rb +9 -0
  14. data/lib/read_xls/evaluator/boolean.rb +15 -0
  15. data/lib/read_xls/evaluator/extended_format.rb +20 -0
  16. data/lib/read_xls/evaluator/format_number.rb +47 -0
  17. data/lib/read_xls/evaluator/formula.rb +33 -0
  18. data/lib/read_xls/evaluator/number.rb +24 -0
  19. data/lib/read_xls/evaluator/rk_number.rb +38 -0
  20. data/lib/read_xls/evaluator/row.rb +26 -0
  21. data/lib/read_xls/evaluator/sst_string.rb +16 -0
  22. data/lib/read_xls/record_handler.rb +262 -0
  23. data/lib/read_xls/record_handler/base.rb +19 -0
  24. data/lib/read_xls/record_handler/blank.rb +11 -0
  25. data/lib/read_xls/record_handler/bof.rb +9 -0
  26. data/lib/read_xls/record_handler/boolerr.rb +11 -0
  27. data/lib/read_xls/record_handler/boundsheet.rb +45 -0
  28. data/lib/read_xls/record_handler/format.rb +20 -0
  29. data/lib/read_xls/record_handler/formula.rb +17 -0
  30. data/lib/read_xls/record_handler/label_sst.rb +15 -0
  31. data/lib/read_xls/record_handler/mul_rk.rb +30 -0
  32. data/lib/read_xls/record_handler/not_implemented.rb +11 -0
  33. data/lib/read_xls/record_handler/number.rb +18 -0
  34. data/lib/read_xls/record_handler/rk.rb +23 -0
  35. data/lib/read_xls/record_handler/row.rb +10 -0
  36. data/lib/read_xls/record_handler/skip.rb +8 -0
  37. data/lib/read_xls/record_handler/sst.rb +36 -0
  38. data/lib/read_xls/record_handler/string.rb +13 -0
  39. data/lib/read_xls/record_handler/xf.rb +19 -0
  40. data/lib/read_xls/spreadsheet.rb +60 -0
  41. data/lib/read_xls/type/extended_format.rb +25 -0
  42. data/lib/read_xls/version.rb +3 -0
  43. data/lib/read_xls/workbook.rb +11 -0
  44. data/lib/read_xls/workbook/shared_string_table.rb +15 -0
  45. data/lib/read_xls/workbook/worksheet.rb +11 -0
  46. data/lib/read_xls/workbook/worksheet_builder.rb +44 -0
  47. data/lib/read_xls/workbook_builder.rb +96 -0
  48. data/read_xls.gemspec +27 -0
  49. metadata +147 -0
@@ -0,0 +1,11 @@
module ReadXls
  module RecordHandler
    # Handler whose only behaviour is to raise, reporting the record number it
    # was asked to process. `record_number` is not defined in this class; it is
    # presumably provided by Base (not visible here).
    class NotImplemented < ::ReadXls::RecordHandler::Base
      # Error raised by #call.
      class RecordHandlerNotImplementedError < StandardError; end

      # @raise [RecordHandlerNotImplementedError] always
      def call
        message = "there is no implementation for #{record_number}"
        raise RecordHandlerNotImplementedError, message
      end
    end
  end
end
@@ -0,0 +1,18 @@
module ReadXls
  module RecordHandler
    # Decodes a record made of three 16-bit integers (row, column, XF index)
    # followed by an 8-byte float, and registers the value on the builder.
    # `record_data` and `builder` are presumably provided by Base (not visible
    # here).
    class Number < ::ReadXls::RecordHandler::Base
      # Wraps the decoded float in an Evaluator::Number and adds it to the
      # addressed row/column of the builder.
      def call
        header = record_data.byteslice(0, 6)
        payload = record_data.byteslice(6, 8)

        # "v3": three little-endian unsigned 16-bit integers.
        row, column, xf_index = header.unpack("v3")
        # "E": little-endian double-precision float.
        value = payload.unpack("E")[0]

        evaluator = ::ReadXls::Evaluator::Number.new(builder, value, xf_index)
        builder.add_column_to_row(row, column, evaluator)
      end
    end
  end
end
@@ -0,0 +1,23 @@
module ReadXls
  module RecordHandler
    # Decodes a record made of three 16-bit integers (row, column, XF index)
    # followed by a 32-bit value, and registers it on the builder.
    # `record_data` and `builder` are presumably provided by Base (not visible
    # here).
    class Rk < ::ReadXls::RecordHandler::Base
      # Hands the raw 32-bit value to Evaluator::RkNumber (which is responsible
      # for interpreting it) and adds the result to the addressed cell.
      def call
        header = record_data.byteslice(0, 6)
        payload = record_data.byteslice(6, 4)

        # "v3": three little-endian unsigned 16-bit integers.
        row, column, xf_index = header.unpack("v3")
        # "V": one little-endian unsigned 32-bit integer.
        rk_bits = payload.unpack("V")[0]

        evaluator = ::ReadXls::Evaluator::RkNumber.new(builder, rk_bits, xf_index)
        builder.add_column_to_row(row, column, evaluator)
      end
    end
  end
end
@@ -0,0 +1,10 @@
module ReadXls
  module RecordHandler
    # Registers a row on the builder from the first three 16-bit integers of
    # the record. `record_data` and `builder` are presumably provided by Base
    # (not visible here).
    class Row < ::ReadXls::RecordHandler::Base
      # Reads row number, first column and last column, then stores an
      # Evaluator::Row for that row number on the builder.
      def call
        # "v3": three little-endian unsigned 16-bit integers.
        row_number, first_col, last_col = record_data.unpack("v3")

        evaluator = ::ReadXls::Evaluator::Row.new(row_number, first_col, last_col)
        builder.add_row(row_number, evaluator)
      end
    end
  end
end
@@ -0,0 +1,8 @@
module ReadXls
  module RecordHandler
    # Handler that deliberately does nothing with the record it is given.
    class Skip < ::ReadXls::RecordHandler::Base
      # Accepts and ignores any arguments.
      #
      # @return [nil]
      def call(*)
        nil
      end
    end
  end
end
@@ -0,0 +1,36 @@
module ReadXls
  module RecordHandler
    # Parses the shared string table record and stores the resulting
    # SharedStringTable on the builder.
    #
    # Record layout as read here: a 32-bit string count at byte offset 4,
    # string entries starting at byte offset 8. Each entry starts with a
    # 16-bit character count and a flags byte, optionally followed by a
    # 16-bit formatting-run count (rich text flag) and a 32-bit extension size
    # (extension flag), then the characters, then the formatting runs and the
    # extension data.
    #
    # `record_data` and `builder` are presumably provided by Base (not visible
    # here). Strings that spill over into continuation records are not
    # handled.
    class Sst < ::ReadXls::RecordHandler::Base
      F_HIGH_BYTE = 0x01 # characters are stored as 2 bytes each
      F_EXT_ST = 0x04    # entry carries trailing extension (phonetic) data
      F_RICH_ST = 0x08   # entry carries trailing formatting runs
      STRING_BEGIN_OFFSET = 3
      DATA_OFFSET = 8
      STRING_COUNT_OFFSET = 4
      RUN_COUNT_SIZE = 2
      EXT_SIZE_SIZE = 4
      FORMAT_RUN_SIZE = 4

      # Extracts the raw bytes of every string in the table, in table order.
      #
      # The table is addressed by position, so entries are kept exactly as
      # they appear; duplicates are NOT removed because that would shift the
      # index of every following string.
      def call
        string_count = record_data
          .byteslice(STRING_COUNT_OFFSET, 4)
          .unpack("V")
          .first

        string_data = record_data.byteslice(DATA_OFFSET..-1)
        string_position = 0

        strings = string_count.times.map do
          char_count, grbit = string_data
            .byteslice(string_position, STRING_BEGIN_OFFSET)
            .unpack("vC")
          cursor = string_position + STRING_BEGIN_OFFSET

          # Optional header fields come before the characters and must be
          # skipped, otherwise they are mistaken for string content.
          run_count = 0
          if (grbit & F_RICH_ST) != 0
            run_count = string_data.byteslice(cursor, RUN_COUNT_SIZE).unpack("v").first
            cursor += RUN_COUNT_SIZE
          end

          ext_size = 0
          if (grbit & F_EXT_ST) != 0
            ext_size = string_data.byteslice(cursor, EXT_SIZE_SIZE).unpack("V").first
            cursor += EXT_SIZE_SIZE
          end

          char_byte_size = (grbit & F_HIGH_BYTE) == 0 ? 1 : 2
          string_length = char_count * char_byte_size

          # The next entry starts after the characters plus any trailing
          # formatting runs and extension data.
          string_position = cursor + string_length + (run_count * FORMAT_RUN_SIZE) + ext_size

          string_data.byteslice(cursor, string_length)
        end

        builder.sst = ::ReadXls::Workbook::SharedStringTable.new(strings)
      end
    end
  end
end
@@ -0,0 +1,13 @@
module ReadXls
  module RecordHandler
    # Reads the string carried by the record and queues it on the builder as a
    # formula string. `record_data` and `builder` are presumably provided by
    # Base (not visible here).
    class String < ::ReadXls::RecordHandler::Base
      # Bit 0 of the flags byte: set when characters occupy 2 bytes each.
      F_HIGH_BYTE = 0x01

      # Layout as read here: 16-bit character count, one flags byte, then the
      # characters.
      def call
        char_length, grbit = record_data.byteslice(0, 3).unpack("vC")
        # Only bit 0 decides the character width (same test as the Sst
        # handler); other bits of the flags byte must not affect it.
        char_byte_size = (grbit & F_HIGH_BYTE) == 0 ? 1 : 2

        string = record_data.byteslice(3, char_byte_size * char_length)
        builder.add_formula_string(string)
      end
    end
  end
end
@@ -0,0 +1,19 @@
module ReadXls
  module RecordHandler
    # Registers an extended format on the builder, using the 16-bit format
    # index found at byte offset 2 of the record. `record_data` and `builder`
    # are presumably provided by Base (not visible here).
    class Xf < ::ReadXls::RecordHandler::Base
      # Builds an Evaluator::ExtendedFormat for the record's format index and
      # appends it to the builder.
      def call
        # "v": one little-endian unsigned 16-bit integer.
        format_index = record_data.byteslice(2, 2).unpack("v")[0]

        evaluator = ::ReadXls::Evaluator::ExtendedFormat.new(
          :builder => builder,
          :format_index => format_index
        )
        builder.add_extended_format(evaluator)
      end
    end
  end
end
@@ -0,0 +1,60 @@
module ReadXls
  # Entry point: reads the "Workbook" stream out of an OLE container and
  # turns its records into a workbook via WorkbookBuilder and RecordHandler.
  class Spreadsheet
    # Raised when a record header cannot be read from the stream.
    ParsingFailedError = Class.new(StandardError)
    # Size in bytes of the words read by #read_byte (record number / length).
    BYTE_LENGTH = 2

    attr_accessor :biff, :position, :workbook

    # Opens the file at +xls_file_path+ and parses it.
    #
    # The file is opened read-only ("rb"): parsing never writes, and a
    # writable mode would fail on read-only files and allow the OLE library
    # to write to the input.
    #
    # @param xls_file_path [String] path of the .xls file
    # @return [Spreadsheet]
    def self.parse(xls_file_path)
      new(
        Ole::Storage.open(xls_file_path, "rb")
      )
    end

    # Reads the "Workbook" stream from +ole+, parses it, and always closes
    # +ole+ afterwards, even when parsing raises.
    #
    # @param ole [#file, #close] opened OLE storage
    def initialize(ole)
      self.position = 0
      self.biff = ole.file.read("Workbook")
      self.workbook = parse_workbook
    ensure
      ole.close
    end

    # @return the worksheets of the parsed workbook
    def sheets
      workbook.worksheets
    end

    # Walks the stream record by record (number, length, data), dispatching
    # each one to RecordHandler, until the EOF record number is met.
    #
    # @return the object produced by WorkbookBuilder#build
    # @raise [ParsingFailedError] if the stream ends before an EOF record
    def parse_workbook
      workbook_builder = WorkbookBuilder.new(biff)

      loop do
        record_number = read_byte
        break if record_number == ::ReadXls::RecordHandler::EOF

        record_length = read_byte
        record_data = read_data(record_length)

        ::ReadXls::RecordHandler.call(
          record_number,
          workbook_builder,
          biff,
          record_data
        )
      end

      workbook_builder.build
    end

    # Returns the next +bytes+ bytes of the stream and advances the position.
    #
    # @param bytes [Integer]
    # @return [String, nil] may be shorter than requested (or nil) at the end
    #   of the stream
    def read_data(bytes)
      val = biff[position, bytes]
      self.position += bytes
      val
    end

    # Reads the next little-endian 16-bit word (despite the name, BYTE_LENGTH
    # bytes are consumed) and advances the position.
    #
    # @return [Integer]
    # @raise [ParsingFailedError] when fewer than BYTE_LENGTH bytes remain,
    #   including when the position is already past the end of the stream
    def read_byte
      chunk = biff[position, BYTE_LENGTH]
      self.position += BYTE_LENGTH
      # chunk is nil when position is beyond the end of the stream; unpack
      # yields [nil] when fewer than two bytes are left.
      val = chunk && chunk.unpack("v").first
      val || raise(ParsingFailedError, "expected to get value, got nil")
    end
  end
end
@@ -0,0 +1,25 @@
module ReadXls
  module Type
    # Classifies a number format string (currently only as :date or nothing).
    class ExtendedFormat
      attr_accessor :format_string

      # Pattern => type. A format is of the given type when the pattern
      # matches its placeholder characters.
      FORMAT_MATCHERS = {
        /[YMDymd]/ => :date
      }

      # Parts of a format string that are literal text or modifiers rather
      # than placeholders: quoted text ("days"), backslash-escaped characters,
      # and bracketed sections such as [Red], [$-409] or [>100]. Bracketed
      # elapsed-time tokens ([h], [mm], [ss]) are placeholders and are kept.
      LITERAL_SECTIONS = /"[^"]*"|\\.|\[(?![hms]+\])[^\]]*\]/i

      # @param options [Hash] must contain :format_string
      # @raise [KeyError] when :format_string is missing
      def initialize(options)
        self.format_string = options.fetch(:format_string)
      end

      # Determines the type of the format.
      #
      # Literal sections are removed before matching so that letters inside
      # colour names or quoted text (the "d" in "[Red]", for instance) do not
      # make a plain numeric format look like a date.
      #
      # @return [Symbol, nil] :date, or nil when no matcher applies
      # @raise [RuntimeError] when more than one matcher applies
      def format_type
        placeholders = format_string.to_s.gsub(LITERAL_SECTIONS, "")
        matched_types = FORMAT_MATCHERS.select { |matcher, _| placeholders =~ matcher }

        if matched_types.length > 1
          raise "got more than one match, expected only one matched format type"
        end

        matched_types.values.first
      end
    end
  end
end
@@ -0,0 +1,3 @@
module ReadXls
  # Version string of the gem (read by the gemspec).
  VERSION = '0.1.0'
end
@@ -0,0 +1,11 @@
module ReadXls
  # Plain container for the three collections a parsed workbook exposes.
  class Workbook
    attr_accessor :worksheets, :formats, :extended_formats

    # Starts with every collection as an empty array.
    def initialize
      @worksheets = []
      @formats = []
      @extended_formats = []
    end
  end
end
@@ -0,0 +1,15 @@
module ReadXls
  class Workbook
    # Holds a list of strings and looks them up by position.
    class SharedStringTable
      attr_accessor :strings

      # @param strings [Array] the table entries, in table order
      def initialize(strings)
        @strings = strings
      end

      # @param i [Integer] position in the table
      # @return the entry stored at +i+, or nil when there is none
      def index(i)
        table = strings
        table[i]
      end
    end
  end
end
@@ -0,0 +1,11 @@
module ReadXls
  class Workbook
    # A built worksheet: a holder for its rows.
    class Worksheet
      attr_accessor :rows

      # @param options [Hash] must contain :rows
      # @raise [KeyError] when :rows is missing
      def initialize(options)
        @rows = options.fetch(:rows)
      end
    end
  end
end
@@ -0,0 +1,44 @@
module ReadXls
  class Workbook
    # Collects rows, cell values and formula strings for one worksheet and
    # turns them into a Worksheet.
    class WorksheetBuilder
      attr_accessor :rows, :formula_strings, :sst, :formats, :extended_formats

      # Starts with no rows and no pending formula strings.
      def initialize
        @rows = []
        @formula_strings = []
      end

      # Stores +row+ at position +row_index+, replacing whatever was there.
      def add_row(row_index, row)
        rows[row_index] = row
      end

      # Adds +value+ as column +column_index+ of an already registered row.
      #
      # @raise [RuntimeError] when no row is registered at +row_index+
      def add_column_to_row(row_index, column_index, value)
        target = rows[row_index]
        raise("could not find row") unless target

        target.add_column(column_index, value)
      end

      # Appends +string+ to the queue of formula strings.
      def add_formula_string(string)
        formula_strings.push(string)
      end

      # @return [Worksheet] worksheet holding the evaluated rows
      def build
        evaluated = build_rows
        ::ReadXls::Workbook::Worksheet.new(:rows => evaluated)
      end

      # Removes and returns the oldest queued formula string (nil when the
      # queue is empty).
      def next_formula_string!
        formula_strings.shift
      end

      private

      # Fills every gap in +rows+ with an empty Evaluator::Row, then evaluates
      # all rows in order.
      def build_rows
        rows.each_index do |row_index|
          rows[row_index] ||= ::ReadXls::Evaluator::Row.new(row_index, 0, 0)
        end

        rows.map { |row| row.evaluate }
      end
    end
  end
end
@@ -0,0 +1,96 @@
module ReadXls
  # Accumulates formats, extended formats, the shared string table and
  # worksheet builders while records are processed, then assembles a Workbook.
  class WorkbookBuilder
    # Built-in number formats keyed by their format id. Ids 23-36 are not
    # defined in this table, so they are intentionally absent; the second
    # group starts at id 37.
    BUILT_IN_FORMATS = {
      0 => "General",
      1 => "0",
      2 => "0.00",
      3 => "#,##0",
      4 => "#,##0.00",
      5 => "$#,##0_);($#,##0)",
      6 => "$#,##0_);[Red]($#,##0)",
      7 => "$#,##0.00_);($#,##0.00)",
      8 => "$#,##0.00_);[Red]($#,##0.00)",
      9 => "0%",
      10 => "0.00%",
      11 => "0.00E+00",
      12 => "# ?/?",
      13 => "# ??/??",
      14 => "M/D/YY",
      15 => "D-MMM-YY",
      16 => "D-MMM",
      17 => "MMM-YY",
      18 => "h:mm AM/PM",
      19 => "h:mm:ss AM/PM",
      20 => "h:mm",
      21 => "h:mm:ss",
      22 => "M/D/YY h:mm",
      37 => "_(#,##0_);(#,##0)",
      38 => "_(#,##0_);[Red](#,##0)",
      39 => "_(#,##0.00_);(#,##0.00)",
      40 => "_(#,##0.00_);[Red](#,##0.00)",
      41 => '_(* #,##0_);_(* (#,##0);_(* "-"_);_(@_)',
      42 => '_($* #,##0_);_($* (#,##0);_($* "-"_);_(@_)',
      43 => '_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)',
      44 => '_($* #,##0.00_);_($* (#,##0.00);_($* "-"??_);_(@_)',
      45 => "mm:ss",
      46 => "[h]:mm:ss",
      47 => "mm:ss.0",
      48 => "##0.0E+0",
      49 => "@"
    }.freeze

    attr_accessor :biff, :worksheet_builders, :sst, :formats, :extended_formats

    # @param biff the raw workbook stream (stored, not interpreted here)
    def initialize(biff)
      self.biff = biff
      self.worksheet_builders = []
      self.formats = default_formats
      self.extended_formats = []
    end

    # Registers a worksheet builder to be built by #build.
    def add_worksheet_builder(worksheet_builder)
      self.worksheet_builders.push(worksheet_builder)
    end

    # Stores +format_string+ under +format_index+, overriding any default.
    def add_format(format_index, format_string)
      self.formats[format_index] = format_string
    end

    # Appends an extended format evaluator.
    def add_extended_format(extended_format)
      self.extended_formats.push(extended_format)
    end

    # Assembles the workbook from everything collected so far.
    #
    # @return [::ReadXls::Workbook]
    # @raise [RuntimeError] when worksheets exist but no sst was set
    def build
      workbook = ::ReadXls::Workbook.new
      workbook.formats = build_formats
      workbook.extended_formats = build_extended_formats
      workbook.worksheets = build_worksheets
      workbook
    end

    private

    # Snapshot of the formats, taken once and reused.
    def build_formats
      @_formats ||= formats.dup
    end

    # Evaluated extended formats, computed once and reused.
    def build_extended_formats
      @_extended_formats ||= extended_formats.map(&:evaluate)
    end

    # Hands the shared data to every worksheet builder and builds it.
    def build_worksheets
      worksheet_builders.map do |worksheet_builder|
        raise "no sst found!" if sst.nil?

        worksheet_builder.sst = sst
        worksheet_builder.formats = build_formats
        worksheet_builder.extended_formats = build_extended_formats

        worksheet_builder.build
      end
    end

    # Fresh array of the built-in formats, positioned so that the array index
    # equals the format id (undefined ids are nil). Strings are copied so that
    # builders never share mutable state.
    def default_formats
      BUILT_IN_FORMATS.each_with_object([]) do |(format_index, format_string), table|
        table[format_index] = format_string.dup
      end
    end
  end
end
@@ -0,0 +1,27 @@
# coding: utf-8
# Gem specification for read_xls. The version is read from
# lib/read_xls/version.rb, so lib/ is put on the load path first.
lib_dir = File.expand_path("../lib", __FILE__)
$LOAD_PATH.unshift(lib_dir) if !$LOAD_PATH.include?(lib_dir)
require "read_xls/version"

Gem::Specification.new do |spec|
  spec.name = "read_xls"
  spec.version = ReadXls::VERSION
  spec.authors = ["P2Binvestor"]
  spec.email = ["techadmin@p2bi.com"]

  spec.summary = "Parse XLS files."
  spec.description = ""
  spec.homepage = "http://github.com/p2bi/read_xls"
  spec.license = "MIT"

  # Package every git-tracked file except tests, specs and features.
  tracked_files = `git ls-files -z`.split("\x0")
  spec.files = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  spec.bindir = "exe"
  # Executables are the basenames of the packaged files under exe/.
  spec.executables = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_dependency "ruby-ole"

  spec.add_development_dependency "bundler", "~> 1.10"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"
end