mindreframer-oxcelix 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,115 @@
1
module Oxcelix
  # The Numformats module provides helper methods that translate Excel number
  # format strings (#Cell.numformat) into their ruby counterparts: Kernel#sprintf
  # format strings for numbers and DateTime#strftime format strings for dates.
  module Numformats
    # Map containing the Excel date/time formatting tokens and their strftime
    # counterparts. 'ii' and 'i' are internal placeholders for minutes: the
    # #datetime method rewrites every 'm' that stands for minutes to 'i' before
    # this table is consulted, so that 'm' can unambiguously mean month.
    # 'mmmmm' (first letter of the month in Excel) has no strftime equivalent
    # and is approximated with the abbreviated month name.
    Dtmap = {
      'hh' => '%H', 'ii' => '%M', 'i' => '%-M', 'H' => '%-k', 'h' => '%-k',
      'ss' => '%S', 's' => '%-S',
      'mmmmm' => '%b', 'mmmm' => '%B', 'mmm' => '%b', 'mm' => '%m', 'm' => '%-m',
      'dddd' => '%A', 'ddd' => '%a', 'dd' => '%d', 'd' => '%-d',
      'yyyy' => '%Y', 'yy' => '%y',
      'AM/PM' => '%p', 'A/P' => '%p', '.0' => ''
    }.freeze

    # One date/time token of a (downcased) Excel format string: a meridiem
    # marker, a fractional-seconds suffix or a run of one repeated letter.
    DATE_TOKEN = %r{am/pm|a/p|\.0+|y+|m+|d+|h+|s+|i+}

    # Presence of a meridiem marker switches the hour tokens to the 12-hour clock.
    MERIDIEM = %r{am/pm|a/p}

    # Hour tokens used instead of the Dtmap ones when the format has a meridiem marker.
    TWELVE_HOUR = { 'hh' => '%I', 'h' => '%-l' }.freeze

    # Convert the temporary format array (the collection of non-default number formatting strings defined in the excel sheet in use)
    # to a series of hashes containing an id, an excel format string, a converted format string and an object class the format is
    # interpreted on. Every converted hash is appended to Formatarray.
    # @param [Array<Hash>] fmtary hashes with the :numFmtId and :formatCode keys.
    # @return [Array<Hash>] fmtary itself.
    def add_custom_formats fmtary
      fmtary.each do |x|
        # A missing format code is treated as an empty one instead of raising.
        code = x[:formatCode].to_s
        ostring = nil
        cls = nil
        if code =~ /[#0%\?]/
          ostring = numeric code
          cls = code.include?('/') ? 'rational' : 'numeric'
        elsif code.downcase =~ /[dmysh]/
          ostring = datetime code
          cls = 'date'
        elsif code.downcase == "general"
          cls = 'string'
        end
        Formatarray << {:id => x[:numFmtId].to_s, :xl => code, :ostring => ostring, :cls => cls}
      end
    end

    # Convert the excel-style number format to a ruby Kernel#format string and return that String.
    # The conversion is internally done by regexp'ing 6 groups: prefix, decimals, separator, floats, exponential (E+)
    # and postfix. Rational numbers are not handled yet.
    # NOTE(review): a literal '%' in the prefix/postfix is copied verbatim and is not escaped as '%%' - confirm
    # how percent formats are expected to be rendered before changing this.
    # @param [String] val an Excel number format string.
    # @return [String] a rubyish Kernel#format string.
    def numeric val
      strippedfmt = val.gsub(/\?/, '0').gsub(',', '')
      prefix, decimals, sep, floats, expo, postfix =
        /(^[^\#0e].?)?([\#0]*)?(\.)?([\#0]*)?(e.?)?(.?[^\#0e]$)?/i.match(strippedfmt).captures
      decimals = decimals.to_s
      floats = floats.to_s

      ostring = "#{prefix}%"
      unless decimals.empty?
        # A leading '#' placeholder selects the '#' flag; the width is the
        # number of placeholders in front of the decimal separator.
        ostring << '#' if decimals.start_with?('#')
        ostring << decimals.size.to_s
      end
      ostring << sep.to_s
      # Placeholders after the separator make a float, none makes an integer.
      ostring << (floats.empty? ? 'd' : "#{floats.size}f")
      ostring << expo.to_s << postfix.to_s
      ostring
    end

    # Convert excel-style date formats into ruby DateTime strftime format strings.
    # Tokens that are not listed in Dtmap (e.g. 'yyy') are dropped; characters
    # that are not part of any token are copied unchanged.
    # @param [String] formatcode an Excel number format string.
    # @return [String] a DateTime#strftime format string.
    def datetime formatcode
      # Excel uses 'm' both for months and minutes: an 'm' right after the
      # hours or right before the seconds means minutes and is renamed to 'i'.
      deminutified = formatcode.downcase
        .gsub(/(?<hrs>H|h)(?<div>.)m/, '\k<hrs>\k<div>i')
        .gsub(/im/, 'ii')
        .gsub(/m(?<div>.)(?<secs>s)/, 'i\k<div>\k<secs>')
        .gsub(/mi/, 'ii')
      twelve_hour = !(deminutified =~ MERIDIEM).nil?
      deminutified.gsub(DATE_TOKEN) do |token|
        # The meridiem keys of Dtmap are upper case, hence the second lookup.
        (twelve_hour && TWELVE_HOUR[token]) || Dtmap[token] || Dtmap[token.upcase].to_s
      end
    end
  end

  # The Numberhelper module implements methods that return the formatted value or the value converted into a Ruby type (DateTime, Numeric, etc)
  module Numberhelper
    include Numformats

    # Get the cell's value and excel format string and return a string, a ruby Numeric or a DateTime object accordingly.
    # The raw @value is returned when it is not numeric, when no usable number format is found or when it cannot be parsed.
    # @return [Object] A ruby object that holds and represents the value stored in the cell. Conversion is based on cell formatting.
    # @example Get the value of a cell:
    #   c = w.sheets[0]["B3"] # => <Oxcelix::Cell:0x00000002a5b368 @xlcoords="A3", @style="84", @type="n", @value="41155", @numformat=14>
    #   c.to_ru # => <DateTime: 2012-09-03T00:00:00+00:00 ((2456174j,0s,0n),+0s,2299161j)>
    #
    def to_ru
      return @value unless @value.numeric?

      fmt = Numformats::Formatarray[@numformat.to_i]
      return @value if fmt.nil? || fmt[:xl].nil? || fmt[:xl].downcase == "general"

      case fmt[:cls]
      when 'date'
        # Dates are stored as the number of days elapsed since 1899-12-30.
        DateTime.new(1899, 12, 30) + oxcelix_number(@value)
      when 'numeric', 'rational'
        oxcelix_number(@value)
      else
        @value
      end
    rescue ArgumentError, TypeError
      @value
    end

    # Get the cell's value, convert it with to_ru and finally, format it based on the value's type.
    # The raw @value is returned when the cell has no date or numeric format or when formatting fails.
    # @return [String] Value gets formatted depending on its class. If it is a DateTime, the DateTime#strftime method is used,
    #   if it holds a number, Kernel#sprintf is run.
    # @example Get the formatted value of a cell:
    #   c = w.sheets[0]["B3"] # => <Oxcelix::Cell:0x00000002a5b368 @xlcoords="A3", @style="84", @type="n", @value="41155", @numformat=14>
    #   c.to_fmt # => "3/9/2012"
    #
    def to_fmt
      # @numformat is converted with to_i on every lookup, exactly as in to_ru.
      fmt = Numformats::Formatarray[@numformat.to_i]
      return @value if fmt.nil?

      case fmt[:cls]
      when 'date'
        to_ru.strftime(fmt[:ostring])
      when 'numeric', 'rational'
        sprintf(fmt[:ostring], to_ru)
      else
        @value
      end
    rescue StandardError
      @value
    end

    private

    # Parse the textual cell value without evaluating it as ruby code.
    # @param [String] text the raw cell value.
    # @return [Integer, Float] an Integer for plain decimal integers, a Float otherwise.
    # @raise [ArgumentError] if text is not a number.
    def oxcelix_number(text)
      Integer(text, 10)
    rescue ArgumentError
      Float(text)
    end
  end
end
@@ -0,0 +1,28 @@
1
+ module Oxcelix
2
# The Comments class is a SAX handler that collects the cell comments of a
# sheet. Every comment is stored as a hash holding the comment text (:comment)
# and, if a :ref attribute was seen before the text, the reference (:ref).
class Comments < ::Ox::Sax
  # @!attribute [rw] commarray
  #   @return [Array] the array of all comments of a given sheet
  # @!attribute [rw] comment
  #   @return [Hash] the comment hash that is currently being assembled
  attr_accessor :commarray, :comment

  def initialize
    @comment = {}
    @commarray = []
  end

  # Completes the pending comment hash with the text (with the '&#10;'
  # entities removed), pushes it to @commarray and starts a new, empty hash.
  # @param [String] str the text node content
  def text(str)
    @comment[:comment] = str.gsub('&#10;', '')
    @commarray.push(@comment)
    @comment = {}
  end

  # Remembers the value of the :ref attribute in the pending comment hash;
  # every other attribute is ignored.
  # @param [Symbol] name the attribute name
  # @param [String] str the attribute value
  def attr(name, str)
    @comment[:ref] = str if name == :ref
  end
end
28
+ end
@@ -0,0 +1,17 @@
1
+ module Oxcelix
2
# Ox based SAX handler that gathers every text node it is handed (the shared
# strings of the sharedStrings.xml file) in an array. These strings will
# replace the references in the cells (interpolation).
class Sharedstrings < ::Ox::Sax
  # @!attribute [rw] stringarray
  #   @return [Array] the array of all the strings found in sharedStrings.xml
  attr_accessor :stringarray

  def initialize
    @stringarray = []
  end

  # Append a shared string to @stringarray.
  # @param [String] str the text node content
  def text(str)
    @stringarray.push(str)
  end
end
17
+ end
@@ -0,0 +1,49 @@
1
+ require 'ox'
2
+ module Oxcelix
3
+
4
# Ox based SAX parser which pushes the number formats (taken from the styles.xml file) to an array.
# The reference taken from the cell's 's' attribute points to an element of the
# style array, which in turn points to a number format (numFmt) that can be
# either built-in or defined in the styles.xml itself.
class Styles < ::Ox::Sax
  # Names of the XML attributes that are dispatched to the handler method of
  # the same name. Attribute names come from the parsed document, so only
  # these are ever passed on to #public_send.
  ATTRIBUTE_HANDLERS = %w[numFmtId formatCode].freeze

  # @!attribute [rw] styleary
  #   @return [Array] the numFmtId values found under cellXfs, in document order
  # @!attribute [rw] xmlstack
  #   @return [Array] the names of the currently open elements
  # @!attribute [rw] temparray
  #   @return [Array] the number format hashes (:numFmtId, :formatCode) collected so far
  attr_accessor :styleary, :xmlstack, :temparray

  def initialize
    @temparray = []
    @styleary = []
    @xmlstack = []
    @numform = {}
  end

  # Store one half of a number format definition. As soon as the pending
  # hash holds two entries it is pushed to @temparray and a new one is started.
  # @param [Symbol] key :numFmtId or :formatCode
  # @param [String] value the attribute value
  def nf key, value
    @numform[key] = value
    if @numform.size == 2
      @temparray << @numform
      @numform = {}
    end
  end

  # Handler of the numFmtId attribute. The parent of the current element
  # decides whether the id is a style reference (cellXfs) or the id of a
  # number format definition (numFmts).
  # @param [String] str the attribute value
  def numFmtId str
    case @xmlstack[-2]
    when :cellXfs
      @styleary << str
    when :numFmts
      nf :numFmtId, str
    end
  end

  # Handler of the formatCode attribute.
  # @param [String] str the attribute value
  def formatCode str
    nf :formatCode, str
  end

  # Push the element that has just been opened to the stack.
  def start_element(name)
    @xmlstack << name
  end

  # Remove the element that has just been closed from the stack.
  def end_element(name)
    @xmlstack.pop
  end

  # Dispatch the attribute to its handler method. Attributes without a
  # handler listed in ATTRIBUTE_HANDLERS are ignored; in particular an
  # attribute that merely shares its name with a method every object has
  # (e.g. instance_eval) is never invoked.
  # @param [Symbol] name the attribute name
  # @param [String] str the attribute value
  def attr(name, str)
    public_send(name, str) if ATTRIBUTE_HANDLERS.include?(name.to_s)
  end
end
49
+ end
@@ -0,0 +1,136 @@
1
+
2
+ module Oxcelix
3
##
# The Xlsheet class is a SAX parser based on the Ox library. It parses a
# SpreadsheetML (AKA Office Open XML) formatted XML file and returns an array
# of Cell objects {#cellarray} and an array of merged cells {#mergedcells}.
#
# Xlsheet will omit the following:
# * empty cells
# * cells containing formulas
#
# Only non-empty cells of merged groups will be added to {#cellarray}. A separate array
# {#mergedcells} is reserved for merging.
class Xlsheet < ::Ox::Sax
  # @!attribute [rw] xmlstack
  #   @return [Array] Stores the state machine's actual state
  # @!attribute [rw] mergedcells
  #   @return [Array] the array of merged cells
  # @!attribute [rw] cellarray
  #   @return [Array] the array of non-empty (meaningful) cells of the current sheet
  # @!attribute [rw] cell
  #   @return [Cell] the cell currently being processed.
  attr_accessor :xmlstack, :mergedcells, :cellarray, :cell

  def initialize()
    @xmlstack = []
    @mergedcells = []
    @cellarray = []
    @cell = Cell.new
  end

  # Save SAX state-machine state to {#xmlstack} if and only if the processed
  # element is a :c (cell) or a :mergeCell (merged cell)
  # @param [Symbol] name Start element
  def start_element(name)
    @xmlstack << name if name == :c || name == :mergeCell
  end

  # Step back in the stack ({#xmlstack}.pop), clear actual cell information.
  # The stack is popped at the end of every element, including the ones
  # that were never pushed by {#start_element}; the state is therefore already
  # gone when the first child element of a cell is closed.
  # @param [Symbol] name Element ends
  def end_element(name)
    @xmlstack.pop
    @cell = Cell.new if name == :c || name == :mergeCell
  end

  # Set cell value, style, etc. This will only happen if the parser's state is :c.
  # If the state is :mergeCell AND the actual attribute name is :ref the
  # attribute will be added to the merged cells array.
  # The attribute name is tested against the Cell object: if the cell
  # has a method named the same way, that method is called with the str parameter.
  # @param [Symbol] name of the attribute.
  # @param [String] str Content of the attribute
  def attr(name, str)
    case @xmlstack.last
    when :c
      @cell.public_send(name, str) if cell_attribute?(name)
    when :mergeCell
      @mergedcells << str if name == :ref
    end
  end

  # Cell content is parsed here. The text is stored in the current cell and
  # the cell is added to {#cellarray} if the text is numeric and the cell's type
  # is neither "shared" nor "e". Whatever the outcome, a new Cell is started.
  # @param [String] str the text node content
  def text(str)
    if @xmlstack.last == :c
      if @cell.type != "shared" && @cell.type != "e" && str.numeric?
        @cell.v str
        @cellarray << @cell
      end
      @cell = Cell.new
    end
  end

  private

  # Tell whether the attribute may be dispatched to the current cell.
  # Attribute names come from the parsed document, so names that every
  # Object responds to (instance_eval, send, ...) are refused: only methods
  # that the cell provides on top of those can be attribute handlers.
  # @param [Symbol] name of the attribute.
  # @return [Boolean]
  def cell_attribute?(name)
    @cell.respond_to?(name) && !Object.method_defined?(name)
  end
end
85
+
86
# An Xlsheet parser that only keeps the cells of one "page" of the sheet.
# The page is selected with the per_page (lines per page) and pageno (number
# of the page, starting with 1) parameters of the constructor: cells whose
# row index falls outside the page are not added to the cell array.
class PagSheet < Xlsheet
  attr_accessor :xmlstack, :mergedcells, :cellarray, :cell

  # @param [Integer] per_page lines per page
  # @param [Integer] pageno the page to be parsed
  def initialize(per_page, pageno)
    @PER_PAGE = per_page
    @PAGENO = pageno
    super()
  end

  # Store the text in the current cell and keep the cell if the text is
  # numeric, the cell's type is neither "shared" nor "e" and its row is on
  # the selected page. A new Cell is started in any case.
  # @param [String] str the text node content
  def text(str)
    return unless @xmlstack.last == :c

    if @cell.type != "shared" && @cell.type != "e" && str.numeric? && on_page?
      @cell.v(str)
      @cellarray.push(@cell)
    end
    @cell = Cell.new
  end

  private

  # Tell whether the row index of the current cell belongs to the selected page.
  def on_page?
    first_row = @PER_PAGE * (@PAGENO - 1)
    last_row = @PER_PAGE * @PAGENO - 1
    (first_row..last_row).include?(@cell.y)
  end
end
109
+
110
# An Xlsheet parser that only keeps the cells of a given range of the sheet.
# The constructor takes the range; its begin and end are handed to the x and
# y methods of the cell to obtain the boundaries. Cells outside these
# boundaries are not added to the cell array.
class Cellrange < Xlsheet
  attr_accessor :xmlstack, :mergedcells, :cellarray, :cell

  # @param [Range] range the range of cells to be parsed
  def initialize(range)
    @cell = Cell.new
    @RANGE_START = range.begin
    @RANGE_END = range.end
    super()
  end

  # Store the text in the current cell and keep the cell if the text is
  # numeric, the cell's type is neither "shared" nor "e" and the cell lies
  # within the selected range. A new Cell is started in any case.
  # @param [String] str the text node content
  def text(str)
    return unless @xmlstack.last == :c

    if @cell.type != "shared" && @cell.type != "e" && str.numeric? && within_range?
      @cell.v(str)
      @cellarray.push(@cell)
    end
    @cell = Cell.new
  end

  private

  # Tell whether both coordinates of the current cell are inside the range.
  def within_range?
    columns = @cell.x(@RANGE_START)..@cell.x(@RANGE_END)
    return false unless columns.include?(@cell.x)

    rows = @cell.y(@RANGE_START)..@cell.y(@RANGE_END)
    rows.include?(@cell.y)
  end
end
136
+ end
@@ -0,0 +1,97 @@
1
+
2
+ module Oxcelix
3
# The Sheet class represents an excel sheet.
class Sheet < Matrix
  include Cellhelper
  include Numberhelper

  # @!attribute [rw] name
  #   @return [String] Sheet name
  # @!attribute [rw] sheetId
  #   @return [String] returns the sheetId SheetML internal attribute
  # @!attribute [rw] relationId
  #   @return [String] Internal reference key. relationID is used internally by Excel 2010 to e.g. build up the relationship between worksheets and comments
  attr_accessor :name, :sheetId, :relationId

  # The [] method overrides the standard Matrix::[]. It will now accept Excel-style cell coordinates.
  # @param [String, Integer] i an Excel-style coordinate, or a row index followed by a column index
  # @return [Cell] the object denoted with the Excel column-row name.
  # @example Select a cell in a sheet
  #   w = Oxcelix::Workbook.new('Example.xlsx')
  #   w.sheets[0][3,1] #=> #<Oxcelix::Cell:0x00000001e00fa0 @xlcoords="B4", @style="0", @type="n", @value="3">
  #   w.sheets[0]['B4'] #=> #<Oxcelix::Cell:0x00000001e00fa0 @xlcoords="B4", @style="0", @type="n", @value="3">
  def [](i, *j)
    return super(y(i), x(i)) if i.is_a? String

    super(i, j[0])
  end

  # The to_m method returns a simple Matrix object filled with the raw values of the original Sheet object.
  # Empty cells are represented by nil. If any argument is given, no value
  # is copied and every element of the result is nil.
  # @return [Matrix] a collection of string values (the former #Cell::value)
  def to_m(*attrs)
    return Matrix.build(row_size, column_size) { nil } unless attrs.empty?

    map_cells { |cell| cell.value }
  end

  # The to_ru method returns a Matrix of "rubified" values. It basically builds a new Matrix
  # and puts the result of the #Cell::to_ru method of every cell of the original sheet in
  # the corresponding Matrix cell.
  # @return [Matrix] a collection of ruby objects (#Integers, #Floats, #DateTimes, #Rationals, #Strings)
  def to_ru
    map_cells { |cell| cell.to_ru }
  end

  # Invokes the #Cell::to_ru method on each element of self, replacing each element of the Sheet with the value returned.
  def to_ru!
    replace_cells! { |cell| cell.to_ru }
  end

  # The to_fmt method returns a Matrix of "formatted" values. It basically builds a new Matrix
  # and puts the result of the #Cell::to_fmt method of every cell of the original sheet in
  # the corresponding Matrix cell. The #Cell::to_fmt will pass the original values to to_ru, and then
  # depending on the value, will run strftime on DateTime objects and sprintf on numeric types.
  # @return [Matrix] a collection of Strings
  def to_fmt
    map_cells { |cell| cell.to_fmt }
  end

  # Invokes the #Cell::to_fmt method on each element of self, replacing each element of the Sheet with the value returned.
  def to_fmt!
    replace_cells! { |cell| cell.to_fmt }
  end

  private

  # Build a plain Matrix of the same size as self. Every element is the
  # result of the block called with the corresponding cell, or nil if the
  # cell is missing or has no value.
  def map_cells
    Matrix.build(row_size, column_size) do |row, col|
      cell = self[row, col]
      yield(cell) unless cell.nil? || cell.value.nil?
    end
  end

  # Replace every element of self with the result of the block called with
  # that element, or with nil if the element is missing or has no value.
  def replace_cells!
    each_with_index do |cell, row, col|
      self[row, col] = cell.nil? || cell.value.nil? ? nil : yield(cell)
    end
  end
end
97
+ end