ooxl 0.0.1.5.0 → 0.0.1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: d3da0847d61333cf06c4a9e3212604a6b6f1e1e0
4
- data.tar.gz: 2b5d30b7c28f88c14adfe0fcbe4e1c0085ccfb61
2
+ SHA256:
3
+ metadata.gz: 79ac42fd268e59703904fe713eb0d8369072371b96f632687dd4aac471dbd94c
4
+ data.tar.gz: c336a07951a0e066debbfd0ae59d080a623fc3321da0ab1050e53724e31878ec
5
5
  SHA512:
6
- metadata.gz: cb1e938d50dbfa698605986ae790670f0b2c557438aeb73925baf261326fe8f79b05948d409d06a5fbf620aaa88adba26642206b3beed11ca05e05372487a0ac
7
- data.tar.gz: e30534d829e6b0ba29388a950cf5743977ee0b73c942c798df26eca177f9e17a70d365515c76bf37d20d6bb42c027ddad937223a75b582143a1cf1cac7c00d82
6
+ metadata.gz: e9a19b166119394c6f883c2c049cae44cea55fc4d3624ad45adb48866618175ac3326ae54a768f589a2e35c18eed81dc7c4171cf01b641aa02257ce56dd809f9
7
+ data.tar.gz: 8a5fa75a6f2941b38ed379ca6cdf8dee5f6fa893c2e69e70bcff6db0591ce129e1dc175d9b9bf465034c00f643d74ac63bb6dcc8d937a3bbda4a41fcc6f27ed1
data/bin/console CHANGED
@@ -1,7 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
3
  require "bundler/setup"
4
- require "ooxml_excel"
4
+ require "./lib/ooxl"
5
5
 
6
6
  # You can add fixtures and/or initialization code here to make experimenting
7
7
  # with your gem easier. You can also use a different console, if you like.
@@ -10,5 +10,5 @@ require "ooxml_excel"
10
10
  # require "pry"
11
11
  # Pry.start
12
12
 
13
- require "irb"
14
- IRB.start
13
+ require "pry"
14
+ Pry.start
data/lib/ooxl/ooxl.rb CHANGED
@@ -3,20 +3,33 @@ class OOXL
3
3
  include ListHelper
4
4
  attr_reader :filename
5
5
 
6
- def initialize(spreadsheet_filepath, options={})
6
+ def initialize(filepath = nil, contents: nil, **options)
7
7
  @workbook = nil
8
8
  @sheets = {}
9
9
  @styles = []
10
10
  @comments = {}
11
- @relationships = {}
11
+ @workbook_relationships = nil
12
+ @sheet_relationships = {}
12
13
  @options = options
13
14
  @tables = []
14
- @filename = File.basename(spreadsheet_filepath)
15
- parse_spreadsheet_contents(spreadsheet_filepath)
15
+
16
+ @filename = filepath && File.basename(filepath)
17
+ if contents.present?
18
+ parse_spreadsheet_contents(contents)
19
+ elsif filepath.present?
20
+ parse_spreadsheet_file(filepath)
21
+ else
22
+ raise 'no file path or contents were provided'
23
+ end
16
24
  end
17
25
 
18
26
  def self.open(spreadsheet_filepath, options={})
19
- new(spreadsheet_filepath, options)
27
+ new(spreadsheet_filepath, **options)
28
+ end
29
+
30
+ def self.parse(spreadsheet_contents, options={})
31
+ spreadsheet_contents.force_encoding('ASCII-8BIT') if spreadsheet_contents.respond_to?(:force_encoding)
32
+ new(nil, contents: spreadsheet_contents, **options)
20
33
  end
21
34
 
22
35
  def sheets(skip_hidden: false)
@@ -33,9 +46,11 @@ class OOXL
33
46
  end
34
47
 
35
48
  def sheet(sheet_name)
36
- sheet_index = @workbook.sheets.index { |sheet| sheet[:name] == sheet_name}
37
- raise "No #{sheet_name} in workbook." if sheet_index.nil?
38
- sheet = @sheets.fetch((sheet_index+1).to_s)
49
+ sheet_meta = @workbook.sheets.find { |sheet| sheet[:name] == sheet_name }
50
+ raise "No #{sheet_name} in workbook." if sheet_meta.nil?
51
+
52
+ sheet_index = @workbook_relationships[sheet_meta[:relationship_id]].scan(/\d+/).first
53
+ sheet = @sheets.fetch(sheet_index)
39
54
 
40
55
  # shared variables
41
56
  sheet.name = sheet_name
@@ -73,38 +88,46 @@ class OOXL
73
88
  end
74
89
 
75
90
  def fetch_comments(sheet_index)
76
- final_sheet_index = sheet_index+1
77
- relationship = @relationships[final_sheet_index.to_s]
91
+ relationship = @sheet_relationships[sheet_index]
78
92
  @comments[relationship.comment_id] if relationship.present?
79
93
  end
80
94
 
81
- def parse_spreadsheet_contents(spreadsheet)
95
+ def parse_spreadsheet_file(file_path)
96
+ Zip::File.open(file_path) { |zip| parse_zip(zip) }
97
+ end
98
+
99
+ def parse_spreadsheet_contents(file_contents)
100
+ # open_buffer works for strings and IO streams
101
+ Zip::File.open_buffer(file_contents) { |zip| parse_zip(zip) }
102
+ end
103
+
104
+ def parse_zip(spreadsheet_zip)
82
105
  shared_strings = []
83
- Zip::File.open(spreadsheet) do |spreadsheet_zip|
84
- spreadsheet_zip.each do |entry|
85
- case entry.name
86
- when /xl\/worksheets\/sheet(\d+)?\.xml/
87
- sheet_id = entry.name.scan(/xl\/worksheets\/sheet(\d+)?\.xml/).flatten.first
88
- @sheets[sheet_id] = OOXL::Sheet.new(entry.get_input_stream.read, shared_strings, @options)
89
- when /xl\/styles\.xml/
90
- @styles = OOXL::Styles.load_from_stream(entry.get_input_stream.read)
91
- when /xl\/comments(\d+)?\.xml/
92
- comment_id = entry.name.scan(/xl\/comments(\d+)\.xml/).flatten.first
93
- @comments[comment_id] = OOXL::Comments.load_from_stream(entry.get_input_stream.read)
94
- when "xl/sharedStrings.xml"
95
- Nokogiri.XML(entry.get_input_stream.read).remove_namespaces!.xpath('sst/si').each do |shared_string_node|
96
- shared_strings << shared_string_node.xpath('r/t|t').map { |value_node| value_node.text}.join('')
97
- end
98
- when /xl\/tables\/.*?/i
99
- @tables << OOXL::Table.new(entry.get_input_stream.read)
100
- when "xl/workbook.xml"
101
- @workbook = OOXL::Workbook.load_from_stream(entry.get_input_stream.read)
102
- when /xl\/worksheets\/_rels\/sheet\d+\.xml\.rels/
103
- sheet_id = entry.name.scan(/sheet(\d+)/).flatten.first
104
- @relationships[sheet_id] = Relationships.new(entry.get_input_stream.read)
105
- else
106
- # unsupported for now..
106
+ spreadsheet_zip.each do |entry|
107
+ case entry.name
108
+ when /xl\/worksheets\/sheet(\d+)?\.xml/
109
+ sheet_id = entry.name.scan(/xl\/worksheets\/sheet(\d+)?\.xml/).flatten.first
110
+ @sheets[sheet_id] = OOXL::Sheet.new(entry.get_input_stream.read, shared_strings, @options)
111
+ when /xl\/styles\.xml/
112
+ @styles = OOXL::Styles.load_from_stream(entry.get_input_stream.read)
113
+ when /xl\/comments(\d+)?\.xml/
114
+ comment_id = entry.name.scan(/xl\/comments(\d+)\.xml/).flatten.first
115
+ @comments[comment_id] = OOXL::Comments.load_from_stream(entry.get_input_stream.read)
116
+ when "xl/sharedStrings.xml"
117
+ Nokogiri.XML(entry.get_input_stream.read).remove_namespaces!.xpath('sst/si').each do |shared_string_node|
118
+ shared_strings << shared_string_node.xpath('r/t|t').map { |value_node| value_node.text}.join('')
107
119
  end
120
+ when /xl\/tables\/.*?/i
121
+ @tables << OOXL::Table.new(entry.get_input_stream.read)
122
+ when "xl/workbook.xml"
123
+ @workbook = OOXL::Workbook.load_from_stream(entry.get_input_stream.read)
124
+ when /xl\/worksheets\/_rels\/sheet\d+\.xml\.rels/
125
+ sheet_id = entry.name.scan(/sheet(\d+)/).flatten.first
126
+ @sheet_relationships[sheet_id] = Relationships.new(entry.get_input_stream.read)
127
+ when /xl\/_rels\/workbook\.xml\.rels/
128
+ @workbook_relationships = Relationships.new(entry.get_input_stream.read)
129
+ else
130
+ # unsupported for now..
108
131
  end
109
132
  end
110
133
  end
data/lib/ooxl/util.rb CHANGED
@@ -1,12 +1,13 @@
1
- class OOXL
1
+ class OOXL
2
2
  module Util
3
- COLUMN_LETTERS = ('A'..'ZZZZ').to_a
4
- def letter_equivalent(index)
5
- COLUMN_LETTERS.fetch(index)
3
+ COLUMN_LETTERS = [nil] + ('A'..'ZZZZ').to_a
4
+
5
+ def letter_index(col_letter)
6
+ column_letter_to_number(col_letter) - 1
6
7
  end
7
8
 
8
- def letter_index(letter)
9
- COLUMN_LETTERS.index { |c_letter| c_letter == letter}
9
+ def letter_equivalent(col_index)
10
+ column_number_to_letter(col_index + 1)
10
11
  end
11
12
 
12
13
  def to_column_letter(reference)
@@ -14,13 +15,17 @@ class OOXL
14
15
  end
15
16
 
16
17
  def uniform_reference(ref)
17
- ref.to_s[/[A-Z]/] ? letter_index(ref) + 1 : ref
18
+ ref.to_s[/[A-Z]/] ? column_letter_to_number(ref) : ref
18
19
  end
19
20
 
20
21
  def node_value_extractor(node)
21
22
  node.try(:value)
22
23
  end
23
24
 
25
+ def column_number_to_letter(index)
26
+ COLUMN_LETTERS.fetch(index)
27
+ end
28
+
24
29
  def column_letter_to_number(column_letter)
25
30
  pow = column_letter.length - 1
26
31
  result = 0
data/lib/ooxl/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  class OOXL
2
- VERSION = "0.0.1.5.0"
2
+ VERSION = "0.0.1.5.5"
3
3
  end
@@ -1,29 +1,41 @@
1
1
  class OOXL
2
2
  class Relationships
3
3
  SUPPORTED_TYPES = ['http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments']
4
+
4
5
  def initialize(relationships_node)
5
- @types = {}
6
+ @relationships = []
6
7
  parse_relationships(relationships_node)
7
8
  end
8
9
 
9
10
  def comment_id
10
- @types['comments']
11
+ comment_target = by_type('comments').first
12
+ comment_target && extract_file_reference(comment_target)
13
+ end
14
+
15
+ def [](id)
16
+ @relationships.find { |rel| rel.id == id }&.target
17
+ end
18
+
19
+ def by_type(type)
20
+ @relationships.select { |rel| rel.type == type }.map(&:target)
11
21
  end
12
22
 
13
23
  private
24
+
14
25
  def parse_relationships(relationships_node)
15
26
  relationships_node = Nokogiri.XML(relationships_node).remove_namespaces!
16
27
  relationships_node.xpath('//Relationship').each do |relationship_node|
17
28
  relationship_type = relationship_node.attributes["Type"].value
18
29
  target = relationship_node.attributes["Target"].value
19
- if supported_type?(relationship_type)
20
- @types[extract_type(relationship_type)] = extract_file_reference(target)
21
- end
30
+ id = extract_number(relationship_node.attributes["Id"].value)
31
+ type = extract_type(relationship_type)
32
+ target = relationship_node.attributes["Target"].value
33
+ @relationships << Relationship.new(id, type, target)
22
34
  end
23
35
  end
24
36
 
25
- def supported_type?(type)
26
- SUPPORTED_TYPES.include?(type)
37
+ def extract_number(str)
38
+ str.scan(/(\d+)/).flatten.first
27
39
  end
28
40
 
29
41
  def extract_type(type)
@@ -34,6 +46,7 @@ class OOXL
34
46
  file.scan(/(\d+)\.[\w]/).flatten.first
35
47
  end
36
48
 
49
+ Relationship = Struct.new(:id, :type, :target)
37
50
  end
38
51
  end
39
52
 
@@ -45,12 +45,16 @@ class OOXL
45
45
  end
46
46
 
47
47
  def self.load_from_node(row_node, shared_strings, styles, options)
48
- new(id: row_node.attributes["r"].try(:value),
48
+ new(id: extract_id(row_node),
49
49
  spans: row_node.attributes["spans"].try(:value),
50
50
  height: row_node.attributes["ht"].try(:value),
51
51
  cells: row_node.xpath('c').map { |cell_node| OOXL::Cell.load_from_node(cell_node, shared_strings, styles)},
52
52
  options: options )
53
53
  end
54
+
55
+ def self.extract_id(row_node)
56
+ row_node.attributes["r"].try(:value)
57
+ end
54
58
  end
55
59
  end
56
60
 
@@ -10,6 +10,8 @@ class OOXL
10
10
  # built on-demand -- use fetch_row_by_id instead
11
11
  attr_reader :row_id_map
12
12
 
13
+ delegate :size, to: :row_nodes
14
+
13
15
  def initialize(sheet_xml, shared_strings, options = {})
14
16
  @shared_strings = shared_strings
15
17
  @sheet_xml = sheet_xml
@@ -47,11 +49,33 @@ class OOXL
47
49
  row_cache
48
50
  end
49
51
 
52
+ def row_range(start_index, end_index)
53
+ return enum_for(:row_range, start_index, end_index) unless block_given?
54
+
55
+ rows do |row|
56
+ row_id = row.id.to_i
57
+ next if row_id < start_index
58
+ break if row_id > end_index
59
+
60
+ yield row
61
+ end
62
+ end
63
+
64
+ def max_row_index
65
+ return 0 if row_nodes.empty?
66
+
67
+ if all_rows_loaded?
68
+ row_cache.last.id.to_i
69
+ else
70
+ Row.extract_id(row_nodes.last).to_i
71
+ end
72
+ end
73
+
50
74
  private
51
75
 
52
76
  def parse_more_rows
53
77
  row_nodes.drop(row_cache.count).each do |row_node|
54
- row = Row.load_from_node(row_node, @shared_strings, @styles, @options)
78
+ row = parse_row(row_node)
55
79
  row_cache << row
56
80
  row_id_map[row.id] = row
57
81
  yield row if block_given?
@@ -62,10 +86,6 @@ class OOXL
62
86
  row_cache.count == row_nodes.count
63
87
  end
64
88
 
65
- def row_nodes
66
- @row_nodes ||= @sheet_xml.xpath('//sheetData/row')
67
- end
68
-
69
89
  def fetch_row_by_id(row_id)
70
90
  row_id = row_id.to_s
71
91
  return row_id_map[row_id] if all_rows_loaded? || row_id_map.key?(row_id)
@@ -95,6 +115,14 @@ class OOXL
95
115
  end
96
116
  yielded_rows
97
117
  end
118
+
119
+ def row_nodes
120
+ @row_nodes ||= @sheet_xml.xpath('//sheetData/row')&.to_a
121
+ end
122
+
123
+ def parse_row(row_node)
124
+ Row.load_from_node(row_node, @shared_strings, @styles, @options)
125
+ end
98
126
  end
99
127
  end
100
128
 
@@ -128,7 +128,7 @@ class OOXL
128
128
  # cell_range values separated by comma
129
129
  if cell_range.include?(":")
130
130
  cell_letters = cell_range.gsub(/[\d]/, '').split(':')
131
- start_index, end_index = cell_range[/[A-Z]{1,}\d+/] ? cell_range.gsub(/[^\d:]/, '').split(':').map(&:to_i) : [1, rows.size]
131
+ start_index, end_index = cell_range[/[A-Z]{1,}\d+/] ? cell_range.gsub(/[^\d:]/, '').split(':').map(&:to_i) : [1, @row_cache.max_row_index]
132
132
  if cell_letters.uniq.size > 1
133
133
  list_values_from_rectangle(cell_letters, start_index, end_index)
134
134
  else
@@ -148,22 +148,19 @@ class OOXL
148
148
  # 3 => end_index
149
149
  # Expected output would be: [['value', 'value', 'value'], ['value', 'value', 'value'], ['value', 'value', 'value']]
150
150
  def list_values_from_rectangle(cell_letters, start_index, end_index)
151
- start_index.upto(end_index).map do |row_index|
152
- (letter_index(cell_letters.first)..letter_index(cell_letters.last)).map do |cell_index|
153
- row = row(row_index)
154
- next if row.blank?
155
-
156
- cell_letter = letter_equivalent(cell_index)
157
- row["#{cell_letter}#{row_index}"].value
151
+ start_col = column_letter_to_number(cell_letters.first)
152
+ end_col = column_letter_to_number(cell_letters.last)
153
+ @row_cache.row_range(start_index, end_index).map do |row|
154
+ (start_col..end_col).map do |col_index|
155
+ col_letter = column_number_to_letter(col_index)
156
+ row["#{col_letter}#{row.id}"].value
158
157
  end
159
158
  end
160
159
  end
161
160
 
162
161
  def list_values_from_column(column_letter, start_index, end_index)
163
- (start_index..end_index).to_a.map do |row_index|
164
- row = row(row_index)
165
- next if row.blank?
166
- row["#{column_letter}#{row_index}"].value
162
+ @row_cache.row_range(start_index, end_index).map do |row|
163
+ row["#{column_letter}#{row.id}"].value
167
164
  end
168
165
  end
169
166
 
@@ -174,19 +171,19 @@ class OOXL
174
171
  [row[cell_ref].value]
175
172
  end
176
173
 
177
- def self.load_from_stream(xml_stream, shared_strings)
178
- self.new(Nokogiri.XML(xml_stream).remove_namespaces!, shared_strings)
179
- end
180
-
181
174
  def in_merged_cells?(cell_id)
182
175
  column_letter, column_index = cell_id.partition(/\d+/)
183
- range = merged_cells_range.find { |column_range, index_range| column_range.cover?(column_letter) && index_range.cover?(column_index) }
176
+ range = merged_cells.find { |column_range, index_range| column_range.cover?(column_letter) && index_range.cover?(column_index) }
184
177
  range.present?
185
178
  end
186
179
 
180
+ def self.load_from_stream(xml_stream, shared_strings)
181
+ self.new(xml_stream, shared_strings)
182
+ end
183
+
187
184
  private
188
185
 
189
- def merged_cells_range
186
+ def merged_cells
190
187
  @merged_cells ||= @xml.xpath('//mergeCells/mergeCell').map do |merged_cell|
191
188
  # <mergeCell ref="Q381:R381"/>
192
189
  start_reference, end_reference = merged_cell.attributes["ref"].try(:value).split(':')
data/ooxml_excel.gemspec CHANGED
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
26
26
  spec.require_paths = ["lib"]
27
27
  spec.add_dependency 'activesupport'
28
28
  spec.add_dependency 'nokogiri', '~> 1'
29
- spec.add_dependency 'rubyzip', '~> 1.0', '< 2.0.0'
29
+ spec.add_dependency 'rubyzip', '~> 1.3.0', '< 2.0.0'
30
30
 
31
31
  spec.add_development_dependency "bundler"
32
32
  spec.add_development_dependency "pry-byebug"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ooxl
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1.5.0
4
+ version: 0.0.1.5.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Mones
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-10-08 00:00:00.000000000 Z
11
+ date: 2021-08-09 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport
@@ -44,7 +44,7 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '1.0'
47
+ version: 1.3.0
48
48
  - - "<"
49
49
  - !ruby/object:Gem::Version
50
50
  version: 2.0.0
@@ -54,7 +54,7 @@ dependencies:
54
54
  requirements:
55
55
  - - "~>"
56
56
  - !ruby/object:Gem::Version
57
- version: '1.0'
57
+ version: 1.3.0
58
58
  - - "<"
59
59
  - !ruby/object:Gem::Version
60
60
  version: 2.0.0
@@ -153,7 +153,7 @@ files:
153
153
  homepage: https://github.com/halcjames/ooxl
154
154
  licenses: []
155
155
  metadata: {}
156
- post_install_message:
156
+ post_install_message:
157
157
  rdoc_options: []
158
158
  require_paths:
159
159
  - lib
@@ -168,9 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
168
168
  - !ruby/object:Gem::Version
169
169
  version: '0'
170
170
  requirements: []
171
- rubyforge_project:
172
- rubygems_version: 2.6.12
173
- signing_key:
171
+ rubygems_version: 3.2.6
172
+ signing_key:
174
173
  specification_version: 4
175
174
  summary: OOXL Excel - Parse Excel Spreadsheets (xlsx, xlsm).
176
175
  test_files: []