ooxl 0.0.1.5.0 → 0.0.1.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/bin/console +3 -3
- data/lib/ooxl/ooxl.rb +58 -35
- data/lib/ooxl/util.rb +12 -7
- data/lib/ooxl/version.rb +1 -1
- data/lib/ooxl/xl_objects/relationships.rb +20 -7
- data/lib/ooxl/xl_objects/row.rb +5 -1
- data/lib/ooxl/xl_objects/row_cache.rb +33 -5
- data/lib/ooxl/xl_objects/sheet.rb +15 -18
- data/ooxml_excel.gemspec +1 -1
- metadata +8 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 79ac42fd268e59703904fe713eb0d8369072371b96f632687dd4aac471dbd94c
|
4
|
+
data.tar.gz: c336a07951a0e066debbfd0ae59d080a623fc3321da0ab1050e53724e31878ec
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e9a19b166119394c6f883c2c049cae44cea55fc4d3624ad45adb48866618175ac3326ae54a768f589a2e35c18eed81dc7c4171cf01b641aa02257ce56dd809f9
|
7
|
+
data.tar.gz: 8a5fa75a6f2941b38ed379ca6cdf8dee5f6fa893c2e69e70bcff6db0591ce129e1dc175d9b9bf465034c00f643d74ac63bb6dcc8d937a3bbda4a41fcc6f27ed1
|
data/bin/console
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
3
|
require "bundler/setup"
|
4
|
-
require "
|
4
|
+
require "./lib/ooxl"
|
5
5
|
|
6
6
|
# You can add fixtures and/or initialization code here to make experimenting
|
7
7
|
# with your gem easier. You can also use a different console, if you like.
|
@@ -10,5 +10,5 @@ require "ooxml_excel"
|
|
10
10
|
# require "pry"
|
11
11
|
# Pry.start
|
12
12
|
|
13
|
-
require "
|
14
|
-
|
13
|
+
require "pry"
|
14
|
+
Pry.start
|
data/lib/ooxl/ooxl.rb
CHANGED
@@ -3,20 +3,33 @@ class OOXL
|
|
3
3
|
include ListHelper
|
4
4
|
attr_reader :filename
|
5
5
|
|
6
|
-
def initialize(
|
6
|
+
def initialize(filepath = nil, contents: nil, **options)
|
7
7
|
@workbook = nil
|
8
8
|
@sheets = {}
|
9
9
|
@styles = []
|
10
10
|
@comments = {}
|
11
|
-
@
|
11
|
+
@workbook_relationships = nil
|
12
|
+
@sheet_relationships = {}
|
12
13
|
@options = options
|
13
14
|
@tables = []
|
14
|
-
|
15
|
-
|
15
|
+
|
16
|
+
@filename = filepath && File.basename(filepath)
|
17
|
+
if contents.present?
|
18
|
+
parse_spreadsheet_contents(contents)
|
19
|
+
elsif filepath.present?
|
20
|
+
parse_spreadsheet_file(filepath)
|
21
|
+
else
|
22
|
+
raise 'no file path or contents were provided'
|
23
|
+
end
|
16
24
|
end
|
17
25
|
|
18
26
|
def self.open(spreadsheet_filepath, options={})
|
19
|
-
new(spreadsheet_filepath, options)
|
27
|
+
new(spreadsheet_filepath, **options)
|
28
|
+
end
|
29
|
+
|
30
|
+
def self.parse(spreadsheet_contents, options={})
|
31
|
+
spreadsheet_contents.force_encoding('ASCII-8BIT') if spreadsheet_contents.respond_to?(:force_encoding)
|
32
|
+
new(nil, contents: spreadsheet_contents, **options)
|
20
33
|
end
|
21
34
|
|
22
35
|
def sheets(skip_hidden: false)
|
@@ -33,9 +46,11 @@ class OOXL
|
|
33
46
|
end
|
34
47
|
|
35
48
|
def sheet(sheet_name)
|
36
|
-
|
37
|
-
raise "No #{sheet_name} in workbook." if
|
38
|
-
|
49
|
+
sheet_meta = @workbook.sheets.find { |sheet| sheet[:name] == sheet_name }
|
50
|
+
raise "No #{sheet_name} in workbook." if sheet_meta.nil?
|
51
|
+
|
52
|
+
sheet_index = @workbook_relationships[sheet_meta[:relationship_id]].scan(/\d+/).first
|
53
|
+
sheet = @sheets.fetch(sheet_index)
|
39
54
|
|
40
55
|
# shared variables
|
41
56
|
sheet.name = sheet_name
|
@@ -73,38 +88,46 @@ class OOXL
|
|
73
88
|
end
|
74
89
|
|
75
90
|
def fetch_comments(sheet_index)
|
76
|
-
|
77
|
-
relationship = @relationships[final_sheet_index.to_s]
|
91
|
+
relationship = @sheet_relationships[sheet_index]
|
78
92
|
@comments[relationship.comment_id] if relationship.present?
|
79
93
|
end
|
80
94
|
|
81
|
-
def
|
95
|
+
def parse_spreadsheet_file(file_path)
|
96
|
+
Zip::File.open(file_path) { |zip| parse_zip(zip) }
|
97
|
+
end
|
98
|
+
|
99
|
+
def parse_spreadsheet_contents(file_contents)
|
100
|
+
# open_buffer works for strings and IO streams
|
101
|
+
Zip::File.open_buffer(file_contents) { |zip| parse_zip(zip) }
|
102
|
+
end
|
103
|
+
|
104
|
+
def parse_zip(spreadsheet_zip)
|
82
105
|
shared_strings = []
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
shared_strings << shared_string_node.xpath('r/t|t').map { |value_node| value_node.text}.join('')
|
97
|
-
end
|
98
|
-
when /xl\/tables\/.*?/i
|
99
|
-
@tables << OOXL::Table.new(entry.get_input_stream.read)
|
100
|
-
when "xl/workbook.xml"
|
101
|
-
@workbook = OOXL::Workbook.load_from_stream(entry.get_input_stream.read)
|
102
|
-
when /xl\/worksheets\/_rels\/sheet\d+\.xml\.rels/
|
103
|
-
sheet_id = entry.name.scan(/sheet(\d+)/).flatten.first
|
104
|
-
@relationships[sheet_id] = Relationships.new(entry.get_input_stream.read)
|
105
|
-
else
|
106
|
-
# unsupported for now..
|
106
|
+
spreadsheet_zip.each do |entry|
|
107
|
+
case entry.name
|
108
|
+
when /xl\/worksheets\/sheet(\d+)?\.xml/
|
109
|
+
sheet_id = entry.name.scan(/xl\/worksheets\/sheet(\d+)?\.xml/).flatten.first
|
110
|
+
@sheets[sheet_id] = OOXL::Sheet.new(entry.get_input_stream.read, shared_strings, @options)
|
111
|
+
when /xl\/styles\.xml/
|
112
|
+
@styles = OOXL::Styles.load_from_stream(entry.get_input_stream.read)
|
113
|
+
when /xl\/comments(\d+)?\.xml/
|
114
|
+
comment_id = entry.name.scan(/xl\/comments(\d+)\.xml/).flatten.first
|
115
|
+
@comments[comment_id] = OOXL::Comments.load_from_stream(entry.get_input_stream.read)
|
116
|
+
when "xl/sharedStrings.xml"
|
117
|
+
Nokogiri.XML(entry.get_input_stream.read).remove_namespaces!.xpath('sst/si').each do |shared_string_node|
|
118
|
+
shared_strings << shared_string_node.xpath('r/t|t').map { |value_node| value_node.text}.join('')
|
107
119
|
end
|
120
|
+
when /xl\/tables\/.*?/i
|
121
|
+
@tables << OOXL::Table.new(entry.get_input_stream.read)
|
122
|
+
when "xl/workbook.xml"
|
123
|
+
@workbook = OOXL::Workbook.load_from_stream(entry.get_input_stream.read)
|
124
|
+
when /xl\/worksheets\/_rels\/sheet\d+\.xml\.rels/
|
125
|
+
sheet_id = entry.name.scan(/sheet(\d+)/).flatten.first
|
126
|
+
@sheet_relationships[sheet_id] = Relationships.new(entry.get_input_stream.read)
|
127
|
+
when /xl\/_rels\/workbook\.xml\.rels/
|
128
|
+
@workbook_relationships = Relationships.new(entry.get_input_stream.read)
|
129
|
+
else
|
130
|
+
# unsupported for now..
|
108
131
|
end
|
109
132
|
end
|
110
133
|
end
|
data/lib/ooxl/util.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
class OOXL
|
1
|
+
class OOXL
|
2
2
|
module Util
|
3
|
-
COLUMN_LETTERS = ('A'..'ZZZZ').to_a
|
4
|
-
|
5
|
-
|
3
|
+
COLUMN_LETTERS = [nil] + ('A'..'ZZZZ').to_a
|
4
|
+
|
5
|
+
def letter_index(col_letter)
|
6
|
+
column_letter_to_number(col_letter) - 1
|
6
7
|
end
|
7
8
|
|
8
|
-
def
|
9
|
-
|
9
|
+
def letter_equivalent(col_index)
|
10
|
+
column_number_to_letter(col_index + 1)
|
10
11
|
end
|
11
12
|
|
12
13
|
def to_column_letter(reference)
|
@@ -14,13 +15,17 @@ class OOXL
|
|
14
15
|
end
|
15
16
|
|
16
17
|
def uniform_reference(ref)
|
17
|
-
ref.to_s[/[A-Z]/] ?
|
18
|
+
ref.to_s[/[A-Z]/] ? column_letter_to_number(ref) : ref
|
18
19
|
end
|
19
20
|
|
20
21
|
def node_value_extractor(node)
|
21
22
|
node.try(:value)
|
22
23
|
end
|
23
24
|
|
25
|
+
def column_number_to_letter(index)
|
26
|
+
COLUMN_LETTERS.fetch(index)
|
27
|
+
end
|
28
|
+
|
24
29
|
def column_letter_to_number(column_letter)
|
25
30
|
pow = column_letter.length - 1
|
26
31
|
result = 0
|
data/lib/ooxl/version.rb
CHANGED
data/lib/ooxl/xl_objects/relationships.rb
CHANGED
@@ -1,29 +1,41 @@
|
|
1
1
|
class OOXL
|
2
2
|
class Relationships
|
3
3
|
SUPPORTED_TYPES = ['http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments']
|
4
|
+
|
4
5
|
def initialize(relationships_node)
|
5
|
-
@
|
6
|
+
@relationships = []
|
6
7
|
parse_relationships(relationships_node)
|
7
8
|
end
|
8
9
|
|
9
10
|
def comment_id
|
10
|
-
|
11
|
+
comment_target = by_type('comments').first
|
12
|
+
comment_target && extract_file_reference(comment_target)
|
13
|
+
end
|
14
|
+
|
15
|
+
def [](id)
|
16
|
+
@relationships.find { |rel| rel.id == id }&.target
|
17
|
+
end
|
18
|
+
|
19
|
+
def by_type(type)
|
20
|
+
@relationships.select { |rel| rel.type == type }.map(&:target)
|
11
21
|
end
|
12
22
|
|
13
23
|
private
|
24
|
+
|
14
25
|
def parse_relationships(relationships_node)
|
15
26
|
relationships_node = Nokogiri.XML(relationships_node).remove_namespaces!
|
16
27
|
relationships_node.xpath('//Relationship').each do |relationship_node|
|
17
28
|
relationship_type = relationship_node.attributes["Type"].value
|
18
29
|
target = relationship_node.attributes["Target"].value
|
19
|
-
|
20
|
-
|
21
|
-
|
30
|
+
id = extract_number(relationship_node.attributes["Id"].value)
|
31
|
+
type = extract_type(relationship_type)
|
32
|
+
target = relationship_node.attributes["Target"].value
|
33
|
+
@relationships << Relationship.new(id, type, target)
|
22
34
|
end
|
23
35
|
end
|
24
36
|
|
25
|
-
def
|
26
|
-
|
37
|
+
def extract_number(str)
|
38
|
+
str.scan(/(\d+)/).flatten.first
|
27
39
|
end
|
28
40
|
|
29
41
|
def extract_type(type)
|
@@ -34,6 +46,7 @@ class OOXL
|
|
34
46
|
file.scan(/(\d+)\.[\w]/).flatten.first
|
35
47
|
end
|
36
48
|
|
49
|
+
Relationship = Struct.new(:id, :type, :target)
|
37
50
|
end
|
38
51
|
end
|
39
52
|
|
data/lib/ooxl/xl_objects/row.rb
CHANGED
@@ -45,12 +45,16 @@ class OOXL
|
|
45
45
|
end
|
46
46
|
|
47
47
|
def self.load_from_node(row_node, shared_strings, styles, options)
|
48
|
-
new(id: row_node
|
48
|
+
new(id: extract_id(row_node),
|
49
49
|
spans: row_node.attributes["spans"].try(:value),
|
50
50
|
height: row_node.attributes["ht"].try(:value),
|
51
51
|
cells: row_node.xpath('c').map { |cell_node| OOXL::Cell.load_from_node(cell_node, shared_strings, styles)},
|
52
52
|
options: options )
|
53
53
|
end
|
54
|
+
|
55
|
+
def self.extract_id(row_node)
|
56
|
+
row_node.attributes["r"].try(:value)
|
57
|
+
end
|
54
58
|
end
|
55
59
|
end
|
56
60
|
|
data/lib/ooxl/xl_objects/row_cache.rb
CHANGED
@@ -10,6 +10,8 @@ class OOXL
|
|
10
10
|
# built on-demand -- use fetch_row_by_id instead
|
11
11
|
attr_reader :row_id_map
|
12
12
|
|
13
|
+
delegate :size, to: :row_nodes
|
14
|
+
|
13
15
|
def initialize(sheet_xml, shared_strings, options = {})
|
14
16
|
@shared_strings = shared_strings
|
15
17
|
@sheet_xml = sheet_xml
|
@@ -47,11 +49,33 @@ class OOXL
|
|
47
49
|
row_cache
|
48
50
|
end
|
49
51
|
|
52
|
+
def row_range(start_index, end_index)
|
53
|
+
return enum_for(:row_range, start_index, end_index) unless block_given?
|
54
|
+
|
55
|
+
rows do |row|
|
56
|
+
row_id = row.id.to_i
|
57
|
+
next if row_id < start_index
|
58
|
+
break if row_id > end_index
|
59
|
+
|
60
|
+
yield row
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def max_row_index
|
65
|
+
return 0 if row_nodes.empty?
|
66
|
+
|
67
|
+
if all_rows_loaded?
|
68
|
+
row_cache.last.id.to_i
|
69
|
+
else
|
70
|
+
Row.extract_id(row_nodes.last).to_i
|
71
|
+
end
|
72
|
+
end
|
73
|
+
|
50
74
|
private
|
51
75
|
|
52
76
|
def parse_more_rows
|
53
77
|
row_nodes.drop(row_cache.count).each do |row_node|
|
54
|
-
row =
|
78
|
+
row = parse_row(row_node)
|
55
79
|
row_cache << row
|
56
80
|
row_id_map[row.id] = row
|
57
81
|
yield row if block_given?
|
@@ -62,10 +86,6 @@ class OOXL
|
|
62
86
|
row_cache.count == row_nodes.count
|
63
87
|
end
|
64
88
|
|
65
|
-
def row_nodes
|
66
|
-
@row_nodes ||= @sheet_xml.xpath('//sheetData/row')
|
67
|
-
end
|
68
|
-
|
69
89
|
def fetch_row_by_id(row_id)
|
70
90
|
row_id = row_id.to_s
|
71
91
|
return row_id_map[row_id] if all_rows_loaded? || row_id_map.key?(row_id)
|
@@ -95,6 +115,14 @@ class OOXL
|
|
95
115
|
end
|
96
116
|
yielded_rows
|
97
117
|
end
|
118
|
+
|
119
|
+
def row_nodes
|
120
|
+
@row_nodes ||= @sheet_xml.xpath('//sheetData/row')&.to_a
|
121
|
+
end
|
122
|
+
|
123
|
+
def parse_row(row_node)
|
124
|
+
Row.load_from_node(row_node, @shared_strings, @styles, @options)
|
125
|
+
end
|
98
126
|
end
|
99
127
|
end
|
100
128
|
|
data/lib/ooxl/xl_objects/sheet.rb
CHANGED
@@ -128,7 +128,7 @@ class OOXL
|
|
128
128
|
# cell_range values separated by comma
|
129
129
|
if cell_range.include?(":")
|
130
130
|
cell_letters = cell_range.gsub(/[\d]/, '').split(':')
|
131
|
-
start_index, end_index = cell_range[/[A-Z]{1,}\d+/] ? cell_range.gsub(/[^\d:]/, '').split(':').map(&:to_i) : [1,
|
131
|
+
start_index, end_index = cell_range[/[A-Z]{1,}\d+/] ? cell_range.gsub(/[^\d:]/, '').split(':').map(&:to_i) : [1, @row_cache.max_row_index]
|
132
132
|
if cell_letters.uniq.size > 1
|
133
133
|
list_values_from_rectangle(cell_letters, start_index, end_index)
|
134
134
|
else
|
@@ -148,22 +148,19 @@ class OOXL
|
|
148
148
|
# 3 => end_index
|
149
149
|
# Expected output would be: [['value', 'value', 'value'], ['value', 'value', 'value'], ['value', 'value', 'value']]
|
150
150
|
def list_values_from_rectangle(cell_letters, start_index, end_index)
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
row["#{cell_letter}#{row_index}"].value
|
151
|
+
start_col = column_letter_to_number(cell_letters.first)
|
152
|
+
end_col = column_letter_to_number(cell_letters.last)
|
153
|
+
@row_cache.row_range(start_index, end_index).map do |row|
|
154
|
+
(start_col..end_col).map do |col_index|
|
155
|
+
col_letter = column_number_to_letter(col_index)
|
156
|
+
row["#{col_letter}#{row.id}"].value
|
158
157
|
end
|
159
158
|
end
|
160
159
|
end
|
161
160
|
|
162
161
|
def list_values_from_column(column_letter, start_index, end_index)
|
163
|
-
(start_index
|
164
|
-
row
|
165
|
-
next if row.blank?
|
166
|
-
row["#{column_letter}#{row_index}"].value
|
162
|
+
@row_cache.row_range(start_index, end_index).map do |row|
|
163
|
+
row["#{column_letter}#{row.id}"].value
|
167
164
|
end
|
168
165
|
end
|
169
166
|
|
@@ -174,19 +171,19 @@ class OOXL
|
|
174
171
|
[row[cell_ref].value]
|
175
172
|
end
|
176
173
|
|
177
|
-
def self.load_from_stream(xml_stream, shared_strings)
|
178
|
-
self.new(Nokogiri.XML(xml_stream).remove_namespaces!, shared_strings)
|
179
|
-
end
|
180
|
-
|
181
174
|
def in_merged_cells?(cell_id)
|
182
175
|
column_letter, column_index = cell_id.partition(/\d+/)
|
183
|
-
range =
|
176
|
+
range = merged_cells.find { |column_range, index_range| column_range.cover?(column_letter) && index_range.cover?(column_index) }
|
184
177
|
range.present?
|
185
178
|
end
|
186
179
|
|
180
|
+
def self.load_from_stream(xml_stream, shared_strings)
|
181
|
+
self.new(xml_stream, shared_strings)
|
182
|
+
end
|
183
|
+
|
187
184
|
private
|
188
185
|
|
189
|
-
def
|
186
|
+
def merged_cells
|
190
187
|
@merged_cells ||= @xml.xpath('//mergeCells/mergeCell').map do |merged_cell|
|
191
188
|
# <mergeCell ref="Q381:R381"/>
|
192
189
|
start_reference, end_reference = merged_cell.attributes["ref"].try(:value).split(':')
|
data/ooxml_excel.gemspec
CHANGED
@@ -26,7 +26,7 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.require_paths = ["lib"]
|
27
27
|
spec.add_dependency 'activesupport'
|
28
28
|
spec.add_dependency 'nokogiri', '~> 1'
|
29
|
-
spec.add_dependency 'rubyzip', '~> 1.0', '< 2.0.0'
|
29
|
+
spec.add_dependency 'rubyzip', '~> 1.3.0', '< 2.0.0'
|
30
30
|
|
31
31
|
spec.add_development_dependency "bundler"
|
32
32
|
spec.add_development_dependency "pry-byebug"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ooxl
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1.5.
|
4
|
+
version: 0.0.1.5.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Mones
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-08-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|
@@ -44,7 +44,7 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
47
|
+
version: 1.3.0
|
48
48
|
- - "<"
|
49
49
|
- !ruby/object:Gem::Version
|
50
50
|
version: 2.0.0
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
requirements:
|
55
55
|
- - "~>"
|
56
56
|
- !ruby/object:Gem::Version
|
57
|
-
version:
|
57
|
+
version: 1.3.0
|
58
58
|
- - "<"
|
59
59
|
- !ruby/object:Gem::Version
|
60
60
|
version: 2.0.0
|
@@ -153,7 +153,7 @@ files:
|
|
153
153
|
homepage: https://github.com/halcjames/ooxl
|
154
154
|
licenses: []
|
155
155
|
metadata: {}
|
156
|
-
post_install_message:
|
156
|
+
post_install_message:
|
157
157
|
rdoc_options: []
|
158
158
|
require_paths:
|
159
159
|
- lib
|
@@ -168,9 +168,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
168
168
|
- !ruby/object:Gem::Version
|
169
169
|
version: '0'
|
170
170
|
requirements: []
|
171
|
-
|
172
|
-
|
173
|
-
signing_key:
|
171
|
+
rubygems_version: 3.2.6
|
172
|
+
signing_key:
|
174
173
|
specification_version: 4
|
175
174
|
summary: OOXL Excel - Parse Excel Spreadsheets (xlsx, xlsm).
|
176
175
|
test_files: []
|