ooxml_parser 0.29.0 → 0.30.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/chart/chart.rb +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb +2 -2
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/picture/docx_blip/file_reference.rb +3 -10
- data/lib/ooxml_parser/common_parser/common_data/content_types.rb +1 -1
- data/lib/ooxml_parser/common_parser/common_data/hyperlink.rb +3 -3
- data/lib/ooxml_parser/common_parser/common_data/ooxml_document_object.rb +2 -91
- data/lib/ooxml_parser/common_parser/common_document_structure.rb +38 -0
- data/lib/ooxml_parser/common_parser/parser/ooxml_file.rb +68 -0
- data/lib/ooxml_parser/common_parser/parser.rb +32 -38
- data/lib/ooxml_parser/docx_parser/document_structure/comments.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/comments_extended.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_properties.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_settings.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/header_footer.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/numbering.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/note.rb +2 -2
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/page_properties.rb +3 -3
- data/lib/ooxml_parser/docx_parser/document_structure/styles.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure.rb +10 -11
- data/lib/ooxml_parser/docx_parser.rb +3 -2
- data/lib/ooxml_parser/pptx_parser/presentation/comment_authors.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_comments.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_theme.rb +23 -23
- data/lib/ooxml_parser/pptx_parser/presentation/slide/graphic_frame/graphic_frame.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide.rb +5 -5
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layout_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layouts_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/slide_master_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_masters_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/table_styles.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation.rb +7 -8
- data/lib/ooxml_parser/pptx_parser.rb +3 -2
- data/lib/ooxml_parser/version.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/chartsheet.rb +3 -3
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_cache.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_table_definition.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/style_sheet.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet/table_part.rb +2 -2
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet.rb +11 -11
- data/lib/ooxml_parser/xlsx_parser/workbook.rb +7 -8
- data/lib/ooxml_parser/xlsx_parser.rb +3 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8264219a82e05f855d6e850703479fdfbb25b866ee5eb5705ac5171903fd7dd
|
4
|
+
data.tar.gz: e5ef8d6182d11e432c97bc8d63a1632f270c443acf17803b6abafc502ae0681a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d666854752d50ce5a26dd6c36fa78b801e34de2938eb10e6247ca11832979eea3b23a8fc8a838e272b64df9e13f0947a747dbe322ac008ec797cd0a836c22519
|
7
|
+
data.tar.gz: 81facb1e39faa54991524a2c3b3b613588c990d6cf02be6ac063b17c6971f0f08af0bc09a95cdae99a7a4eb50d1bb1a7278bc76377c0e496786b6f86335335f2
|
@@ -82,7 +82,7 @@ module OoxmlParser
|
|
82
82
|
# Parse Chart data
|
83
83
|
# @return [Chart] result of parsing
|
84
84
|
def parse
|
85
|
-
chart_xml = parse_xml(
|
85
|
+
chart_xml = parse_xml(root_object.current_xml)
|
86
86
|
chart_xml.xpath('*').each do |chart_node|
|
87
87
|
case chart_node.name
|
88
88
|
when 'chartSpace'
|
@@ -180,8 +180,8 @@ module OoxmlParser
|
|
180
180
|
|
181
181
|
# Parse relationship of chart
|
182
182
|
def parse_relationships
|
183
|
-
file_name = File.basename(
|
184
|
-
relationship_file = "#{
|
183
|
+
file_name = File.basename(root_object.current_xml)
|
184
|
+
relationship_file = "#{root_object.unpacked_folder}" \
|
185
185
|
'/word/charts/' \
|
186
186
|
"_rels/#{file_name}.rels"
|
187
187
|
|
@@ -198,7 +198,7 @@ module OoxmlParser
|
|
198
198
|
return if chart_relationship.empty?
|
199
199
|
|
200
200
|
chart_style_file = chart_relationship.first
|
201
|
-
style_file = "#{
|
201
|
+
style_file = "#{root_object.unpacked_folder}" \
|
202
202
|
"/word/charts/#{chart_style_file}"
|
203
203
|
|
204
204
|
@style = ChartStyleFile.new(parent: self).parse(style_file)
|
data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb
CHANGED
@@ -23,9 +23,9 @@ module OoxmlParser
|
|
23
23
|
@data = DocxPicture.new(parent: self).parse(node_child)
|
24
24
|
when 'chart'
|
25
25
|
@type = :chart
|
26
|
-
|
26
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/#{root_object.get_link_from_rels(node_child.attribute('id').value)}")
|
27
27
|
@data = Chart.new(parent: self).parse
|
28
|
-
|
28
|
+
root_object.xmls_stack.pop
|
29
29
|
when 'wgp'
|
30
30
|
@type = :group
|
31
31
|
@data = ShapesGrouping.new(parent: self).parse(node_child)
|
@@ -25,7 +25,7 @@ module OoxmlParser
|
|
25
25
|
return self unless @resource_id
|
26
26
|
return self if @resource_id.empty?
|
27
27
|
|
28
|
-
@path =
|
28
|
+
@path = root_object.get_link_from_rels(@resource_id)
|
29
29
|
if !@path || @path.empty?
|
30
30
|
warn "Cant find path to media file by id: #{@resource_id}"
|
31
31
|
return self
|
@@ -33,7 +33,7 @@ module OoxmlParser
|
|
33
33
|
return self if @path == 'NULL'
|
34
34
|
return self if @path.match?(URI::DEFAULT_PARSER.make_regexp)
|
35
35
|
|
36
|
-
full_path_to_file =
|
36
|
+
full_path_to_file = root_object.unpacked_folder + root_object.root_subfolder + @path.gsub('..', '')
|
37
37
|
if File.exist?(full_path_to_file)
|
38
38
|
@content = if File.extname(@path) == '.xlsx'
|
39
39
|
parse_ole_xlsx(full_path_to_file)
|
@@ -52,14 +52,7 @@ module OoxmlParser
|
|
52
52
|
# @param [String] full_path to file
|
53
53
|
# @return [XLSXWorkbook]
|
54
54
|
def parse_ole_xlsx(full_path)
|
55
|
-
|
56
|
-
# by replacing all global variables
|
57
|
-
stack = OOXMLDocumentObject.xmls_stack
|
58
|
-
dir = OOXMLDocumentObject.path_to_folder
|
59
|
-
result = OoxmlParser::Parser.parse(full_path)
|
60
|
-
OOXMLDocumentObject.xmls_stack = stack
|
61
|
-
OOXMLDocumentObject.path_to_folder = dir
|
62
|
-
result
|
55
|
+
OoxmlParser::XlsxParser.parse_xlsx(full_path)
|
63
56
|
end
|
64
57
|
end
|
65
58
|
end
|
@@ -21,7 +21,7 @@ module OoxmlParser
|
|
21
21
|
# Parse ContentTypes object
|
22
22
|
# @return [ContentTypes] result of parsing
|
23
23
|
def parse
|
24
|
-
doc = Nokogiri::XML.parse(File.open("#{
|
24
|
+
doc = Nokogiri::XML.parse(File.open("#{root_object.unpacked_folder}/[Content_Types].xml"))
|
25
25
|
node = doc.xpath('*').first
|
26
26
|
|
27
27
|
node.xpath('*').each do |node_child|
|
@@ -46,7 +46,7 @@ module OoxmlParser
|
|
46
46
|
@url = Coordinates.new.parse_string(value.value)
|
47
47
|
when 'id'
|
48
48
|
@id = value.value
|
49
|
-
@url =
|
49
|
+
@url = root_object.get_link_from_rels(@id) unless @id.empty?
|
50
50
|
when 'tooltip'
|
51
51
|
@tooltip = value.value
|
52
52
|
when 'ref'
|
@@ -80,7 +80,7 @@ module OoxmlParser
|
|
80
80
|
else
|
81
81
|
if meaningful_id?
|
82
82
|
@action = :external_link
|
83
|
-
@url =
|
83
|
+
@url = root_object.get_link_from_rels(@id)
|
84
84
|
end
|
85
85
|
end
|
86
86
|
self
|
@@ -98,7 +98,7 @@ module OoxmlParser
|
|
98
98
|
def parse_url_for_slide_link
|
99
99
|
return unless meaningful_id?
|
100
100
|
|
101
|
-
@url =
|
101
|
+
@url = root_object.get_link_from_rels(@id).scan(/\d+/).join.to_i
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
@@ -28,6 +28,8 @@ module OoxmlParser
|
|
28
28
|
|
29
29
|
instance_variables.each do |current_attribute|
|
30
30
|
next if current_attribute == :@parent
|
31
|
+
next if instance_variable_get(current_attribute).is_a?(Nokogiri::XML::Element)
|
32
|
+
|
31
33
|
return false unless instance_variable_get(current_attribute) == other.instance_variable_get(current_attribute)
|
32
34
|
end
|
33
35
|
true
|
@@ -47,96 +49,5 @@ module OoxmlParser
|
|
47
49
|
end
|
48
50
|
xml
|
49
51
|
end
|
50
|
-
|
51
|
-
class << self
|
52
|
-
# @return [String] path to root subfolder
|
53
|
-
attr_accessor :root_subfolder
|
54
|
-
# @return [PresentationTheme] list of themes
|
55
|
-
attr_accessor :theme
|
56
|
-
# @return [Array<String>] stack of xmls
|
57
|
-
attr_accessor :xmls_stack
|
58
|
-
# @return [String] path to root folder
|
59
|
-
attr_accessor :path_to_folder
|
60
|
-
|
61
|
-
# Copy this file and rename to zip
|
62
|
-
# @param path [String] path to file
|
63
|
-
# @return [String] path to result zip
|
64
|
-
def copy_file_and_rename_to_zip(path)
|
65
|
-
file_name = File.basename(path)
|
66
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
67
|
-
file_path = "#{tmp_folder}/#{file_name}"
|
68
|
-
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
69
|
-
FileUtils.mkdir_p(tmp_folder)
|
70
|
-
raise "Cannot find file by path #{path}" unless File.exist?(path)
|
71
|
-
|
72
|
-
FileUtils.cp path, tmp_folder
|
73
|
-
file_path
|
74
|
-
end
|
75
|
-
|
76
|
-
# Decrypt file protected with password
|
77
|
-
# @param path [String] path to file
|
78
|
-
# @param password [String] password to file
|
79
|
-
# @return [String] path to decrypted file
|
80
|
-
def decrypt_file(path, password)
|
81
|
-
file_name = File.basename(path)
|
82
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
83
|
-
decrypted_path = "#{tmp_folder}/#{file_name}"
|
84
|
-
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
85
|
-
OoxmlDecrypt::EncryptedFile.decrypt_to_file(path, binary_password, decrypted_path)
|
86
|
-
|
87
|
-
decrypted_path
|
88
|
-
end
|
89
|
-
|
90
|
-
# Unzip specified file
|
91
|
-
# @param path_to_file [String] path to zip file
|
92
|
-
# @param destination [String] folder to extract
|
93
|
-
# @return [void]
|
94
|
-
def unzip_file(path_to_file, destination)
|
95
|
-
Zip.warn_invalid_date = false
|
96
|
-
Zip::File.open(path_to_file) do |zip_file|
|
97
|
-
raise LoadError, "There is no files in zip #{path_to_file}" if zip_file.entries.empty?
|
98
|
-
|
99
|
-
zip_file.each do |file|
|
100
|
-
file_path = File.join(destination, file.name)
|
101
|
-
FileUtils.mkdir_p(File.dirname(file_path))
|
102
|
-
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
# @return [String] dir to base of file
|
108
|
-
def dir
|
109
|
-
"#{OOXMLDocumentObject.path_to_folder}#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/"
|
110
|
-
end
|
111
|
-
|
112
|
-
# @return [String] path to current xml file
|
113
|
-
def current_xml
|
114
|
-
OOXMLDocumentObject.path_to_folder + OOXMLDocumentObject.xmls_stack.last
|
115
|
-
end
|
116
|
-
|
117
|
-
# Add file to parsing stack
|
118
|
-
# @param path [String] path of file to add to stack
|
119
|
-
# @return [void]
|
120
|
-
def add_to_xmls_stack(path)
|
121
|
-
OOXMLDocumentObject.xmls_stack << if path.include?('..')
|
122
|
-
"#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/#{path}"
|
123
|
-
elsif path.start_with?(OOXMLDocumentObject.root_subfolder)
|
124
|
-
path
|
125
|
-
else
|
126
|
-
OOXMLDocumentObject.root_subfolder + path
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
# Get link to file from rels file
|
131
|
-
# @param id [String] file to get
|
132
|
-
# @return [String] result
|
133
|
-
def get_link_from_rels(id)
|
134
|
-
rels_path = dir + "_rels/#{File.basename(OOXMLDocumentObject.xmls_stack.last)}.rels"
|
135
|
-
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
136
|
-
|
137
|
-
relationships = Relationships.new.parse_file(rels_path)
|
138
|
-
relationships.target_by_id(id)
|
139
|
-
end
|
140
|
-
end
|
141
52
|
end
|
142
53
|
end
|
@@ -14,12 +14,50 @@ module OoxmlParser
|
|
14
14
|
attr_accessor :default_font_style
|
15
15
|
# @return [ContentTypes] data about content types
|
16
16
|
attr_accessor :content_types
|
17
|
+
# @return [String] root sub-folder for object
|
18
|
+
attr_reader :root_subfolder
|
19
|
+
# @return [String] path to folder with unpacked document
|
20
|
+
attr_reader :unpacked_folder
|
21
|
+
# @return [Array<String>] list of xmls to parse
|
22
|
+
attr_accessor :xmls_stack
|
17
23
|
|
18
24
|
def initialize(params = {})
|
19
25
|
@default_font_size = params.fetch(:default_font_size, 18)
|
20
26
|
@default_font_typeface = params.fetch(:default_font_typeface, 'Arial')
|
21
27
|
@default_font_style = FontStyle.new
|
28
|
+
@unpacked_folder = params.fetch(:unpacked_folder, nil)
|
29
|
+
@xmls_stack = []
|
22
30
|
super(parent: nil)
|
23
31
|
end
|
32
|
+
|
33
|
+
# @return [String] path to current xml file
|
34
|
+
def current_xml
|
35
|
+
root_object.unpacked_folder + @xmls_stack.last
|
36
|
+
end
|
37
|
+
|
38
|
+
# Add file to parsing stack
|
39
|
+
# @param path [String] path of file to add to stack
|
40
|
+
# @return [void]
|
41
|
+
def add_to_xmls_stack(path)
|
42
|
+
@xmls_stack << if path.include?('..')
|
43
|
+
"#{File.dirname(@xmls_stack.last)}/#{path}"
|
44
|
+
elsif path.start_with?(@root_subfolder)
|
45
|
+
path
|
46
|
+
else
|
47
|
+
@root_subfolder + path
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Get link to file from rels file
|
52
|
+
# @param id [String] file to get
|
53
|
+
# @return [String] result
|
54
|
+
def get_link_from_rels(id)
|
55
|
+
dir = "#{unpacked_folder}#{File.dirname(@xmls_stack.last)}/"
|
56
|
+
rels_path = dir + "_rels/#{File.basename(@xmls_stack.last)}.rels"
|
57
|
+
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
58
|
+
|
59
|
+
relationships = Relationships.new.parse_file(rels_path)
|
60
|
+
relationships.target_by_id(id)
|
61
|
+
end
|
24
62
|
end
|
25
63
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OoxmlParser
|
4
|
+
# Class for actions with OOXML file
|
5
|
+
class OoxmlFile
|
6
|
+
# @return [String] path to file
|
7
|
+
attr_reader :path
|
8
|
+
|
9
|
+
def initialize(path)
|
10
|
+
@path = path
|
11
|
+
end
|
12
|
+
|
13
|
+
# Copy this file and rename to zip
|
14
|
+
# @return [String] path to result zip
|
15
|
+
def copy_file_and_rename_to_zip
|
16
|
+
file_name = File.basename(@path)
|
17
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
18
|
+
@zip_path = "#{tmp_folder}/#{file_name}"
|
19
|
+
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
20
|
+
FileUtils.mkdir_p(tmp_folder)
|
21
|
+
raise "Cannot find file by path #{@path}" unless File.exist?(@path)
|
22
|
+
|
23
|
+
FileUtils.cp path, tmp_folder
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [String] path to folder with zip
|
27
|
+
def path_to_folder
|
28
|
+
@zip_path.sub(File.basename(@zip_path), '')
|
29
|
+
end
|
30
|
+
|
31
|
+
# Unzip specified file
|
32
|
+
# @return [void]
|
33
|
+
def unzip
|
34
|
+
Zip.warn_invalid_date = false
|
35
|
+
Zip::File.open(@zip_path) do |zip_file|
|
36
|
+
raise LoadError, "There is no files in zip #{@zip_path}" if zip_file.entries.empty?
|
37
|
+
|
38
|
+
zip_file.each do |file|
|
39
|
+
file_path = File.join(path_to_folder, file.name)
|
40
|
+
FileUtils.mkdir_p(File.dirname(file_path))
|
41
|
+
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# @return [Symbol] file type recognized by folder structure
|
47
|
+
def format_by_folders
|
48
|
+
return :docx if Dir.exist?("#{path_to_folder}/word")
|
49
|
+
return :xlsx if Dir.exist?("#{path_to_folder}/xl")
|
50
|
+
return :pptx if Dir.exist?("#{path_to_folder}/ppt")
|
51
|
+
|
52
|
+
:zip
|
53
|
+
end
|
54
|
+
|
55
|
+
# Decrypt file protected with password
|
56
|
+
# @param password [String] password to file
|
57
|
+
# @return [OoxmlFile] path to decrypted file
|
58
|
+
def decrypt(password)
|
59
|
+
file_name = File.basename(@path)
|
60
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
61
|
+
decrypted_path = "#{tmp_folder}/#{file_name}"
|
62
|
+
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
63
|
+
OoxmlDecrypt::EncryptedFile.decrypt_to_file(@path, binary_password, decrypted_path)
|
64
|
+
|
65
|
+
OoxmlFile.new(decrypted_path)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -1,52 +1,46 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'parser/encryption_checker'
|
4
|
+
require_relative 'parser/ooxml_file'
|
4
5
|
|
5
6
|
module OoxmlParser
|
6
7
|
# Basic class for OoxmlParser
|
7
8
|
class Parser
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
class << self
|
10
|
+
# Base method to yield parse document of any type
|
11
|
+
# @param [OoxmlFile] file with data
|
12
|
+
# @return [CommonDocumentStructure] structure of doc
|
13
|
+
def parse_format(file)
|
14
|
+
return nil if EncryptionChecker.new(file.path).encrypted?
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
16
|
+
file.copy_file_and_rename_to_zip
|
17
|
+
file.unzip
|
18
|
+
model = yield(file)
|
19
|
+
model.file_path = file.path if model
|
20
|
+
FileUtils.rm_rf(file.path_to_folder)
|
21
|
+
model
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
24
|
+
# Base method to parse document of any type
|
25
|
+
# @param path_to_file [String] file
|
26
|
+
# @return [CommonDocumentStructure] structure of doc
|
27
|
+
def parse(path_to_file, password: nil)
|
28
|
+
file = OoxmlFile.new(path_to_file)
|
29
|
+
file = file.decrypt(password) if password
|
30
|
+
Parser.parse_format(file) do |yielded_file|
|
31
|
+
format = yielded_file.format_by_folders
|
32
|
+
case format
|
33
|
+
when :docx
|
34
|
+
DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
|
35
|
+
when :xlsx
|
36
|
+
XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
|
37
|
+
when :pptx
|
38
|
+
Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
|
39
|
+
else
|
40
|
+
warn "#{path_to_file} is a simple zip file without OOXML content"
|
41
|
+
end
|
39
42
|
end
|
40
43
|
end
|
41
44
|
end
|
42
|
-
|
43
|
-
# Recognize folder format
|
44
|
-
# @param directory [String] path to dirctory
|
45
|
-
# @return [Symbol] type of document
|
46
|
-
def self.recognize_folder_format(directory = OOXMLDocumentObject.path_to_folder)
|
47
|
-
return :docx if Dir.exist?("#{directory}/word")
|
48
|
-
return :xlsx if Dir.exist?("#{directory}/xl")
|
49
|
-
return :pptx if Dir.exist?("#{directory}/ppt")
|
50
|
-
end
|
51
45
|
end
|
52
46
|
end
|
@@ -9,8 +9,8 @@ module OoxmlParser
|
|
9
9
|
|
10
10
|
def initialize(params = {})
|
11
11
|
@comments_array = []
|
12
|
-
@file = params.fetch(:file, "#{OOXMLDocumentObject.path_to_folder}word/comments.xml")
|
13
12
|
super(parent: params[:parent])
|
13
|
+
@file = params.fetch(:file, "#{root_object.unpacked_folder}word/comments.xml")
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Comment] accessor
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse CommentsExtended object
|
18
18
|
# @return [CommentsExtended] result of parsing
|
19
19
|
def parse
|
20
|
-
file_to_parse = "#{
|
20
|
+
file_to_parse = "#{root_object.unpacked_folder}word/commentsExtended.xml"
|
21
21
|
return nil unless File.exist?(file_to_parse)
|
22
22
|
|
23
23
|
doc = parse_xml(file_to_parse)
|
@@ -8,7 +8,7 @@ module OoxmlParser
|
|
8
8
|
# Parse Document properties
|
9
9
|
# @return [DocumentProperties]
|
10
10
|
def parse
|
11
|
-
properties_file = "#{
|
11
|
+
properties_file = "#{root_object.unpacked_folder}docProps/app.xml"
|
12
12
|
unless File.exist?(properties_file)
|
13
13
|
warn "There is no 'docProps/app.xml' in docx. It may be some problem with it"
|
14
14
|
return self
|
@@ -9,7 +9,7 @@ module OoxmlParser
|
|
9
9
|
# Parse Settings object
|
10
10
|
# @return [DocumentSettings] result of parsing
|
11
11
|
def parse
|
12
|
-
settings_path = "#{
|
12
|
+
settings_path = "#{root_object.unpacked_folder}word/settings.xml"
|
13
13
|
return nil unless File.exist?(settings_path)
|
14
14
|
|
15
15
|
doc = parse_xml(settings_path)
|
@@ -47,7 +47,7 @@ module OoxmlParser
|
|
47
47
|
def parse(node)
|
48
48
|
@id = node.attribute('id').value.to_i
|
49
49
|
parse_type(node)
|
50
|
-
doc = parse_xml(
|
50
|
+
doc = parse_xml(root_object.unpacked_folder + xml_path)
|
51
51
|
doc.search(xpath_for_search).each do |footnote|
|
52
52
|
next unless footnote.attribute('id').value.to_i == @id
|
53
53
|
|
@@ -38,7 +38,7 @@ module OoxmlParser
|
|
38
38
|
# Parse Numbering data
|
39
39
|
# @return [Numbering] result of parse
|
40
40
|
def parse
|
41
|
-
numbering_xml = "#{
|
41
|
+
numbering_xml = "#{root_object.unpacked_folder}word/numbering.xml"
|
42
42
|
return nil unless File.exist?(numbering_xml)
|
43
43
|
|
44
44
|
node = parse_xml(File.open(numbering_xml))
|
@@ -43,10 +43,10 @@ module OoxmlParser
|
|
43
43
|
# @param target [String] name of target
|
44
44
|
# @return [String] path to note xml file
|
45
45
|
def file_path(target)
|
46
|
-
file = "#{
|
46
|
+
file = "#{root_object.unpacked_folder}word/#{target}"
|
47
47
|
return file if File.exist?(file)
|
48
48
|
|
49
|
-
"#{
|
49
|
+
"#{root_object.unpacked_folder}#{target}" unless File.exist?(file)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
@@ -63,8 +63,8 @@ module OoxmlParser
|
|
63
63
|
when 'cols'
|
64
64
|
@columns = Columns.new.parse(pg_size_subnode)
|
65
65
|
when 'headerReference', 'footerReference'
|
66
|
-
target =
|
67
|
-
|
66
|
+
target = root_object.get_link_from_rels(pg_size_subnode.attribute('id').value)
|
67
|
+
root_object.add_to_xmls_stack("word/#{target}")
|
68
68
|
note = Note.parse(default_paragraph: default_paragraph,
|
69
69
|
default_character: default_character,
|
70
70
|
target: target,
|
@@ -72,7 +72,7 @@ module OoxmlParser
|
|
72
72
|
type: File.basename(target).sub('.xml', ''),
|
73
73
|
parent: self)
|
74
74
|
@notes << note
|
75
|
-
|
75
|
+
root_object.xmls_stack.pop
|
76
76
|
when 'footnotePr'
|
77
77
|
@footnote_properties = FootnoteProperties.new(parent: self).parse(pg_size_subnode)
|
78
78
|
end
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse styles data
|
18
18
|
# @return [Styles] result of parsing
|
19
19
|
def parse
|
20
|
-
doc = parse_xml("#{
|
20
|
+
doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
|
21
21
|
doc.xpath('w:styles/*').each do |node_child|
|
22
22
|
case node_child.name
|
23
23
|
when 'docDefaults'
|
@@ -44,7 +44,7 @@ module OoxmlParser
|
|
44
44
|
# @return [CommentsExtended] extended comments
|
45
45
|
attr_accessor :comments_extended
|
46
46
|
|
47
|
-
def initialize
|
47
|
+
def initialize(params = {})
|
48
48
|
@elements = []
|
49
49
|
@notes = []
|
50
50
|
@document_properties = DocumentProperties.new
|
@@ -158,17 +158,16 @@ module OoxmlParser
|
|
158
158
|
# @return [DocumentStructure] parsed structure
|
159
159
|
def parse
|
160
160
|
@content_types = ContentTypes.new(parent: self).parse
|
161
|
-
|
162
|
-
OOXMLDocumentObject.xmls_stack = []
|
161
|
+
@root_subfolder = 'word/'
|
163
162
|
@comments = []
|
164
163
|
DocumentStructure.default_paragraph_style = DocxParagraph.new
|
165
164
|
DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
|
166
|
-
@theme = PresentationTheme.parse('word/theme/theme1.xml')
|
167
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
165
|
+
@theme = PresentationTheme.new(parent: self).parse('word/theme/theme1.xml')
|
166
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}word/_rels/document.xml.rels")
|
168
167
|
parse_styles
|
169
168
|
number = 0
|
170
|
-
|
171
|
-
doc = parse_xml(
|
169
|
+
root_object.add_to_xmls_stack('word/document.xml')
|
170
|
+
doc = parse_xml(root_object.current_xml)
|
172
171
|
doc.search('//w:document').each do |document|
|
173
172
|
document.xpath('w:background').each do |background|
|
174
173
|
@background = DocumentBackground.new(parent: self).parse(background)
|
@@ -201,12 +200,12 @@ module OoxmlParser
|
|
201
200
|
end
|
202
201
|
end
|
203
202
|
end
|
204
|
-
|
203
|
+
root_object.xmls_stack.pop
|
205
204
|
@document_properties = DocumentProperties.new(parent: self).parse
|
206
205
|
@comments = Comments.new(parent: self).parse
|
207
206
|
@comments_extended = CommentsExtended.new(parent: self).parse
|
208
207
|
@comments_document = Comments.new(parent: self,
|
209
|
-
file: "#{
|
208
|
+
file: "#{root_object.unpacked_folder}word/#{relationships.target_by_type('commentsDocument').first}")
|
210
209
|
.parse
|
211
210
|
@settings = DocumentSettings.new(parent: self).parse
|
212
211
|
self
|
@@ -215,7 +214,7 @@ module OoxmlParser
|
|
215
214
|
# Parse default style
|
216
215
|
# @return [void]
|
217
216
|
def parse_default_style
|
218
|
-
doc = parse_xml("#{
|
217
|
+
doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
|
219
218
|
doc.search('//w:style').each do |style|
|
220
219
|
next if style.attribute('default').nil?
|
221
220
|
|
@@ -256,7 +255,7 @@ module OoxmlParser
|
|
256
255
|
|
257
256
|
# Perform parsing styles.xml
|
258
257
|
def parse_styles
|
259
|
-
file = "#{
|
258
|
+
file = "#{root_object.unpacked_folder}/word/styles.xml"
|
260
259
|
DocumentStructure.default_paragraph_style = DocxParagraph.new(parent: self)
|
261
260
|
DocumentStructure.default_table_paragraph_style = DocxParagraph.new(parent: self)
|
262
261
|
DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
|
@@ -10,8 +10,9 @@ module OoxmlParser
|
|
10
10
|
# @param path_to_file [String] file path
|
11
11
|
# @return [DocumentStructure] result of parse
|
12
12
|
def self.parse_docx(path_to_file)
|
13
|
-
|
14
|
-
|
13
|
+
file = OoxmlFile.new(path_to_file)
|
14
|
+
Parser.parse_format(file) do |yielded_file|
|
15
|
+
DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
|
15
16
|
end
|
16
17
|
end
|
17
18
|
end
|
@@ -15,7 +15,7 @@ module OoxmlParser
|
|
15
15
|
# Parse CommentAuthors object
|
16
16
|
# @param file [Nokogiri::XML:Element] node to parse
|
17
17
|
# @return [CommentAuthors] result of parsing
|
18
|
-
def parse(file = "#{
|
18
|
+
def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/commentAuthors.xml")
|
19
19
|
return nil unless File.exist?(file)
|
20
20
|
|
21
21
|
document = parse_xml(File.open(file))
|
@@ -15,7 +15,7 @@ module OoxmlParser
|
|
15
15
|
# Parse PresentationComments object
|
16
16
|
# @param file [Nokogiri::XML:Element] node to parse
|
17
17
|
# @return [PresentationComments] result of parsing
|
18
|
-
def parse(file = "#{
|
18
|
+
def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/comments/comment1.xml")
|
19
19
|
return nil unless File.exist?(file)
|
20
20
|
|
21
21
|
document = parse_xml(File.open(file))
|
@@ -9,46 +9,46 @@ module OoxmlParser
|
|
9
9
|
# @return [FontScheme] font scheme
|
10
10
|
attr_accessor :font_scheme
|
11
11
|
|
12
|
-
def initialize(
|
13
|
-
@name =
|
14
|
-
@color_scheme =
|
15
|
-
super
|
12
|
+
def initialize(parent: nil)
|
13
|
+
@name = ''
|
14
|
+
@color_scheme = {}
|
15
|
+
super
|
16
16
|
end
|
17
17
|
|
18
18
|
# Parse PresentationTheme
|
19
19
|
# @param file [String] path to file to parse
|
20
20
|
# @return [PresentationTheme] result of parsing
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
OOXMLDocumentObject.xmls_stack.pop
|
21
|
+
def parse(file)
|
22
|
+
root_object.add_to_xmls_stack(file)
|
23
|
+
unless File.exist?(root_object.current_xml)
|
24
|
+
root_object.xmls_stack.pop
|
26
25
|
return
|
27
26
|
end
|
28
|
-
doc =
|
27
|
+
doc = parse_xml(root_object.current_xml)
|
28
|
+
|
29
29
|
doc.xpath('a:theme').each do |theme_node|
|
30
|
-
|
30
|
+
@name = theme_node.attribute('name').value if theme_node.attribute('name')
|
31
31
|
theme_node.xpath('a:themeElements/*').each do |theme_element_node|
|
32
32
|
case theme_element_node.name
|
33
33
|
when 'clrScheme'
|
34
34
|
theme_element_node.xpath('*').each do |color_scheme_element|
|
35
|
-
|
35
|
+
@color_scheme[color_scheme_element.name.to_sym] = ThemeColor.new.parse(color_scheme_element)
|
36
36
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
37
|
+
@color_scheme[:background1] = @color_scheme[:lt1]
|
38
|
+
@color_scheme[:background2] = @color_scheme[:lt2]
|
39
|
+
@color_scheme[:bg1] = @color_scheme[:lt1]
|
40
|
+
@color_scheme[:bg2] = @color_scheme[:lt2]
|
41
|
+
@color_scheme[:text1] = @color_scheme[:dk1]
|
42
|
+
@color_scheme[:text2] = @color_scheme[:dk2]
|
43
|
+
@color_scheme[:tx1] = @color_scheme[:dk1]
|
44
|
+
@color_scheme[:tx2] = @color_scheme[:dk2]
|
45
45
|
when 'fontScheme'
|
46
|
-
|
46
|
+
@font_scheme = FontScheme.new(parent: self).parse(theme_element_node)
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
51
|
-
|
50
|
+
root_object.xmls_stack.pop
|
51
|
+
self
|
52
52
|
end
|
53
53
|
end
|
54
54
|
end
|
@@ -30,9 +30,9 @@ module OoxmlParser
|
|
30
30
|
when 'tbl'
|
31
31
|
graphic_data << Table.new(parent: self).parse(graphic_node_child)
|
32
32
|
when 'chart'
|
33
|
-
|
33
|
+
root_object.add_to_xmls_stack(root_object.get_link_from_rels(graphic_node_child.attribute('id').value))
|
34
34
|
graphic_data << Chart.new(parent: self).parse
|
35
|
-
|
35
|
+
root_object.xmls_stack.pop
|
36
36
|
when 'oleObj'
|
37
37
|
graphic_data << OleObject.new(parent: self).parse(graphic_node_child)
|
38
38
|
end
|
@@ -51,9 +51,9 @@ module OoxmlParser
|
|
51
51
|
# Parse Slide object
|
52
52
|
# @return [Slide] result of parsing
|
53
53
|
def parse
|
54
|
-
|
54
|
+
root_object.add_to_xmls_stack(@xml_path)
|
55
55
|
@name = File.basename(@xml_path, '.*')
|
56
|
-
node = parse_xml(
|
56
|
+
node = parse_xml(root_object.current_xml)
|
57
57
|
node.xpath('//p:sld/*').each do |node_child|
|
58
58
|
case node_child.name
|
59
59
|
when 'cSld'
|
@@ -66,8 +66,8 @@ module OoxmlParser
|
|
66
66
|
@alternate_content = PresentationAlternateContent.new(parent: self).parse(node_child)
|
67
67
|
end
|
68
68
|
end
|
69
|
-
|
70
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
69
|
+
root_object.xmls_stack.pop
|
70
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}#{File.dirname(@xml_path)}/_rels/#{@name}.xml.rels")
|
71
71
|
parse_note
|
72
72
|
self
|
73
73
|
end
|
@@ -79,7 +79,7 @@ module OoxmlParser
|
|
79
79
|
notes_target = @relationships.target_by_type('notes')
|
80
80
|
return nil if notes_target.empty?
|
81
81
|
|
82
|
-
@note = PresentationNotes.new(parent: self).parse("#{
|
82
|
+
@note = PresentationNotes.new(parent: self).parse("#{root_object.unpacked_folder}#{File.dirname(@xml_path)}/#{notes_target.first}")
|
83
83
|
end
|
84
84
|
end
|
85
85
|
end
|
@@ -10,7 +10,7 @@ module OoxmlParser
|
|
10
10
|
# @param file [String] path to file to parse
|
11
11
|
# @return [SlideLayoutFile] result of parsing
|
12
12
|
def parse(file)
|
13
|
-
|
13
|
+
root_object.add_to_xmls_stack(file.gsub(root_object.unpacked_folder, ''))
|
14
14
|
doc = parse_xml(file)
|
15
15
|
doc.xpath('p:sldLayout/*').each do |node_child|
|
16
16
|
case node_child.name
|
@@ -18,7 +18,7 @@ module OoxmlParser
|
|
18
18
|
@common_slide_data = CommonSlideData.new(parent: self).parse(node_child)
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
21
|
+
root_object.xmls_stack.pop
|
22
22
|
self
|
23
23
|
end
|
24
24
|
end
|
@@ -5,7 +5,7 @@ module OoxmlParser
|
|
5
5
|
module SlideLayoutsHelper
|
6
6
|
# @return [Array<String>] list of slide layouts files
|
7
7
|
def slide_layouts_files
|
8
|
-
Dir["#{
|
8
|
+
Dir["#{root_object.unpacked_folder}ppt/slideLayouts/*.xml"]
|
9
9
|
end
|
10
10
|
|
11
11
|
private
|
@@ -10,7 +10,7 @@ module OoxmlParser
|
|
10
10
|
# @param file [String] path to file to parse
|
11
11
|
# @return [SlideMasterFile] result of parsing
|
12
12
|
def parse(file)
|
13
|
-
|
13
|
+
root_object.add_to_xmls_stack(file.gsub(root_object.unpacked_folder, ''))
|
14
14
|
doc = parse_xml(file)
|
15
15
|
doc.xpath('p:sldMaster/*').each do |node_child|
|
16
16
|
case node_child.name
|
@@ -18,7 +18,7 @@ module OoxmlParser
|
|
18
18
|
@common_slide_data = CommonSlideData.new(parent: self).parse(node_child)
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
21
|
+
root_object.xmls_stack.pop
|
22
22
|
self
|
23
23
|
end
|
24
24
|
end
|
@@ -5,7 +5,7 @@ module OoxmlParser
|
|
5
5
|
module SlideMastersHelper
|
6
6
|
# @return [Array<String>] list of slide masters files
|
7
7
|
def slide_masters_files
|
8
|
-
Dir["#{
|
8
|
+
Dir["#{root_object.unpacked_folder}ppt/slideMasters/*.xml"]
|
9
9
|
end
|
10
10
|
|
11
11
|
private
|
@@ -14,7 +14,7 @@ module OoxmlParser
|
|
14
14
|
# Parse TableStyles object
|
15
15
|
# @param file [Nokogiri::XML:Element] node to parse
|
16
16
|
# @return [TableStyles] result of parsing
|
17
|
-
def parse(file = "#{
|
17
|
+
def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/tableStyles.xml")
|
18
18
|
return nil unless File.exist?(file)
|
19
19
|
|
20
20
|
document = parse_xml(file)
|
@@ -43,11 +43,10 @@ module OoxmlParser
|
|
43
43
|
# @return [Presentation] parsed presentation
|
44
44
|
def parse
|
45
45
|
@content_types = ContentTypes.new(parent: self).parse
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
@theme = PresentationTheme.parse('ppt/theme/theme1.xml')
|
46
|
+
@root_subfolder = 'ppt/'
|
47
|
+
root_object.add_to_xmls_stack('ppt/presentation.xml')
|
48
|
+
doc = parse_xml(root_object.current_xml)
|
49
|
+
@theme = PresentationTheme.new(parent: self).parse('ppt/theme/theme1.xml')
|
51
50
|
@table_styles = TableStyles.new(parent: self).parse
|
52
51
|
@comment_authors = CommentAuthors.new(parent: self).parse
|
53
52
|
@comments = PresentationComments.new(parent: self).parse
|
@@ -60,13 +59,13 @@ module OoxmlParser
|
|
60
59
|
presentation_node_child.xpath('p:sldId').each do |silde_id_node|
|
61
60
|
slide_id = silde_id_node.attr('r:id')
|
62
61
|
@slides << Slide.new(parent: self,
|
63
|
-
xml_path: "#{
|
62
|
+
xml_path: "#{root_object.root_subfolder}/#{root_object.get_link_from_rels(slide_id)}")
|
64
63
|
.parse
|
65
64
|
end
|
66
65
|
end
|
67
66
|
end
|
68
|
-
|
69
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
67
|
+
root_object.xmls_stack.pop
|
68
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}/ppt/_rels/presentation.xml.rels")
|
70
69
|
parse_slide_layouts
|
71
70
|
parse_slide_masters
|
72
71
|
self
|
@@ -9,8 +9,9 @@ module OoxmlParser
|
|
9
9
|
# @param path_to_file [String] file path
|
10
10
|
# @return [Presentation] result of parse
|
11
11
|
def self.parse_pptx(path_to_file)
|
12
|
-
|
13
|
-
|
12
|
+
file = OoxmlFile.new(path_to_file)
|
13
|
+
Parser.parse_format(file) do |yielded_file|
|
14
|
+
Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
data/lib/ooxml_parser/version.rb
CHANGED
@@ -15,8 +15,8 @@ module OoxmlParser
|
|
15
15
|
# @param file [String] file to parse
|
16
16
|
# @return [Chartsheet] result of parsing
|
17
17
|
def parse(file)
|
18
|
-
|
19
|
-
doc = parse_xml(
|
18
|
+
root_object.add_to_xmls_stack(root_object.root_subfolder + file)
|
19
|
+
doc = parse_xml(root_object.current_xml)
|
20
20
|
node = doc.xpath('//xmlns:chartsheet').first
|
21
21
|
node.xpath('*').each do |node_child|
|
22
22
|
case node_child.name
|
@@ -26,7 +26,7 @@ module OoxmlParser
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
29
|
-
|
29
|
+
root_object.xmls_stack.pop
|
30
30
|
self
|
31
31
|
end
|
32
32
|
end
|
@@ -33,7 +33,7 @@ module OoxmlParser
|
|
33
33
|
# @return [PivotCacheDefinition] pivot cache definition for current pivot cache
|
34
34
|
def parse_pivot_cache_definition
|
35
35
|
definition_file = root_object.relationships.target_by_id(id)
|
36
|
-
full_file_path = "#{
|
36
|
+
full_file_path = "#{root_object.unpacked_folder}/xl/#{definition_file}"
|
37
37
|
@pivot_cache_definition = PivotCacheDefinition.new(parent: root_object)
|
38
38
|
.parse(full_file_path)
|
39
39
|
end
|
@@ -55,7 +55,7 @@ module OoxmlParser
|
|
55
55
|
# @param [String] file path
|
56
56
|
# @return [PivotTableDefinition] result of parsing
|
57
57
|
def parse(file)
|
58
|
-
doc = Nokogiri::XML.parse(File.open("#{
|
58
|
+
doc = Nokogiri::XML.parse(File.open("#{root_object.unpacked_folder}/#{file}"))
|
59
59
|
node = doc.xpath('//xmlns:pivotTableDefinition').first
|
60
60
|
node.attributes.each do |key, value|
|
61
61
|
case key
|
@@ -32,7 +32,7 @@ module OoxmlParser
|
|
32
32
|
# Parse StyleSheet object
|
33
33
|
# @return [StyleSheet] result of parsing
|
34
34
|
def parse
|
35
|
-
doc = parse_xml("#{
|
35
|
+
doc = parse_xml("#{root_object.unpacked_folder}/#{root_object.root_subfolder}/styles.xml")
|
36
36
|
doc.root.xpath('*').each do |node_child|
|
37
37
|
case node_child.name
|
38
38
|
when 'numFmts'
|
@@ -19,8 +19,8 @@ module OoxmlParser
|
|
19
19
|
# @param node [Nokogiri::XML:Element] node to parse
|
20
20
|
# @return [TablePart] result of parsing
|
21
21
|
def parse(node)
|
22
|
-
link_to_table_part_xml =
|
23
|
-
doc = parse_xml(
|
22
|
+
link_to_table_part_xml = root_object.get_link_from_rels(node.attribute('id').value)
|
23
|
+
doc = parse_xml(root_object.unpacked_folder + link_to_table_part_xml.gsub('..', 'xl'))
|
24
24
|
table_node = doc.xpath('xmlns:table').first
|
25
25
|
table_node.attributes.each do |key, value|
|
26
26
|
case key
|
@@ -61,9 +61,9 @@ module OoxmlParser
|
|
61
61
|
# Perform parsing of relationships
|
62
62
|
# @return [nil]
|
63
63
|
def parse_relationships
|
64
|
-
|
65
|
-
@relationships = Relationships.new(parent: self).parse_file(
|
66
|
-
|
64
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/worksheets/_rels/#{@xml_name}.rels")
|
65
|
+
@relationships = Relationships.new(parent: self).parse_file(root_object.current_xml) if File.exist?(root_object.current_xml)
|
66
|
+
root_object.xmls_stack.pop
|
67
67
|
end
|
68
68
|
|
69
69
|
# @return [True, false] if structure contain any user data
|
@@ -79,7 +79,7 @@ module OoxmlParser
|
|
79
79
|
|
80
80
|
# Parse list of drawings in file
|
81
81
|
def parse_drawing
|
82
|
-
drawing_node = parse_xml(
|
82
|
+
drawing_node = parse_xml(root_object.current_xml)
|
83
83
|
drawing_node.xpath('xdr:wsDr/*').each do |drawing_node_child|
|
84
84
|
@drawings << XlsxDrawing.new(parent: self).parse(drawing_node_child)
|
85
85
|
end
|
@@ -91,8 +91,8 @@ module OoxmlParser
|
|
91
91
|
def parse(path_to_xml_file)
|
92
92
|
@xml_name = File.basename path_to_xml_file
|
93
93
|
parse_relationships
|
94
|
-
|
95
|
-
doc = parse_xml(
|
94
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/worksheets/#{File.basename(path_to_xml_file)}")
|
95
|
+
doc = parse_xml(root_object.current_xml)
|
96
96
|
sheet = doc.search('//xmlns:worksheet').first
|
97
97
|
sheet.xpath('*').each do |worksheet_node_child|
|
98
98
|
case worksheet_node_child.name
|
@@ -107,11 +107,11 @@ module OoxmlParser
|
|
107
107
|
@merge << merge_node.attribute('ref').value.to_s
|
108
108
|
end
|
109
109
|
when 'drawing'
|
110
|
-
path_to_drawing =
|
110
|
+
path_to_drawing = root_object.get_link_from_rels(worksheet_node_child.attribute('id').value)
|
111
111
|
unless path_to_drawing.nil?
|
112
|
-
|
112
|
+
root_object.add_to_xmls_stack(path_to_drawing)
|
113
113
|
parse_drawing
|
114
|
-
|
114
|
+
root_object.xmls_stack.pop
|
115
115
|
end
|
116
116
|
when 'hyperlinks'
|
117
117
|
worksheet_node_child.xpath('xmlns:hyperlink').each do |hyperlink_node|
|
@@ -150,7 +150,7 @@ module OoxmlParser
|
|
150
150
|
end
|
151
151
|
end
|
152
152
|
parse_comments
|
153
|
-
|
153
|
+
root_object.xmls_stack.pop
|
154
154
|
self
|
155
155
|
end
|
156
156
|
|
@@ -175,7 +175,7 @@ module OoxmlParser
|
|
175
175
|
comments_target = relationships.target_by_type('comment')
|
176
176
|
return if comments_target.empty?
|
177
177
|
|
178
|
-
comments_file = "#{
|
178
|
+
comments_file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/#{comments_target.first.gsub('..', '')}"
|
179
179
|
@comments = ExcelComments.new(parent: self).parse(comments_file)
|
180
180
|
end
|
181
181
|
end
|
@@ -109,7 +109,7 @@ module OoxmlParser
|
|
109
109
|
shared_strings_target = relationships.target_by_type('sharedString')
|
110
110
|
return if shared_strings_target.empty?
|
111
111
|
|
112
|
-
shared_string_file = "#{
|
112
|
+
shared_string_file = "#{root_object.unpacked_folder}/xl/#{shared_strings_target.first}"
|
113
113
|
@shared_strings_table = SharedStringTable.new(parent: self).parse(shared_string_file)
|
114
114
|
end
|
115
115
|
|
@@ -117,13 +117,12 @@ module OoxmlParser
|
|
117
117
|
# @return [XLSXWorkbook]
|
118
118
|
def parse
|
119
119
|
@content_types = ContentTypes.new(parent: self).parse
|
120
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
120
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}xl/_rels/workbook.xml.rels")
|
121
121
|
parse_shared_strings
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
@
|
126
|
-
@theme = PresentationTheme.parse("xl/#{link_to_theme_xml}") if link_to_theme_xml
|
122
|
+
@root_subfolder = 'xl/'
|
123
|
+
root_object.add_to_xmls_stack('xl/workbook.xml')
|
124
|
+
@doc = Nokogiri::XML.parse(File.open(root_object.current_xml))
|
125
|
+
@theme = PresentationTheme.new(parent: self).parse("xl/#{link_to_theme_xml}") if link_to_theme_xml
|
127
126
|
@style_sheet = StyleSheet.new(parent: self).parse
|
128
127
|
@doc.xpath('xmlns:workbook/xmlns:sheets/xmlns:sheet').each do |sheet|
|
129
128
|
@sheets << Sheet.new(parent: self).parse(sheet)
|
@@ -139,7 +138,7 @@ module OoxmlParser
|
|
139
138
|
parse_pivot_table
|
140
139
|
parse_defined_names
|
141
140
|
parse_workbook_protection
|
142
|
-
|
141
|
+
root_object.xmls_stack.pop
|
143
142
|
self
|
144
143
|
end
|
145
144
|
|
@@ -9,8 +9,9 @@ module OoxmlParser
|
|
9
9
|
# @param path_to_file [String] file path
|
10
10
|
# @return [XLSXWorkbook] result of parse
|
11
11
|
def self.parse_xlsx(path_to_file)
|
12
|
-
|
13
|
-
|
12
|
+
file = OoxmlFile.new(path_to_file)
|
13
|
+
Parser.parse_format(file) do |yielded_file|
|
14
|
+
XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ooxml_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.30.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ONLYOFFICE
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-09-
|
13
|
+
date: 2022-09-09 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -363,6 +363,7 @@ files:
|
|
363
363
|
- lib/ooxml_parser/common_parser/common_document_structure.rb
|
364
364
|
- lib/ooxml_parser/common_parser/parser.rb
|
365
365
|
- lib/ooxml_parser/common_parser/parser/encryption_checker.rb
|
366
|
+
- lib/ooxml_parser/common_parser/parser/ooxml_file.rb
|
366
367
|
- lib/ooxml_parser/configuration.rb
|
367
368
|
- lib/ooxml_parser/docx_parser.rb
|
368
369
|
- lib/ooxml_parser/docx_parser/document_structure.rb
|
@@ -607,7 +608,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
607
608
|
- !ruby/object:Gem::Version
|
608
609
|
version: '0'
|
609
610
|
requirements: []
|
610
|
-
rubygems_version: 3.3.
|
611
|
+
rubygems_version: 3.3.22
|
611
612
|
signing_key:
|
612
613
|
specification_version: 4
|
613
614
|
summary: OoxmlParser Gem
|