ooxml_parser 0.29.0 → 0.31.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/chart/chart.rb +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb +2 -2
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/picture/docx_blip/file_reference.rb +3 -10
- data/lib/ooxml_parser/common_parser/common_data/content_types.rb +1 -1
- data/lib/ooxml_parser/common_parser/common_data/coordinates.rb +9 -4
- data/lib/ooxml_parser/common_parser/common_data/hyperlink.rb +3 -3
- data/lib/ooxml_parser/common_parser/common_data/ooxml_document_object.rb +2 -91
- data/lib/ooxml_parser/common_parser/common_data/paragraph/paragraph_run/run_properties.rb +4 -0
- data/lib/ooxml_parser/common_parser/common_document_structure.rb +38 -0
- data/lib/ooxml_parser/common_parser/parser/ooxml_file.rb +68 -0
- data/lib/ooxml_parser/common_parser/parser.rb +32 -38
- data/lib/ooxml_parser/docx_parser/document_structure/comments.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/comments_extended.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_properties.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_settings.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/header_footer.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/numbering.rb +2 -2
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/note.rb +2 -2
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/page_properties.rb +3 -3
- data/lib/ooxml_parser/docx_parser/document_structure/styles.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure.rb +10 -11
- data/lib/ooxml_parser/docx_parser.rb +3 -2
- data/lib/ooxml_parser/pptx_parser/presentation/comment_authors.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_comments.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_theme.rb +23 -23
- data/lib/ooxml_parser/pptx_parser/presentation/slide/graphic_frame/graphic_frame.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide.rb +5 -5
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layout_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layouts_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/slide_master_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_masters_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/table_styles.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation.rb +7 -8
- data/lib/ooxml_parser/pptx_parser.rb +3 -2
- data/lib/ooxml_parser/version.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/chartsheet.rb +3 -3
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_cache.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_table_definition.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/style_sheet.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet/table_part.rb +2 -2
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet.rb +11 -11
- data/lib/ooxml_parser/xlsx_parser/workbook.rb +7 -8
- data/lib/ooxml_parser/xlsx_parser.rb +3 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 682b6146d274c90e967ce9d494c4c6df0fafe597d9dc01fc27c3b8fa854630c2
|
4
|
+
data.tar.gz: a5f7c131398910ef896a0c0b21578083e02dbe84aa6d024bf4abd4e2fab2c1bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df07e615fd1ecc0c4436072b978f6f6ba8959a83785d88fe2a1922417c94daef2227ae4914f34683e3f9542ca35e68de2652c5cf78b2a52cf828d6f46b850aa2
|
7
|
+
data.tar.gz: 0fce9c7ca3617ef27c67acc338f8e2eaa7a2220d98de3d4367d4b4bcf89f445a1982e58fe2e3f4ec6aa1bca48e8ed533f7bed9ad0a7d64244620f47dd241b4c3
|
@@ -82,7 +82,7 @@ module OoxmlParser
|
|
82
82
|
# Parse Chart data
|
83
83
|
# @return [Chart] result of parsing
|
84
84
|
def parse
|
85
|
-
chart_xml = parse_xml(
|
85
|
+
chart_xml = parse_xml(root_object.current_xml)
|
86
86
|
chart_xml.xpath('*').each do |chart_node|
|
87
87
|
case chart_node.name
|
88
88
|
when 'chartSpace'
|
@@ -180,8 +180,8 @@ module OoxmlParser
|
|
180
180
|
|
181
181
|
# Parse relationship of chart
|
182
182
|
def parse_relationships
|
183
|
-
file_name = File.basename(
|
184
|
-
relationship_file = "#{
|
183
|
+
file_name = File.basename(root_object.current_xml)
|
184
|
+
relationship_file = "#{root_object.unpacked_folder}" \
|
185
185
|
'/word/charts/' \
|
186
186
|
"_rels/#{file_name}.rels"
|
187
187
|
|
@@ -198,7 +198,7 @@ module OoxmlParser
|
|
198
198
|
return if chart_relationship.empty?
|
199
199
|
|
200
200
|
chart_style_file = chart_relationship.first
|
201
|
-
style_file = "#{
|
201
|
+
style_file = "#{root_object.unpacked_folder}" \
|
202
202
|
"/word/charts/#{chart_style_file}"
|
203
203
|
|
204
204
|
@style = ChartStyleFile.new(parent: self).parse(style_file)
|
data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb
CHANGED
@@ -23,9 +23,9 @@ module OoxmlParser
|
|
23
23
|
@data = DocxPicture.new(parent: self).parse(node_child)
|
24
24
|
when 'chart'
|
25
25
|
@type = :chart
|
26
|
-
|
26
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/#{root_object.get_link_from_rels(node_child.attribute('id').value)}")
|
27
27
|
@data = Chart.new(parent: self).parse
|
28
|
-
|
28
|
+
root_object.xmls_stack.pop
|
29
29
|
when 'wgp'
|
30
30
|
@type = :group
|
31
31
|
@data = ShapesGrouping.new(parent: self).parse(node_child)
|
@@ -25,7 +25,7 @@ module OoxmlParser
|
|
25
25
|
return self unless @resource_id
|
26
26
|
return self if @resource_id.empty?
|
27
27
|
|
28
|
-
@path =
|
28
|
+
@path = root_object.get_link_from_rels(@resource_id)
|
29
29
|
if !@path || @path.empty?
|
30
30
|
warn "Cant find path to media file by id: #{@resource_id}"
|
31
31
|
return self
|
@@ -33,7 +33,7 @@ module OoxmlParser
|
|
33
33
|
return self if @path == 'NULL'
|
34
34
|
return self if @path.match?(URI::DEFAULT_PARSER.make_regexp)
|
35
35
|
|
36
|
-
full_path_to_file =
|
36
|
+
full_path_to_file = root_object.unpacked_folder + root_object.root_subfolder + @path.gsub('..', '')
|
37
37
|
if File.exist?(full_path_to_file)
|
38
38
|
@content = if File.extname(@path) == '.xlsx'
|
39
39
|
parse_ole_xlsx(full_path_to_file)
|
@@ -52,14 +52,7 @@ module OoxmlParser
|
|
52
52
|
# @param [String] full_path to file
|
53
53
|
# @return [XLSXWorkbook]
|
54
54
|
def parse_ole_xlsx(full_path)
|
55
|
-
|
56
|
-
# by replacing all global variables
|
57
|
-
stack = OOXMLDocumentObject.xmls_stack
|
58
|
-
dir = OOXMLDocumentObject.path_to_folder
|
59
|
-
result = OoxmlParser::Parser.parse(full_path)
|
60
|
-
OOXMLDocumentObject.xmls_stack = stack
|
61
|
-
OOXMLDocumentObject.path_to_folder = dir
|
62
|
-
result
|
55
|
+
OoxmlParser::XlsxParser.parse_xlsx(full_path)
|
63
56
|
end
|
64
57
|
end
|
65
58
|
end
|
@@ -21,7 +21,7 @@ module OoxmlParser
|
|
21
21
|
# Parse ContentTypes object
|
22
22
|
# @return [ContentTypes] result of parsing
|
23
23
|
def parse
|
24
|
-
doc =
|
24
|
+
doc = parse_xml("#{root_object.unpacked_folder}/[Content_Types].xml")
|
25
25
|
node = doc.xpath('*').first
|
26
26
|
|
27
27
|
node.xpath('*').each do |node_child|
|
@@ -3,6 +3,11 @@
|
|
3
3
|
module OoxmlParser
|
4
4
|
# Class for working with coordinates
|
5
5
|
class Coordinates
|
6
|
+
# @return [Regexp] regexp for row name
|
7
|
+
ROW_REGEXP = /[a-z]/i.freeze
|
8
|
+
# @return [Regexp] regexp for column name
|
9
|
+
COLUMN_REGEXP = /\d/.freeze
|
10
|
+
|
6
11
|
attr_accessor :row, :column, :list
|
7
12
|
|
8
13
|
def initialize(row = nil, column = nil, list = nil)
|
@@ -30,10 +35,10 @@ module OoxmlParser
|
|
30
35
|
range = arguments_string.split(':')
|
31
36
|
|
32
37
|
difference = []
|
33
|
-
symbols_from = range.first.scan(
|
34
|
-
symbols_to = range.last.scan(
|
35
|
-
digits_from = range.first.scan(
|
36
|
-
digits_to = range.last.scan(
|
38
|
+
symbols_from = range.first.scan(ROW_REGEXP).join
|
39
|
+
symbols_to = range.last.scan(ROW_REGEXP).join
|
40
|
+
digits_from = range.first.scan(COLUMN_REGEXP).join
|
41
|
+
digits_to = range.last.scan(COLUMN_REGEXP).join
|
37
42
|
|
38
43
|
difference[0] = [symbols_from, symbols_to] unless symbols_from == symbols_to
|
39
44
|
difference[1] = [digits_from, digits_to] unless digits_from == digits_to
|
@@ -46,7 +46,7 @@ module OoxmlParser
|
|
46
46
|
@url = Coordinates.new.parse_string(value.value)
|
47
47
|
when 'id'
|
48
48
|
@id = value.value
|
49
|
-
@url =
|
49
|
+
@url = root_object.get_link_from_rels(@id) unless @id.empty?
|
50
50
|
when 'tooltip'
|
51
51
|
@tooltip = value.value
|
52
52
|
when 'ref'
|
@@ -80,7 +80,7 @@ module OoxmlParser
|
|
80
80
|
else
|
81
81
|
if meaningful_id?
|
82
82
|
@action = :external_link
|
83
|
-
@url =
|
83
|
+
@url = root_object.get_link_from_rels(@id)
|
84
84
|
end
|
85
85
|
end
|
86
86
|
self
|
@@ -98,7 +98,7 @@ module OoxmlParser
|
|
98
98
|
def parse_url_for_slide_link
|
99
99
|
return unless meaningful_id?
|
100
100
|
|
101
|
-
@url =
|
101
|
+
@url = root_object.get_link_from_rels(@id).scan(/\d+/).join.to_i
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
@@ -28,6 +28,8 @@ module OoxmlParser
|
|
28
28
|
|
29
29
|
instance_variables.each do |current_attribute|
|
30
30
|
next if current_attribute == :@parent
|
31
|
+
next if instance_variable_get(current_attribute).is_a?(Nokogiri::XML::Element)
|
32
|
+
|
31
33
|
return false unless instance_variable_get(current_attribute) == other.instance_variable_get(current_attribute)
|
32
34
|
end
|
33
35
|
true
|
@@ -47,96 +49,5 @@ module OoxmlParser
|
|
47
49
|
end
|
48
50
|
xml
|
49
51
|
end
|
50
|
-
|
51
|
-
class << self
|
52
|
-
# @return [String] path to root subfolder
|
53
|
-
attr_accessor :root_subfolder
|
54
|
-
# @return [PresentationTheme] list of themes
|
55
|
-
attr_accessor :theme
|
56
|
-
# @return [Array<String>] stack of xmls
|
57
|
-
attr_accessor :xmls_stack
|
58
|
-
# @return [String] path to root folder
|
59
|
-
attr_accessor :path_to_folder
|
60
|
-
|
61
|
-
# Copy this file and rename to zip
|
62
|
-
# @param path [String] path to file
|
63
|
-
# @return [String] path to result zip
|
64
|
-
def copy_file_and_rename_to_zip(path)
|
65
|
-
file_name = File.basename(path)
|
66
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
67
|
-
file_path = "#{tmp_folder}/#{file_name}"
|
68
|
-
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
69
|
-
FileUtils.mkdir_p(tmp_folder)
|
70
|
-
raise "Cannot find file by path #{path}" unless File.exist?(path)
|
71
|
-
|
72
|
-
FileUtils.cp path, tmp_folder
|
73
|
-
file_path
|
74
|
-
end
|
75
|
-
|
76
|
-
# Decrypt file protected with password
|
77
|
-
# @param path [String] path to file
|
78
|
-
# @param password [String] password to file
|
79
|
-
# @return [String] path to decrypted file
|
80
|
-
def decrypt_file(path, password)
|
81
|
-
file_name = File.basename(path)
|
82
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
83
|
-
decrypted_path = "#{tmp_folder}/#{file_name}"
|
84
|
-
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
85
|
-
OoxmlDecrypt::EncryptedFile.decrypt_to_file(path, binary_password, decrypted_path)
|
86
|
-
|
87
|
-
decrypted_path
|
88
|
-
end
|
89
|
-
|
90
|
-
# Unzip specified file
|
91
|
-
# @param path_to_file [String] path to zip file
|
92
|
-
# @param destination [String] folder to extract
|
93
|
-
# @return [void]
|
94
|
-
def unzip_file(path_to_file, destination)
|
95
|
-
Zip.warn_invalid_date = false
|
96
|
-
Zip::File.open(path_to_file) do |zip_file|
|
97
|
-
raise LoadError, "There is no files in zip #{path_to_file}" if zip_file.entries.empty?
|
98
|
-
|
99
|
-
zip_file.each do |file|
|
100
|
-
file_path = File.join(destination, file.name)
|
101
|
-
FileUtils.mkdir_p(File.dirname(file_path))
|
102
|
-
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
# @return [String] dir to base of file
|
108
|
-
def dir
|
109
|
-
"#{OOXMLDocumentObject.path_to_folder}#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/"
|
110
|
-
end
|
111
|
-
|
112
|
-
# @return [String] path to current xml file
|
113
|
-
def current_xml
|
114
|
-
OOXMLDocumentObject.path_to_folder + OOXMLDocumentObject.xmls_stack.last
|
115
|
-
end
|
116
|
-
|
117
|
-
# Add file to parsing stack
|
118
|
-
# @param path [String] path of file to add to stack
|
119
|
-
# @return [void]
|
120
|
-
def add_to_xmls_stack(path)
|
121
|
-
OOXMLDocumentObject.xmls_stack << if path.include?('..')
|
122
|
-
"#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/#{path}"
|
123
|
-
elsif path.start_with?(OOXMLDocumentObject.root_subfolder)
|
124
|
-
path
|
125
|
-
else
|
126
|
-
OOXMLDocumentObject.root_subfolder + path
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
# Get link to file from rels file
|
131
|
-
# @param id [String] file to get
|
132
|
-
# @return [String] result
|
133
|
-
def get_link_from_rels(id)
|
134
|
-
rels_path = dir + "_rels/#{File.basename(OOXMLDocumentObject.xmls_stack.last)}.rels"
|
135
|
-
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
136
|
-
|
137
|
-
relationships = Relationships.new.parse_file(rels_path)
|
138
|
-
relationships.target_by_id(id)
|
139
|
-
end
|
140
|
-
end
|
141
52
|
end
|
142
53
|
end
|
@@ -47,6 +47,8 @@ module OoxmlParser
|
|
47
47
|
attr_accessor :shade
|
48
48
|
# @return [RunStyle] run style
|
49
49
|
attr_accessor :run_style
|
50
|
+
# @return [ValuedChild] ligatures type
|
51
|
+
attr_reader :ligatures
|
50
52
|
|
51
53
|
def initialize(params = {})
|
52
54
|
@font_name = params.fetch(:font_name, '')
|
@@ -124,6 +126,8 @@ module OoxmlParser
|
|
124
126
|
@shade = Shade.new(parent: self).parse(node_child)
|
125
127
|
when 'rStyle'
|
126
128
|
@run_style = RunStyle.new(parent: self).parse(node_child)
|
129
|
+
when 'ligatures'
|
130
|
+
@ligatures = ValuedChild.new(:symbol, parent: self).parse(node_child)
|
127
131
|
end
|
128
132
|
end
|
129
133
|
@font_color = DocxColorScheme.new(parent: self).parse(node)
|
@@ -14,12 +14,50 @@ module OoxmlParser
|
|
14
14
|
attr_accessor :default_font_style
|
15
15
|
# @return [ContentTypes] data about content types
|
16
16
|
attr_accessor :content_types
|
17
|
+
# @return [String] root sub-folder for object
|
18
|
+
attr_reader :root_subfolder
|
19
|
+
# @return [String] path to folder with unpacked document
|
20
|
+
attr_reader :unpacked_folder
|
21
|
+
# @return [Array<String>] list of xmls to parse
|
22
|
+
attr_accessor :xmls_stack
|
17
23
|
|
18
24
|
def initialize(params = {})
|
19
25
|
@default_font_size = params.fetch(:default_font_size, 18)
|
20
26
|
@default_font_typeface = params.fetch(:default_font_typeface, 'Arial')
|
21
27
|
@default_font_style = FontStyle.new
|
28
|
+
@unpacked_folder = params.fetch(:unpacked_folder, nil)
|
29
|
+
@xmls_stack = []
|
22
30
|
super(parent: nil)
|
23
31
|
end
|
32
|
+
|
33
|
+
# @return [String] path to current xml file
|
34
|
+
def current_xml
|
35
|
+
root_object.unpacked_folder + @xmls_stack.last
|
36
|
+
end
|
37
|
+
|
38
|
+
# Add file to parsing stack
|
39
|
+
# @param path [String] path of file to add to stack
|
40
|
+
# @return [void]
|
41
|
+
def add_to_xmls_stack(path)
|
42
|
+
@xmls_stack << if path.include?('..')
|
43
|
+
"#{File.dirname(@xmls_stack.last)}/#{path}"
|
44
|
+
elsif path.start_with?(@root_subfolder)
|
45
|
+
path
|
46
|
+
else
|
47
|
+
@root_subfolder + path
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Get link to file from rels file
|
52
|
+
# @param id [String] file to get
|
53
|
+
# @return [String] result
|
54
|
+
def get_link_from_rels(id)
|
55
|
+
dir = "#{unpacked_folder}#{File.dirname(@xmls_stack.last)}/"
|
56
|
+
rels_path = dir + "_rels/#{File.basename(@xmls_stack.last)}.rels"
|
57
|
+
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
58
|
+
|
59
|
+
relationships = Relationships.new.parse_file(rels_path)
|
60
|
+
relationships.target_by_id(id)
|
61
|
+
end
|
24
62
|
end
|
25
63
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OoxmlParser
|
4
|
+
# Class for actions with OOXML file
|
5
|
+
class OoxmlFile
|
6
|
+
# @return [String] path to file
|
7
|
+
attr_reader :path
|
8
|
+
|
9
|
+
def initialize(path)
|
10
|
+
@path = path
|
11
|
+
end
|
12
|
+
|
13
|
+
# Copy this file and rename to zip
|
14
|
+
# @return [String] path to result zip
|
15
|
+
def copy_file_and_rename_to_zip
|
16
|
+
file_name = File.basename(@path)
|
17
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
18
|
+
@zip_path = "#{tmp_folder}/#{file_name}"
|
19
|
+
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
20
|
+
FileUtils.mkdir_p(tmp_folder)
|
21
|
+
raise "Cannot find file by path #{@path}" unless File.exist?(@path)
|
22
|
+
|
23
|
+
FileUtils.cp path, tmp_folder
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [String] path to folder with zip
|
27
|
+
def path_to_folder
|
28
|
+
@zip_path.sub(File.basename(@zip_path), '')
|
29
|
+
end
|
30
|
+
|
31
|
+
# Unzip specified file
|
32
|
+
# @return [void]
|
33
|
+
def unzip
|
34
|
+
Zip.warn_invalid_date = false
|
35
|
+
Zip::File.open(@zip_path) do |zip_file|
|
36
|
+
raise LoadError, "There is no files in zip #{@zip_path}" if zip_file.entries.empty?
|
37
|
+
|
38
|
+
zip_file.each do |file|
|
39
|
+
file_path = File.join(path_to_folder, file.name)
|
40
|
+
FileUtils.mkdir_p(File.dirname(file_path))
|
41
|
+
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# @return [Symbol] file type recognized by folder structure
|
47
|
+
def format_by_folders
|
48
|
+
return :docx if Dir.exist?("#{path_to_folder}/word")
|
49
|
+
return :xlsx if Dir.exist?("#{path_to_folder}/xl")
|
50
|
+
return :pptx if Dir.exist?("#{path_to_folder}/ppt")
|
51
|
+
|
52
|
+
:zip
|
53
|
+
end
|
54
|
+
|
55
|
+
# Decrypt file protected with password
|
56
|
+
# @param password [String] password to file
|
57
|
+
# @return [OoxmlFile] path to decrypted file
|
58
|
+
def decrypt(password)
|
59
|
+
file_name = File.basename(@path)
|
60
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
61
|
+
decrypted_path = "#{tmp_folder}/#{file_name}"
|
62
|
+
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
63
|
+
OoxmlDecrypt::EncryptedFile.decrypt_to_file(@path, binary_password, decrypted_path)
|
64
|
+
|
65
|
+
OoxmlFile.new(decrypted_path)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -1,52 +1,46 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'parser/encryption_checker'
|
4
|
+
require_relative 'parser/ooxml_file'
|
4
5
|
|
5
6
|
module OoxmlParser
|
6
7
|
# Basic class for OoxmlParser
|
7
8
|
class Parser
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
class << self
|
10
|
+
# Base method to yield parse document of any type
|
11
|
+
# @param [OoxmlFile] file with data
|
12
|
+
# @return [CommonDocumentStructure] structure of doc
|
13
|
+
def parse_format(file)
|
14
|
+
return nil if EncryptionChecker.new(file.path).encrypted?
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
16
|
+
file.copy_file_and_rename_to_zip
|
17
|
+
file.unzip
|
18
|
+
model = yield(file)
|
19
|
+
model.file_path = file.path if model
|
20
|
+
FileUtils.rm_rf(file.path_to_folder)
|
21
|
+
model
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
24
|
+
# Base method to parse document of any type
|
25
|
+
# @param path_to_file [String] file
|
26
|
+
# @return [CommonDocumentStructure] structure of doc
|
27
|
+
def parse(path_to_file, password: nil)
|
28
|
+
file = OoxmlFile.new(path_to_file)
|
29
|
+
file = file.decrypt(password) if password
|
30
|
+
Parser.parse_format(file) do |yielded_file|
|
31
|
+
format = yielded_file.format_by_folders
|
32
|
+
case format
|
33
|
+
when :docx
|
34
|
+
DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
|
35
|
+
when :xlsx
|
36
|
+
XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
|
37
|
+
when :pptx
|
38
|
+
Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
|
39
|
+
else
|
40
|
+
warn "#{path_to_file} is a simple zip file without OOXML content"
|
41
|
+
end
|
39
42
|
end
|
40
43
|
end
|
41
44
|
end
|
42
|
-
|
43
|
-
# Recognize folder format
|
44
|
-
# @param directory [String] path to dirctory
|
45
|
-
# @return [Symbol] type of document
|
46
|
-
def self.recognize_folder_format(directory = OOXMLDocumentObject.path_to_folder)
|
47
|
-
return :docx if Dir.exist?("#{directory}/word")
|
48
|
-
return :xlsx if Dir.exist?("#{directory}/xl")
|
49
|
-
return :pptx if Dir.exist?("#{directory}/ppt")
|
50
|
-
end
|
51
45
|
end
|
52
46
|
end
|
@@ -9,8 +9,8 @@ module OoxmlParser
|
|
9
9
|
|
10
10
|
def initialize(params = {})
|
11
11
|
@comments_array = []
|
12
|
-
@file = params.fetch(:file, "#{OOXMLDocumentObject.path_to_folder}word/comments.xml")
|
13
12
|
super(parent: params[:parent])
|
13
|
+
@file = params.fetch(:file, "#{root_object.unpacked_folder}word/comments.xml")
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Comment] accessor
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse CommentsExtended object
|
18
18
|
# @return [CommentsExtended] result of parsing
|
19
19
|
def parse
|
20
|
-
file_to_parse = "#{
|
20
|
+
file_to_parse = "#{root_object.unpacked_folder}word/commentsExtended.xml"
|
21
21
|
return nil unless File.exist?(file_to_parse)
|
22
22
|
|
23
23
|
doc = parse_xml(file_to_parse)
|
@@ -8,7 +8,7 @@ module OoxmlParser
|
|
8
8
|
# Parse Document properties
|
9
9
|
# @return [DocumentProperties]
|
10
10
|
def parse
|
11
|
-
properties_file = "#{
|
11
|
+
properties_file = "#{root_object.unpacked_folder}docProps/app.xml"
|
12
12
|
unless File.exist?(properties_file)
|
13
13
|
warn "There is no 'docProps/app.xml' in docx. It may be some problem with it"
|
14
14
|
return self
|
@@ -9,7 +9,7 @@ module OoxmlParser
|
|
9
9
|
# Parse Settings object
|
10
10
|
# @return [DocumentSettings] result of parsing
|
11
11
|
def parse
|
12
|
-
settings_path = "#{
|
12
|
+
settings_path = "#{root_object.unpacked_folder}word/settings.xml"
|
13
13
|
return nil unless File.exist?(settings_path)
|
14
14
|
|
15
15
|
doc = parse_xml(settings_path)
|
@@ -47,7 +47,7 @@ module OoxmlParser
|
|
47
47
|
def parse(node)
|
48
48
|
@id = node.attribute('id').value.to_i
|
49
49
|
parse_type(node)
|
50
|
-
doc = parse_xml(
|
50
|
+
doc = parse_xml(root_object.unpacked_folder + xml_path)
|
51
51
|
doc.search(xpath_for_search).each do |footnote|
|
52
52
|
next unless footnote.attribute('id').value.to_i == @id
|
53
53
|
|
@@ -38,10 +38,10 @@ module OoxmlParser
|
|
38
38
|
# Parse Numbering data
|
39
39
|
# @return [Numbering] result of parse
|
40
40
|
def parse
|
41
|
-
numbering_xml = "#{
|
41
|
+
numbering_xml = "#{root_object.unpacked_folder}word/numbering.xml"
|
42
42
|
return nil unless File.exist?(numbering_xml)
|
43
43
|
|
44
|
-
node = parse_xml(
|
44
|
+
node = parse_xml(numbering_xml)
|
45
45
|
node.xpath('w:numbering/*').each do |numbering_child_node|
|
46
46
|
case numbering_child_node.name
|
47
47
|
when 'abstractNum'
|
@@ -43,10 +43,10 @@ module OoxmlParser
|
|
43
43
|
# @param target [String] name of target
|
44
44
|
# @return [String] path to note xml file
|
45
45
|
def file_path(target)
|
46
|
-
file = "#{
|
46
|
+
file = "#{root_object.unpacked_folder}word/#{target}"
|
47
47
|
return file if File.exist?(file)
|
48
48
|
|
49
|
-
"#{
|
49
|
+
"#{root_object.unpacked_folder}#{target}" unless File.exist?(file)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
@@ -63,8 +63,8 @@ module OoxmlParser
|
|
63
63
|
when 'cols'
|
64
64
|
@columns = Columns.new.parse(pg_size_subnode)
|
65
65
|
when 'headerReference', 'footerReference'
|
66
|
-
target =
|
67
|
-
|
66
|
+
target = root_object.get_link_from_rels(pg_size_subnode.attribute('id').value)
|
67
|
+
root_object.add_to_xmls_stack("word/#{target}")
|
68
68
|
note = Note.parse(default_paragraph: default_paragraph,
|
69
69
|
default_character: default_character,
|
70
70
|
target: target,
|
@@ -72,7 +72,7 @@ module OoxmlParser
|
|
72
72
|
type: File.basename(target).sub('.xml', ''),
|
73
73
|
parent: self)
|
74
74
|
@notes << note
|
75
|
-
|
75
|
+
root_object.xmls_stack.pop
|
76
76
|
when 'footnotePr'
|
77
77
|
@footnote_properties = FootnoteProperties.new(parent: self).parse(pg_size_subnode)
|
78
78
|
end
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse styles data
|
18
18
|
# @return [Styles] result of parsing
|
19
19
|
def parse
|
20
|
-
doc = parse_xml("#{
|
20
|
+
doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
|
21
21
|
doc.xpath('w:styles/*').each do |node_child|
|
22
22
|
case node_child.name
|
23
23
|
when 'docDefaults'
|
@@ -44,7 +44,7 @@ module OoxmlParser
|
|
44
44
|
# @return [CommentsExtended] extended comments
|
45
45
|
attr_accessor :comments_extended
|
46
46
|
|
47
|
-
def initialize
|
47
|
+
def initialize(params = {})
|
48
48
|
@elements = []
|
49
49
|
@notes = []
|
50
50
|
@document_properties = DocumentProperties.new
|
@@ -158,17 +158,16 @@ module OoxmlParser
|
|
158
158
|
# @return [DocumentStructure] parsed structure
|
159
159
|
def parse
|
160
160
|
@content_types = ContentTypes.new(parent: self).parse
|
161
|
-
|
162
|
-
OOXMLDocumentObject.xmls_stack = []
|
161
|
+
@root_subfolder = 'word/'
|
163
162
|
@comments = []
|
164
163
|
DocumentStructure.default_paragraph_style = DocxParagraph.new
|
165
164
|
DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
|
166
|
-
@theme = PresentationTheme.parse('word/theme/theme1.xml')
|
167
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
165
|
+
@theme = PresentationTheme.new(parent: self).parse('word/theme/theme1.xml')
|
166
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}word/_rels/document.xml.rels")
|
168
167
|
parse_styles
|
169
168
|
number = 0
|
170
|
-
|
171
|
-
doc = parse_xml(
|
169
|
+
root_object.add_to_xmls_stack('word/document.xml')
|
170
|
+
doc = parse_xml(root_object.current_xml)
|
172
171
|
doc.search('//w:document').each do |document|
|
173
172
|
document.xpath('w:background').each do |background|
|
174
173
|
@background = DocumentBackground.new(parent: self).parse(background)
|
@@ -201,12 +200,12 @@ module OoxmlParser
|
|
201
200
|
end
|
202
201
|
end
|
203
202
|
end
|
204
|
-
|
203
|
+
root_object.xmls_stack.pop
|
205
204
|
@document_properties = DocumentProperties.new(parent: self).parse
|
206
205
|
@comments = Comments.new(parent: self).parse
|
207
206
|
@comments_extended = CommentsExtended.new(parent: self).parse
|
208
207
|
@comments_document = Comments.new(parent: self,
|
209
|
-
file: "#{
|
208
|
+
file: "#{root_object.unpacked_folder}word/#{relationships.target_by_type('commentsDocument').first}")
|
210
209
|
.parse
|
211
210
|
@settings = DocumentSettings.new(parent: self).parse
|
212
211
|
self
|
@@ -215,7 +214,7 @@ module OoxmlParser
|
|
215
214
|
# Parse default style
|
216
215
|
# @return [void]
|
217
216
|
def parse_default_style
|
218
|
-
doc = parse_xml("#{
|
217
|
+
doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
|
219
218
|
doc.search('//w:style').each do |style|
|
220
219
|
next if style.attribute('default').nil?
|
221
220
|
|
@@ -256,7 +255,7 @@ module OoxmlParser
|
|
256
255
|
|
257
256
|
# Perform parsing styles.xml
|
258
257
|
def parse_styles
|
259
|
-
file = "#{
|
258
|
+
file = "#{root_object.unpacked_folder}/word/styles.xml"
|
260
259
|
DocumentStructure.default_paragraph_style = DocxParagraph.new(parent: self)
|
261
260
|
DocumentStructure.default_table_paragraph_style = DocxParagraph.new(parent: self)
|
262
261
|
DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
|
@@ -10,8 +10,9 @@ module OoxmlParser
|
|
10
10
|
# @param path_to_file [String] file path
|
11
11
|
# @return [DocumentStructure] result of parse
|
12
12
|
def self.parse_docx(path_to_file)
|
13
|
-
|
14
|
-
|
13
|
+
file = OoxmlFile.new(path_to_file)
|
14
|
+
Parser.parse_format(file) do |yielded_file|
|
15
|
+
DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
|
15
16
|
end
|
16
17
|
end
|
17
18
|
end
|
@@ -15,10 +15,10 @@ module OoxmlParser
|
|
15
15
|
# Parse CommentAuthors object
|
16
16
|
# @param file [Nokogiri::XML:Element] node to parse
|
17
17
|
# @return [CommentAuthors] result of parsing
|
18
|
-
def parse(file = "#{
|
18
|
+
def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/commentAuthors.xml")
|
19
19
|
return nil unless File.exist?(file)
|
20
20
|
|
21
|
-
document = parse_xml(
|
21
|
+
document = parse_xml(file)
|
22
22
|
node = document.xpath('*').first
|
23
23
|
|
24
24
|
node.xpath('*').each do |node_child|
|
@@ -15,10 +15,10 @@ module OoxmlParser
|
|
15
15
|
# Parse PresentationComments object
|
16
16
|
# @param file [Nokogiri::XML:Element] node to parse
|
17
17
|
# @return [PresentationComments] result of parsing
|
18
|
-
def parse(file = "#{
|
18
|
+
def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/comments/comment1.xml")
|
19
19
|
return nil unless File.exist?(file)
|
20
20
|
|
21
|
-
document = parse_xml(
|
21
|
+
document = parse_xml(file)
|
22
22
|
node = document.xpath('*').first
|
23
23
|
|
24
24
|
node.xpath('*').each do |node_child|
|
@@ -9,46 +9,46 @@ module OoxmlParser
|
|
9
9
|
# @return [FontScheme] font scheme
|
10
10
|
attr_accessor :font_scheme
|
11
11
|
|
12
|
-
def initialize(
|
13
|
-
@name =
|
14
|
-
@color_scheme =
|
15
|
-
super
|
12
|
+
def initialize(parent: nil)
|
13
|
+
@name = ''
|
14
|
+
@color_scheme = {}
|
15
|
+
super
|
16
16
|
end
|
17
17
|
|
18
18
|
# Parse PresentationTheme
|
19
19
|
# @param file [String] path to file to parse
|
20
20
|
# @return [PresentationTheme] result of parsing
|
21
|
-
def
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
OOXMLDocumentObject.xmls_stack.pop
|
21
|
+
def parse(file)
|
22
|
+
root_object.add_to_xmls_stack(file)
|
23
|
+
unless File.exist?(root_object.current_xml)
|
24
|
+
root_object.xmls_stack.pop
|
26
25
|
return
|
27
26
|
end
|
28
|
-
doc =
|
27
|
+
doc = parse_xml(root_object.current_xml)
|
28
|
+
|
29
29
|
doc.xpath('a:theme').each do |theme_node|
|
30
|
-
|
30
|
+
@name = theme_node.attribute('name').value if theme_node.attribute('name')
|
31
31
|
theme_node.xpath('a:themeElements/*').each do |theme_element_node|
|
32
32
|
case theme_element_node.name
|
33
33
|
when 'clrScheme'
|
34
34
|
theme_element_node.xpath('*').each do |color_scheme_element|
|
35
|
-
|
35
|
+
@color_scheme[color_scheme_element.name.to_sym] = ThemeColor.new.parse(color_scheme_element)
|
36
36
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
37
|
+
@color_scheme[:background1] = @color_scheme[:lt1]
|
38
|
+
@color_scheme[:background2] = @color_scheme[:lt2]
|
39
|
+
@color_scheme[:bg1] = @color_scheme[:lt1]
|
40
|
+
@color_scheme[:bg2] = @color_scheme[:lt2]
|
41
|
+
@color_scheme[:text1] = @color_scheme[:dk1]
|
42
|
+
@color_scheme[:text2] = @color_scheme[:dk2]
|
43
|
+
@color_scheme[:tx1] = @color_scheme[:dk1]
|
44
|
+
@color_scheme[:tx2] = @color_scheme[:dk2]
|
45
45
|
when 'fontScheme'
|
46
|
-
|
46
|
+
@font_scheme = FontScheme.new(parent: self).parse(theme_element_node)
|
47
47
|
end
|
48
48
|
end
|
49
49
|
end
|
50
|
-
|
51
|
-
|
50
|
+
root_object.xmls_stack.pop
|
51
|
+
self
|
52
52
|
end
|
53
53
|
end
|
54
54
|
end
|
@@ -30,9 +30,9 @@ module OoxmlParser
|
|
30
30
|
when 'tbl'
|
31
31
|
graphic_data << Table.new(parent: self).parse(graphic_node_child)
|
32
32
|
when 'chart'
|
33
|
-
|
33
|
+
root_object.add_to_xmls_stack(root_object.get_link_from_rels(graphic_node_child.attribute('id').value))
|
34
34
|
graphic_data << Chart.new(parent: self).parse
|
35
|
-
|
35
|
+
root_object.xmls_stack.pop
|
36
36
|
when 'oleObj'
|
37
37
|
graphic_data << OleObject.new(parent: self).parse(graphic_node_child)
|
38
38
|
end
|
@@ -51,9 +51,9 @@ module OoxmlParser
|
|
51
51
|
# Parse Slide object
|
52
52
|
# @return [Slide] result of parsing
|
53
53
|
def parse
|
54
|
-
|
54
|
+
root_object.add_to_xmls_stack(@xml_path)
|
55
55
|
@name = File.basename(@xml_path, '.*')
|
56
|
-
node = parse_xml(
|
56
|
+
node = parse_xml(root_object.current_xml)
|
57
57
|
node.xpath('//p:sld/*').each do |node_child|
|
58
58
|
case node_child.name
|
59
59
|
when 'cSld'
|
@@ -66,8 +66,8 @@ module OoxmlParser
|
|
66
66
|
@alternate_content = PresentationAlternateContent.new(parent: self).parse(node_child)
|
67
67
|
end
|
68
68
|
end
|
69
|
-
|
70
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
69
|
+
root_object.xmls_stack.pop
|
70
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}#{File.dirname(@xml_path)}/_rels/#{@name}.xml.rels")
|
71
71
|
parse_note
|
72
72
|
self
|
73
73
|
end
|
@@ -79,7 +79,7 @@ module OoxmlParser
|
|
79
79
|
notes_target = @relationships.target_by_type('notes')
|
80
80
|
return nil if notes_target.empty?
|
81
81
|
|
82
|
-
@note = PresentationNotes.new(parent: self).parse("#{
|
82
|
+
@note = PresentationNotes.new(parent: self).parse("#{root_object.unpacked_folder}#{File.dirname(@xml_path)}/#{notes_target.first}")
|
83
83
|
end
|
84
84
|
end
|
85
85
|
end
|
@@ -10,7 +10,7 @@ module OoxmlParser
|
|
10
10
|
# @param file [String] path to file to parse
|
11
11
|
# @return [SlideLayoutFile] result of parsing
|
12
12
|
def parse(file)
|
13
|
-
|
13
|
+
root_object.add_to_xmls_stack(file.gsub(root_object.unpacked_folder, ''))
|
14
14
|
doc = parse_xml(file)
|
15
15
|
doc.xpath('p:sldLayout/*').each do |node_child|
|
16
16
|
case node_child.name
|
@@ -18,7 +18,7 @@ module OoxmlParser
|
|
18
18
|
@common_slide_data = CommonSlideData.new(parent: self).parse(node_child)
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
21
|
+
root_object.xmls_stack.pop
|
22
22
|
self
|
23
23
|
end
|
24
24
|
end
|
@@ -5,7 +5,7 @@ module OoxmlParser
|
|
5
5
|
module SlideLayoutsHelper
|
6
6
|
# @return [Array<String>] list of slide layouts files
|
7
7
|
def slide_layouts_files
|
8
|
-
Dir["#{
|
8
|
+
Dir["#{root_object.unpacked_folder}ppt/slideLayouts/*.xml"]
|
9
9
|
end
|
10
10
|
|
11
11
|
private
|
@@ -10,7 +10,7 @@ module OoxmlParser
|
|
10
10
|
# @param file [String] path to file to parse
|
11
11
|
# @return [SlideMasterFile] result of parsing
|
12
12
|
def parse(file)
|
13
|
-
|
13
|
+
root_object.add_to_xmls_stack(file.gsub(root_object.unpacked_folder, ''))
|
14
14
|
doc = parse_xml(file)
|
15
15
|
doc.xpath('p:sldMaster/*').each do |node_child|
|
16
16
|
case node_child.name
|
@@ -18,7 +18,7 @@ module OoxmlParser
|
|
18
18
|
@common_slide_data = CommonSlideData.new(parent: self).parse(node_child)
|
19
19
|
end
|
20
20
|
end
|
21
|
-
|
21
|
+
root_object.xmls_stack.pop
|
22
22
|
self
|
23
23
|
end
|
24
24
|
end
|
@@ -5,7 +5,7 @@ module OoxmlParser
|
|
5
5
|
module SlideMastersHelper
|
6
6
|
# @return [Array<String>] list of slide masters files
|
7
7
|
def slide_masters_files
|
8
|
-
Dir["#{
|
8
|
+
Dir["#{root_object.unpacked_folder}ppt/slideMasters/*.xml"]
|
9
9
|
end
|
10
10
|
|
11
11
|
private
|
@@ -14,7 +14,7 @@ module OoxmlParser
|
|
14
14
|
# Parse TableStyles object
|
15
15
|
# @param file [Nokogiri::XML:Element] node to parse
|
16
16
|
# @return [TableStyles] result of parsing
|
17
|
-
def parse(file = "#{
|
17
|
+
def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/tableStyles.xml")
|
18
18
|
return nil unless File.exist?(file)
|
19
19
|
|
20
20
|
document = parse_xml(file)
|
@@ -43,11 +43,10 @@ module OoxmlParser
|
|
43
43
|
# @return [Presentation] parsed presentation
|
44
44
|
def parse
|
45
45
|
@content_types = ContentTypes.new(parent: self).parse
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
@theme = PresentationTheme.parse('ppt/theme/theme1.xml')
|
46
|
+
@root_subfolder = 'ppt/'
|
47
|
+
root_object.add_to_xmls_stack('ppt/presentation.xml')
|
48
|
+
doc = parse_xml(root_object.current_xml)
|
49
|
+
@theme = PresentationTheme.new(parent: self).parse('ppt/theme/theme1.xml')
|
51
50
|
@table_styles = TableStyles.new(parent: self).parse
|
52
51
|
@comment_authors = CommentAuthors.new(parent: self).parse
|
53
52
|
@comments = PresentationComments.new(parent: self).parse
|
@@ -60,13 +59,13 @@ module OoxmlParser
|
|
60
59
|
presentation_node_child.xpath('p:sldId').each do |silde_id_node|
|
61
60
|
slide_id = silde_id_node.attr('r:id')
|
62
61
|
@slides << Slide.new(parent: self,
|
63
|
-
xml_path: "#{
|
62
|
+
xml_path: "#{root_object.root_subfolder}/#{root_object.get_link_from_rels(slide_id)}")
|
64
63
|
.parse
|
65
64
|
end
|
66
65
|
end
|
67
66
|
end
|
68
|
-
|
69
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
67
|
+
root_object.xmls_stack.pop
|
68
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}/ppt/_rels/presentation.xml.rels")
|
70
69
|
parse_slide_layouts
|
71
70
|
parse_slide_masters
|
72
71
|
self
|
@@ -9,8 +9,9 @@ module OoxmlParser
|
|
9
9
|
# @param path_to_file [String] file path
|
10
10
|
# @return [Presentation] result of parse
|
11
11
|
def self.parse_pptx(path_to_file)
|
12
|
-
|
13
|
-
|
12
|
+
file = OoxmlFile.new(path_to_file)
|
13
|
+
Parser.parse_format(file) do |yielded_file|
|
14
|
+
Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
data/lib/ooxml_parser/version.rb
CHANGED
@@ -15,8 +15,8 @@ module OoxmlParser
|
|
15
15
|
# @param file [String] file to parse
|
16
16
|
# @return [Chartsheet] result of parsing
|
17
17
|
def parse(file)
|
18
|
-
|
19
|
-
doc = parse_xml(
|
18
|
+
root_object.add_to_xmls_stack(root_object.root_subfolder + file)
|
19
|
+
doc = parse_xml(root_object.current_xml)
|
20
20
|
node = doc.xpath('//xmlns:chartsheet').first
|
21
21
|
node.xpath('*').each do |node_child|
|
22
22
|
case node_child.name
|
@@ -26,7 +26,7 @@ module OoxmlParser
|
|
26
26
|
end
|
27
27
|
end
|
28
28
|
end
|
29
|
-
|
29
|
+
root_object.xmls_stack.pop
|
30
30
|
self
|
31
31
|
end
|
32
32
|
end
|
@@ -33,7 +33,7 @@ module OoxmlParser
|
|
33
33
|
# @return [PivotCacheDefinition] pivot cache definition for current pivot cache
|
34
34
|
def parse_pivot_cache_definition
|
35
35
|
definition_file = root_object.relationships.target_by_id(id)
|
36
|
-
full_file_path = "#{
|
36
|
+
full_file_path = "#{root_object.unpacked_folder}/xl/#{definition_file}"
|
37
37
|
@pivot_cache_definition = PivotCacheDefinition.new(parent: root_object)
|
38
38
|
.parse(full_file_path)
|
39
39
|
end
|
@@ -55,7 +55,7 @@ module OoxmlParser
|
|
55
55
|
# @param [String] file path
|
56
56
|
# @return [PivotTableDefinition] result of parsing
|
57
57
|
def parse(file)
|
58
|
-
doc =
|
58
|
+
doc = parse_xml("#{root_object.unpacked_folder}/#{file}")
|
59
59
|
node = doc.xpath('//xmlns:pivotTableDefinition').first
|
60
60
|
node.attributes.each do |key, value|
|
61
61
|
case key
|
@@ -32,7 +32,7 @@ module OoxmlParser
|
|
32
32
|
# Parse StyleSheet object
|
33
33
|
# @return [StyleSheet] result of parsing
|
34
34
|
def parse
|
35
|
-
doc = parse_xml("#{
|
35
|
+
doc = parse_xml("#{root_object.unpacked_folder}/#{root_object.root_subfolder}/styles.xml")
|
36
36
|
doc.root.xpath('*').each do |node_child|
|
37
37
|
case node_child.name
|
38
38
|
when 'numFmts'
|
@@ -19,8 +19,8 @@ module OoxmlParser
|
|
19
19
|
# @param node [Nokogiri::XML:Element] node to parse
|
20
20
|
# @return [TablePart] result of parsing
|
21
21
|
def parse(node)
|
22
|
-
link_to_table_part_xml =
|
23
|
-
doc = parse_xml(
|
22
|
+
link_to_table_part_xml = root_object.get_link_from_rels(node.attribute('id').value)
|
23
|
+
doc = parse_xml(root_object.unpacked_folder + link_to_table_part_xml.gsub('..', 'xl'))
|
24
24
|
table_node = doc.xpath('xmlns:table').first
|
25
25
|
table_node.attributes.each do |key, value|
|
26
26
|
case key
|
@@ -61,9 +61,9 @@ module OoxmlParser
|
|
61
61
|
# Perform parsing of relationships
|
62
62
|
# @return [nil]
|
63
63
|
def parse_relationships
|
64
|
-
|
65
|
-
@relationships = Relationships.new(parent: self).parse_file(
|
66
|
-
|
64
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/worksheets/_rels/#{@xml_name}.rels")
|
65
|
+
@relationships = Relationships.new(parent: self).parse_file(root_object.current_xml) if File.exist?(root_object.current_xml)
|
66
|
+
root_object.xmls_stack.pop
|
67
67
|
end
|
68
68
|
|
69
69
|
# @return [True, false] if structure contain any user data
|
@@ -79,7 +79,7 @@ module OoxmlParser
|
|
79
79
|
|
80
80
|
# Parse list of drawings in file
|
81
81
|
def parse_drawing
|
82
|
-
drawing_node = parse_xml(
|
82
|
+
drawing_node = parse_xml(root_object.current_xml)
|
83
83
|
drawing_node.xpath('xdr:wsDr/*').each do |drawing_node_child|
|
84
84
|
@drawings << XlsxDrawing.new(parent: self).parse(drawing_node_child)
|
85
85
|
end
|
@@ -91,8 +91,8 @@ module OoxmlParser
|
|
91
91
|
def parse(path_to_xml_file)
|
92
92
|
@xml_name = File.basename path_to_xml_file
|
93
93
|
parse_relationships
|
94
|
-
|
95
|
-
doc = parse_xml(
|
94
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/worksheets/#{File.basename(path_to_xml_file)}")
|
95
|
+
doc = parse_xml(root_object.current_xml)
|
96
96
|
sheet = doc.search('//xmlns:worksheet').first
|
97
97
|
sheet.xpath('*').each do |worksheet_node_child|
|
98
98
|
case worksheet_node_child.name
|
@@ -107,11 +107,11 @@ module OoxmlParser
|
|
107
107
|
@merge << merge_node.attribute('ref').value.to_s
|
108
108
|
end
|
109
109
|
when 'drawing'
|
110
|
-
path_to_drawing =
|
110
|
+
path_to_drawing = root_object.get_link_from_rels(worksheet_node_child.attribute('id').value)
|
111
111
|
unless path_to_drawing.nil?
|
112
|
-
|
112
|
+
root_object.add_to_xmls_stack(path_to_drawing)
|
113
113
|
parse_drawing
|
114
|
-
|
114
|
+
root_object.xmls_stack.pop
|
115
115
|
end
|
116
116
|
when 'hyperlinks'
|
117
117
|
worksheet_node_child.xpath('xmlns:hyperlink').each do |hyperlink_node|
|
@@ -150,7 +150,7 @@ module OoxmlParser
|
|
150
150
|
end
|
151
151
|
end
|
152
152
|
parse_comments
|
153
|
-
|
153
|
+
root_object.xmls_stack.pop
|
154
154
|
self
|
155
155
|
end
|
156
156
|
|
@@ -175,7 +175,7 @@ module OoxmlParser
|
|
175
175
|
comments_target = relationships.target_by_type('comment')
|
176
176
|
return if comments_target.empty?
|
177
177
|
|
178
|
-
comments_file = "#{
|
178
|
+
comments_file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/#{comments_target.first.gsub('..', '')}"
|
179
179
|
@comments = ExcelComments.new(parent: self).parse(comments_file)
|
180
180
|
end
|
181
181
|
end
|
@@ -109,7 +109,7 @@ module OoxmlParser
|
|
109
109
|
shared_strings_target = relationships.target_by_type('sharedString')
|
110
110
|
return if shared_strings_target.empty?
|
111
111
|
|
112
|
-
shared_string_file = "#{
|
112
|
+
shared_string_file = "#{root_object.unpacked_folder}/xl/#{shared_strings_target.first}"
|
113
113
|
@shared_strings_table = SharedStringTable.new(parent: self).parse(shared_string_file)
|
114
114
|
end
|
115
115
|
|
@@ -117,13 +117,12 @@ module OoxmlParser
|
|
117
117
|
# @return [XLSXWorkbook]
|
118
118
|
def parse
|
119
119
|
@content_types = ContentTypes.new(parent: self).parse
|
120
|
-
@relationships = Relationships.new(parent: self).parse_file("#{
|
120
|
+
@relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}xl/_rels/workbook.xml.rels")
|
121
121
|
parse_shared_strings
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
@
|
126
|
-
@theme = PresentationTheme.parse("xl/#{link_to_theme_xml}") if link_to_theme_xml
|
122
|
+
@root_subfolder = 'xl/'
|
123
|
+
root_object.add_to_xmls_stack('xl/workbook.xml')
|
124
|
+
@doc = parse_xml(root_object.current_xml)
|
125
|
+
@theme = PresentationTheme.new(parent: self).parse("xl/#{link_to_theme_xml}") if link_to_theme_xml
|
127
126
|
@style_sheet = StyleSheet.new(parent: self).parse
|
128
127
|
@doc.xpath('xmlns:workbook/xmlns:sheets/xmlns:sheet').each do |sheet|
|
129
128
|
@sheets << Sheet.new(parent: self).parse(sheet)
|
@@ -139,7 +138,7 @@ module OoxmlParser
|
|
139
138
|
parse_pivot_table
|
140
139
|
parse_defined_names
|
141
140
|
parse_workbook_protection
|
142
|
-
|
141
|
+
root_object.xmls_stack.pop
|
143
142
|
self
|
144
143
|
end
|
145
144
|
|
@@ -9,8 +9,9 @@ module OoxmlParser
|
|
9
9
|
# @param path_to_file [String] file path
|
10
10
|
# @return [XLSXWorkbook] result of parse
|
11
11
|
def self.parse_xlsx(path_to_file)
|
12
|
-
|
13
|
-
|
12
|
+
file = OoxmlFile.new(path_to_file)
|
13
|
+
Parser.parse_format(file) do |yielded_file|
|
14
|
+
XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
|
14
15
|
end
|
15
16
|
end
|
16
17
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ooxml_parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.31.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ONLYOFFICE
|
@@ -10,7 +10,7 @@ authors:
|
|
10
10
|
autorequire:
|
11
11
|
bindir: bin
|
12
12
|
cert_chain: []
|
13
|
-
date: 2022-09-
|
13
|
+
date: 2022-09-27 00:00:00.000000000 Z
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
16
16
|
name: nokogiri
|
@@ -363,6 +363,7 @@ files:
|
|
363
363
|
- lib/ooxml_parser/common_parser/common_document_structure.rb
|
364
364
|
- lib/ooxml_parser/common_parser/parser.rb
|
365
365
|
- lib/ooxml_parser/common_parser/parser/encryption_checker.rb
|
366
|
+
- lib/ooxml_parser/common_parser/parser/ooxml_file.rb
|
366
367
|
- lib/ooxml_parser/configuration.rb
|
367
368
|
- lib/ooxml_parser/docx_parser.rb
|
368
369
|
- lib/ooxml_parser/docx_parser/document_structure.rb
|
@@ -607,7 +608,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
607
608
|
- !ruby/object:Gem::Version
|
608
609
|
version: '0'
|
609
610
|
requirements: []
|
610
|
-
rubygems_version: 3.3.
|
611
|
+
rubygems_version: 3.3.22
|
611
612
|
signing_key:
|
612
613
|
specification_version: 4
|
613
614
|
summary: OoxmlParser Gem
|