ooxml_parser 0.27.0 → 0.30.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/chart/chart.rb +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb +2 -2
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/picture/docx_blip/file_reference.rb +16 -3
- data/lib/ooxml_parser/common_parser/common_data/content_types.rb +1 -1
- data/lib/ooxml_parser/common_parser/common_data/hyperlink.rb +3 -3
- data/lib/ooxml_parser/common_parser/common_data/ooxml_document_object.rb +2 -91
- data/lib/ooxml_parser/common_parser/common_document_structure.rb +38 -0
- data/lib/ooxml_parser/common_parser/parser/ooxml_file.rb +68 -0
- data/lib/ooxml_parser/common_parser/parser.rb +32 -38
- data/lib/ooxml_parser/docx_parser/document_structure/comments.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/comments_extended.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_properties.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_settings.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_style.rb +8 -2
- data/lib/ooxml_parser/docx_parser/document_structure/docx_paragraph.rb +7 -18
- data/lib/ooxml_parser/docx_parser/document_structure/header_footer.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/numbering.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/note.rb +2 -2
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/page_properties.rb +3 -3
- data/lib/ooxml_parser/docx_parser/document_structure/styles.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure.rb +34 -36
- data/lib/ooxml_parser/docx_parser.rb +3 -2
- data/lib/ooxml_parser/pptx_parser/presentation/comment_authors.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_comments.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_theme.rb +23 -23
- data/lib/ooxml_parser/pptx_parser/presentation/slide/graphic_frame/graphic_frame.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide.rb +5 -5
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layout_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layouts_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/slide_master_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_masters_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/table_styles.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation.rb +7 -8
- data/lib/ooxml_parser/pptx_parser.rb +3 -2
- data/lib/ooxml_parser/version.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/chartsheet.rb +3 -3
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_cache.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_table_definition.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/style_sheet.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet/table_part.rb +2 -2
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet.rb +11 -11
- data/lib/ooxml_parser/xlsx_parser/workbook.rb +7 -8
- data/lib/ooxml_parser/xlsx_parser.rb +3 -2
- metadata +4 -4
- data/lib/ooxml_parser/docx_parser/document_structure/docx_paragraph/style_parametres.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8264219a82e05f855d6e850703479fdfbb25b866ee5eb5705ac5171903fd7dd
|
4
|
+
data.tar.gz: e5ef8d6182d11e432c97bc8d63a1632f270c443acf17803b6abafc502ae0681a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d666854752d50ce5a26dd6c36fa78b801e34de2938eb10e6247ca11832979eea3b23a8fc8a838e272b64df9e13f0947a747dbe322ac008ec797cd0a836c22519
|
7
|
+
data.tar.gz: 81facb1e39faa54991524a2c3b3b613588c990d6cf02be6ac063b17c6971f0f08af0bc09a95cdae99a7a4eb50d1bb1a7278bc76377c0e496786b6f86335335f2
|
@@ -82,7 +82,7 @@ module OoxmlParser
|
|
82
82
|
# Parse Chart data
|
83
83
|
# @return [Chart] result of parsing
|
84
84
|
def parse
|
85
|
-
chart_xml = parse_xml(
|
85
|
+
chart_xml = parse_xml(root_object.current_xml)
|
86
86
|
chart_xml.xpath('*').each do |chart_node|
|
87
87
|
case chart_node.name
|
88
88
|
when 'chartSpace'
|
@@ -180,8 +180,8 @@ module OoxmlParser
|
|
180
180
|
|
181
181
|
# Parse relationship of chart
|
182
182
|
def parse_relationships
|
183
|
-
file_name = File.basename(
|
184
|
-
relationship_file = "#{
|
183
|
+
file_name = File.basename(root_object.current_xml)
|
184
|
+
relationship_file = "#{root_object.unpacked_folder}" \
|
185
185
|
'/word/charts/' \
|
186
186
|
"_rels/#{file_name}.rels"
|
187
187
|
|
@@ -198,7 +198,7 @@ module OoxmlParser
|
|
198
198
|
return if chart_relationship.empty?
|
199
199
|
|
200
200
|
chart_style_file = chart_relationship.first
|
201
|
-
style_file = "#{
|
201
|
+
style_file = "#{root_object.unpacked_folder}" \
|
202
202
|
"/word/charts/#{chart_style_file}"
|
203
203
|
|
204
204
|
@style = ChartStyleFile.new(parent: self).parse(style_file)
|
data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb
CHANGED
@@ -23,9 +23,9 @@ module OoxmlParser
|
|
23
23
|
@data = DocxPicture.new(parent: self).parse(node_child)
|
24
24
|
when 'chart'
|
25
25
|
@type = :chart
|
26
|
-
|
26
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/#{root_object.get_link_from_rels(node_child.attribute('id').value)}")
|
27
27
|
@data = Chart.new(parent: self).parse
|
28
|
-
|
28
|
+
root_object.xmls_stack.pop
|
29
29
|
when 'wgp'
|
30
30
|
@type = :group
|
31
31
|
@data = ShapesGrouping.new(parent: self).parse(node_child)
|
@@ -25,7 +25,7 @@ module OoxmlParser
|
|
25
25
|
return self unless @resource_id
|
26
26
|
return self if @resource_id.empty?
|
27
27
|
|
28
|
-
@path =
|
28
|
+
@path = root_object.get_link_from_rels(@resource_id)
|
29
29
|
if !@path || @path.empty?
|
30
30
|
warn "Cant find path to media file by id: #{@resource_id}"
|
31
31
|
return self
|
@@ -33,13 +33,26 @@ module OoxmlParser
|
|
33
33
|
return self if @path == 'NULL'
|
34
34
|
return self if @path.match?(URI::DEFAULT_PARSER.make_regexp)
|
35
35
|
|
36
|
-
full_path_to_file =
|
36
|
+
full_path_to_file = root_object.unpacked_folder + root_object.root_subfolder + @path.gsub('..', '')
|
37
37
|
if File.exist?(full_path_to_file)
|
38
|
-
@content = File.
|
38
|
+
@content = if File.extname(@path) == '.xlsx'
|
39
|
+
parse_ole_xlsx(full_path_to_file)
|
40
|
+
else
|
41
|
+
File.binread(full_path_to_file)
|
42
|
+
end
|
39
43
|
else
|
40
44
|
warn "Couldn't find #{full_path_to_file} file on filesystem. Possible problem in original document"
|
41
45
|
end
|
42
46
|
self
|
43
47
|
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# Parse ole xlsx file
|
52
|
+
# @param [String] full_path to file
|
53
|
+
# @return [XLSXWorkbook]
|
54
|
+
def parse_ole_xlsx(full_path)
|
55
|
+
OoxmlParser::XlsxParser.parse_xlsx(full_path)
|
56
|
+
end
|
44
57
|
end
|
45
58
|
end
|
@@ -21,7 +21,7 @@ module OoxmlParser
|
|
21
21
|
# Parse ContentTypes object
|
22
22
|
# @return [ContentTypes] result of parsing
|
23
23
|
def parse
|
24
|
-
doc = Nokogiri::XML.parse(File.open("#{
|
24
|
+
doc = Nokogiri::XML.parse(File.open("#{root_object.unpacked_folder}/[Content_Types].xml"))
|
25
25
|
node = doc.xpath('*').first
|
26
26
|
|
27
27
|
node.xpath('*').each do |node_child|
|
@@ -46,7 +46,7 @@ module OoxmlParser
|
|
46
46
|
@url = Coordinates.new.parse_string(value.value)
|
47
47
|
when 'id'
|
48
48
|
@id = value.value
|
49
|
-
@url =
|
49
|
+
@url = root_object.get_link_from_rels(@id) unless @id.empty?
|
50
50
|
when 'tooltip'
|
51
51
|
@tooltip = value.value
|
52
52
|
when 'ref'
|
@@ -80,7 +80,7 @@ module OoxmlParser
|
|
80
80
|
else
|
81
81
|
if meaningful_id?
|
82
82
|
@action = :external_link
|
83
|
-
@url =
|
83
|
+
@url = root_object.get_link_from_rels(@id)
|
84
84
|
end
|
85
85
|
end
|
86
86
|
self
|
@@ -98,7 +98,7 @@ module OoxmlParser
|
|
98
98
|
def parse_url_for_slide_link
|
99
99
|
return unless meaningful_id?
|
100
100
|
|
101
|
-
@url =
|
101
|
+
@url = root_object.get_link_from_rels(@id).scan(/\d+/).join.to_i
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
@@ -28,6 +28,8 @@ module OoxmlParser
|
|
28
28
|
|
29
29
|
instance_variables.each do |current_attribute|
|
30
30
|
next if current_attribute == :@parent
|
31
|
+
next if instance_variable_get(current_attribute).is_a?(Nokogiri::XML::Element)
|
32
|
+
|
31
33
|
return false unless instance_variable_get(current_attribute) == other.instance_variable_get(current_attribute)
|
32
34
|
end
|
33
35
|
true
|
@@ -47,96 +49,5 @@ module OoxmlParser
|
|
47
49
|
end
|
48
50
|
xml
|
49
51
|
end
|
50
|
-
|
51
|
-
class << self
|
52
|
-
# @return [String] path to root subfolder
|
53
|
-
attr_accessor :root_subfolder
|
54
|
-
# @return [PresentationTheme] list of themes
|
55
|
-
attr_accessor :theme
|
56
|
-
# @return [Array<String>] stack of xmls
|
57
|
-
attr_accessor :xmls_stack
|
58
|
-
# @return [String] path to root folder
|
59
|
-
attr_accessor :path_to_folder
|
60
|
-
|
61
|
-
# Copy this file and rename to zip
|
62
|
-
# @param path [String] path to file
|
63
|
-
# @return [String] path to result zip
|
64
|
-
def copy_file_and_rename_to_zip(path)
|
65
|
-
file_name = File.basename(path)
|
66
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
67
|
-
file_path = "#{tmp_folder}/#{file_name}"
|
68
|
-
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
69
|
-
FileUtils.mkdir_p(tmp_folder)
|
70
|
-
raise "Cannot find file by path #{path}" unless File.exist?(path)
|
71
|
-
|
72
|
-
FileUtils.cp path, tmp_folder
|
73
|
-
file_path
|
74
|
-
end
|
75
|
-
|
76
|
-
# Decrypt file protected with password
|
77
|
-
# @param path [String] path to file
|
78
|
-
# @param password [String] password to file
|
79
|
-
# @return [String] path to decrypted file
|
80
|
-
def decrypt_file(path, password)
|
81
|
-
file_name = File.basename(path)
|
82
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
83
|
-
decrypted_path = "#{tmp_folder}/#{file_name}"
|
84
|
-
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
85
|
-
OoxmlDecrypt::EncryptedFile.decrypt_to_file(path, binary_password, decrypted_path)
|
86
|
-
|
87
|
-
decrypted_path
|
88
|
-
end
|
89
|
-
|
90
|
-
# Unzip specified file
|
91
|
-
# @param path_to_file [String] path to zip file
|
92
|
-
# @param destination [String] folder to extract
|
93
|
-
# @return [void]
|
94
|
-
def unzip_file(path_to_file, destination)
|
95
|
-
Zip.warn_invalid_date = false
|
96
|
-
Zip::File.open(path_to_file) do |zip_file|
|
97
|
-
raise LoadError, "There is no files in zip #{path_to_file}" if zip_file.entries.empty?
|
98
|
-
|
99
|
-
zip_file.each do |file|
|
100
|
-
file_path = File.join(destination, file.name)
|
101
|
-
FileUtils.mkdir_p(File.dirname(file_path))
|
102
|
-
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
# @return [String] dir to base of file
|
108
|
-
def dir
|
109
|
-
"#{OOXMLDocumentObject.path_to_folder}#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/"
|
110
|
-
end
|
111
|
-
|
112
|
-
# @return [String] path to current xml file
|
113
|
-
def current_xml
|
114
|
-
OOXMLDocumentObject.path_to_folder + OOXMLDocumentObject.xmls_stack.last
|
115
|
-
end
|
116
|
-
|
117
|
-
# Add file to parsing stack
|
118
|
-
# @param path [String] path of file to add to stack
|
119
|
-
# @return [void]
|
120
|
-
def add_to_xmls_stack(path)
|
121
|
-
OOXMLDocumentObject.xmls_stack << if path.include?('..')
|
122
|
-
"#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/#{path}"
|
123
|
-
elsif path.start_with?(OOXMLDocumentObject.root_subfolder)
|
124
|
-
path
|
125
|
-
else
|
126
|
-
OOXMLDocumentObject.root_subfolder + path
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
# Get link to file from rels file
|
131
|
-
# @param id [String] file to get
|
132
|
-
# @return [String] result
|
133
|
-
def get_link_from_rels(id)
|
134
|
-
rels_path = dir + "_rels/#{File.basename(OOXMLDocumentObject.xmls_stack.last)}.rels"
|
135
|
-
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
136
|
-
|
137
|
-
relationships = Relationships.new.parse_file(rels_path)
|
138
|
-
relationships.target_by_id(id)
|
139
|
-
end
|
140
|
-
end
|
141
52
|
end
|
142
53
|
end
|
@@ -14,12 +14,50 @@ module OoxmlParser
|
|
14
14
|
attr_accessor :default_font_style
|
15
15
|
# @return [ContentTypes] data about content types
|
16
16
|
attr_accessor :content_types
|
17
|
+
# @return [String] root sub-folder for object
|
18
|
+
attr_reader :root_subfolder
|
19
|
+
# @return [String] path to folder with unpacked document
|
20
|
+
attr_reader :unpacked_folder
|
21
|
+
# @return [Array<String>] list of xmls to parse
|
22
|
+
attr_accessor :xmls_stack
|
17
23
|
|
18
24
|
def initialize(params = {})
|
19
25
|
@default_font_size = params.fetch(:default_font_size, 18)
|
20
26
|
@default_font_typeface = params.fetch(:default_font_typeface, 'Arial')
|
21
27
|
@default_font_style = FontStyle.new
|
28
|
+
@unpacked_folder = params.fetch(:unpacked_folder, nil)
|
29
|
+
@xmls_stack = []
|
22
30
|
super(parent: nil)
|
23
31
|
end
|
32
|
+
|
33
|
+
# @return [String] path to current xml file
|
34
|
+
def current_xml
|
35
|
+
root_object.unpacked_folder + @xmls_stack.last
|
36
|
+
end
|
37
|
+
|
38
|
+
# Add file to parsing stack
|
39
|
+
# @param path [String] path of file to add to stack
|
40
|
+
# @return [void]
|
41
|
+
def add_to_xmls_stack(path)
|
42
|
+
@xmls_stack << if path.include?('..')
|
43
|
+
"#{File.dirname(@xmls_stack.last)}/#{path}"
|
44
|
+
elsif path.start_with?(@root_subfolder)
|
45
|
+
path
|
46
|
+
else
|
47
|
+
@root_subfolder + path
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Get link to file from rels file
|
52
|
+
# @param id [String] file to get
|
53
|
+
# @return [String] result
|
54
|
+
def get_link_from_rels(id)
|
55
|
+
dir = "#{unpacked_folder}#{File.dirname(@xmls_stack.last)}/"
|
56
|
+
rels_path = dir + "_rels/#{File.basename(@xmls_stack.last)}.rels"
|
57
|
+
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
58
|
+
|
59
|
+
relationships = Relationships.new.parse_file(rels_path)
|
60
|
+
relationships.target_by_id(id)
|
61
|
+
end
|
24
62
|
end
|
25
63
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OoxmlParser
|
4
|
+
# Class for actions with OOXML file
|
5
|
+
class OoxmlFile
|
6
|
+
# @return [String] path to file
|
7
|
+
attr_reader :path
|
8
|
+
|
9
|
+
def initialize(path)
|
10
|
+
@path = path
|
11
|
+
end
|
12
|
+
|
13
|
+
# Copy this file and rename to zip
|
14
|
+
# @return [String] path to result zip
|
15
|
+
def copy_file_and_rename_to_zip
|
16
|
+
file_name = File.basename(@path)
|
17
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
18
|
+
@zip_path = "#{tmp_folder}/#{file_name}"
|
19
|
+
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
20
|
+
FileUtils.mkdir_p(tmp_folder)
|
21
|
+
raise "Cannot find file by path #{@path}" unless File.exist?(@path)
|
22
|
+
|
23
|
+
FileUtils.cp path, tmp_folder
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [String] path to folder with zip
|
27
|
+
def path_to_folder
|
28
|
+
@zip_path.sub(File.basename(@zip_path), '')
|
29
|
+
end
|
30
|
+
|
31
|
+
# Unzip specified file
|
32
|
+
# @return [void]
|
33
|
+
def unzip
|
34
|
+
Zip.warn_invalid_date = false
|
35
|
+
Zip::File.open(@zip_path) do |zip_file|
|
36
|
+
raise LoadError, "There is no files in zip #{@zip_path}" if zip_file.entries.empty?
|
37
|
+
|
38
|
+
zip_file.each do |file|
|
39
|
+
file_path = File.join(path_to_folder, file.name)
|
40
|
+
FileUtils.mkdir_p(File.dirname(file_path))
|
41
|
+
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# @return [Symbol] file type recognized by folder structure
|
47
|
+
def format_by_folders
|
48
|
+
return :docx if Dir.exist?("#{path_to_folder}/word")
|
49
|
+
return :xlsx if Dir.exist?("#{path_to_folder}/xl")
|
50
|
+
return :pptx if Dir.exist?("#{path_to_folder}/ppt")
|
51
|
+
|
52
|
+
:zip
|
53
|
+
end
|
54
|
+
|
55
|
+
# Decrypt file protected with password
|
56
|
+
# @param password [String] password to file
|
57
|
+
# @return [OoxmlFile] path to decrypted file
|
58
|
+
def decrypt(password)
|
59
|
+
file_name = File.basename(@path)
|
60
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
61
|
+
decrypted_path = "#{tmp_folder}/#{file_name}"
|
62
|
+
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
63
|
+
OoxmlDecrypt::EncryptedFile.decrypt_to_file(@path, binary_password, decrypted_path)
|
64
|
+
|
65
|
+
OoxmlFile.new(decrypted_path)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -1,52 +1,46 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'parser/encryption_checker'
|
4
|
+
require_relative 'parser/ooxml_file'
|
4
5
|
|
5
6
|
module OoxmlParser
|
6
7
|
# Basic class for OoxmlParser
|
7
8
|
class Parser
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
class << self
|
10
|
+
# Base method to yield parse document of any type
|
11
|
+
# @param [OoxmlFile] file with data
|
12
|
+
# @return [CommonDocumentStructure] structure of doc
|
13
|
+
def parse_format(file)
|
14
|
+
return nil if EncryptionChecker.new(file.path).encrypted?
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
16
|
+
file.copy_file_and_rename_to_zip
|
17
|
+
file.unzip
|
18
|
+
model = yield(file)
|
19
|
+
model.file_path = file.path if model
|
20
|
+
FileUtils.rm_rf(file.path_to_folder)
|
21
|
+
model
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
24
|
+
# Base method to parse document of any type
|
25
|
+
# @param path_to_file [String] file
|
26
|
+
# @return [CommonDocumentStructure] structure of doc
|
27
|
+
def parse(path_to_file, password: nil)
|
28
|
+
file = OoxmlFile.new(path_to_file)
|
29
|
+
file = file.decrypt(password) if password
|
30
|
+
Parser.parse_format(file) do |yielded_file|
|
31
|
+
format = yielded_file.format_by_folders
|
32
|
+
case format
|
33
|
+
when :docx
|
34
|
+
DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
|
35
|
+
when :xlsx
|
36
|
+
XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
|
37
|
+
when :pptx
|
38
|
+
Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
|
39
|
+
else
|
40
|
+
warn "#{path_to_file} is a simple zip file without OOXML content"
|
41
|
+
end
|
39
42
|
end
|
40
43
|
end
|
41
44
|
end
|
42
|
-
|
43
|
-
# Recognize folder format
|
44
|
-
# @param directory [String] path to dirctory
|
45
|
-
# @return [Symbol] type of document
|
46
|
-
def self.recognize_folder_format(directory = OOXMLDocumentObject.path_to_folder)
|
47
|
-
return :docx if Dir.exist?("#{directory}/word")
|
48
|
-
return :xlsx if Dir.exist?("#{directory}/xl")
|
49
|
-
return :pptx if Dir.exist?("#{directory}/ppt")
|
50
|
-
end
|
51
45
|
end
|
52
46
|
end
|
@@ -9,8 +9,8 @@ module OoxmlParser
|
|
9
9
|
|
10
10
|
def initialize(params = {})
|
11
11
|
@comments_array = []
|
12
|
-
@file = params.fetch(:file, "#{OOXMLDocumentObject.path_to_folder}word/comments.xml")
|
13
12
|
super(parent: params[:parent])
|
13
|
+
@file = params.fetch(:file, "#{root_object.unpacked_folder}word/comments.xml")
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Comment] accessor
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse CommentsExtended object
|
18
18
|
# @return [CommentsExtended] result of parsing
|
19
19
|
def parse
|
20
|
-
file_to_parse = "#{
|
20
|
+
file_to_parse = "#{root_object.unpacked_folder}word/commentsExtended.xml"
|
21
21
|
return nil unless File.exist?(file_to_parse)
|
22
22
|
|
23
23
|
doc = parse_xml(file_to_parse)
|
@@ -8,7 +8,7 @@ module OoxmlParser
|
|
8
8
|
# Parse Document properties
|
9
9
|
# @return [DocumentProperties]
|
10
10
|
def parse
|
11
|
-
properties_file = "#{
|
11
|
+
properties_file = "#{root_object.unpacked_folder}docProps/app.xml"
|
12
12
|
unless File.exist?(properties_file)
|
13
13
|
warn "There is no 'docProps/app.xml' in docx. It may be some problem with it"
|
14
14
|
return self
|
@@ -9,7 +9,7 @@ module OoxmlParser
|
|
9
9
|
# Parse Settings object
|
10
10
|
# @return [DocumentSettings] result of parsing
|
11
11
|
def parse
|
12
|
-
settings_path = "#{
|
12
|
+
settings_path = "#{root_object.unpacked_folder}word/settings.xml"
|
13
13
|
return nil unless File.exist?(settings_path)
|
14
14
|
|
15
15
|
doc = parse_xml(settings_path)
|
@@ -21,8 +21,12 @@ module OoxmlParser
|
|
21
21
|
attr_accessor :next_style
|
22
22
|
# @return [DocxParagraphRun] run properties
|
23
23
|
attr_accessor :run_properties
|
24
|
+
# @return [Nokogiri::XML:Node] run properties node
|
25
|
+
attr_accessor :run_properties_node
|
24
26
|
# @return [DocxParagraph] run properties
|
25
27
|
attr_accessor :paragraph_properties
|
28
|
+
# @return [Nokogiri::XML:Node] paragraph properties node
|
29
|
+
attr_accessor :paragraph_properties_node
|
26
30
|
# @return [TableProperties] properties of table
|
27
31
|
attr_accessor :table_properties
|
28
32
|
# @return [Array, TableStyleProperties] list of table style properties
|
@@ -76,9 +80,11 @@ module OoxmlParser
|
|
76
80
|
when 'next'
|
77
81
|
@next_style = subnode.attribute('val').value
|
78
82
|
when 'rPr'
|
79
|
-
@
|
83
|
+
@run_properties_node = subnode
|
84
|
+
@run_properties = DocxParagraphRun.new(parent: self).parse_properties(@run_properties_node)
|
80
85
|
when 'pPr'
|
81
|
-
@
|
86
|
+
@paragraph_properties_node = subnode
|
87
|
+
@paragraph_properties = ParagraphProperties.new(parent: self).parse(@paragraph_properties_node)
|
82
88
|
when 'tblPr'
|
83
89
|
@table_properties = TableProperties.new(parent: self).parse(subnode)
|
84
90
|
when 'trPr'
|
@@ -12,7 +12,6 @@ require_relative 'docx_paragraph/inserted'
|
|
12
12
|
require_relative 'docx_paragraph/structured_document_tag'
|
13
13
|
require_relative 'docx_paragraph/frame_properties'
|
14
14
|
require_relative 'docx_paragraph/docx_formula'
|
15
|
-
require_relative 'docx_paragraph/style_parametres'
|
16
15
|
module OoxmlParser
|
17
16
|
# Class for data of DocxParagraph
|
18
17
|
class DocxParagraph < OOXMLDocumentObject
|
@@ -202,7 +201,8 @@ module OoxmlParser
|
|
202
201
|
when 'contextualSpacing'
|
203
202
|
@contextual_spacing = true
|
204
203
|
when 'pStyle'
|
205
|
-
|
204
|
+
@paragraph_style_ref = ParagraphStyleRef.new(parent: self).parse(node_child)
|
205
|
+
fill_style_data(default_char_style)
|
206
206
|
when 'ind'
|
207
207
|
@ind = DocumentStructure.default_paragraph_style.ind.dup.parse(node_child)
|
208
208
|
when 'numPr'
|
@@ -229,24 +229,13 @@ module OoxmlParser
|
|
229
229
|
self
|
230
230
|
end
|
231
231
|
|
232
|
-
#
|
233
|
-
# @param id [String] id of style to parse
|
232
|
+
# Fill data from styles
|
234
233
|
# @param character_style [DocxParagraphRun] style to parse
|
235
234
|
# @return [void]
|
236
|
-
def
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
style.xpath('w:pPr').each do |p_pr|
|
242
|
-
parse_paragraph_style(p_pr, character_style)
|
243
|
-
@style = StyleParametres.new(parent: self).parse(style)
|
244
|
-
end
|
245
|
-
style.xpath('w:rPr').each do |r_pr|
|
246
|
-
character_style.parse_properties(r_pr)
|
247
|
-
end
|
248
|
-
break
|
249
|
-
end
|
235
|
+
def fill_style_data(character_style)
|
236
|
+
@style = root_object.document_style_by_id(@paragraph_style_ref.value)
|
237
|
+
parse_paragraph_style(@style.paragraph_properties_node, character_style) if @style.paragraph_properties_node
|
238
|
+
character_style.parse_properties(@style.run_properties_node) if @style.run_properties_node
|
250
239
|
end
|
251
240
|
|
252
241
|
extend Gem::Deprecate
|
@@ -47,7 +47,7 @@ module OoxmlParser
|
|
47
47
|
def parse(node)
|
48
48
|
@id = node.attribute('id').value.to_i
|
49
49
|
parse_type(node)
|
50
|
-
doc = parse_xml(
|
50
|
+
doc = parse_xml(root_object.unpacked_folder + xml_path)
|
51
51
|
doc.search(xpath_for_search).each do |footnote|
|
52
52
|
next unless footnote.attribute('id').value.to_i == @id
|
53
53
|
|
@@ -38,7 +38,7 @@ module OoxmlParser
|
|
38
38
|
# Parse Numbering data
|
39
39
|
# @return [Numbering] result of parse
|
40
40
|
def parse
|
41
|
-
numbering_xml = "#{
|
41
|
+
numbering_xml = "#{root_object.unpacked_folder}word/numbering.xml"
|
42
42
|
return nil unless File.exist?(numbering_xml)
|
43
43
|
|
44
44
|
node = parse_xml(File.open(numbering_xml))
|
@@ -43,10 +43,10 @@ module OoxmlParser
|
|
43
43
|
# @param target [String] name of target
|
44
44
|
# @return [String] path to note xml file
|
45
45
|
def file_path(target)
|
46
|
-
file = "#{
|
46
|
+
file = "#{root_object.unpacked_folder}word/#{target}"
|
47
47
|
return file if File.exist?(file)
|
48
48
|
|
49
|
-
"#{
|
49
|
+
"#{root_object.unpacked_folder}#{target}" unless File.exist?(file)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
@@ -63,8 +63,8 @@ module OoxmlParser
|
|
63
63
|
when 'cols'
|
64
64
|
@columns = Columns.new.parse(pg_size_subnode)
|
65
65
|
when 'headerReference', 'footerReference'
|
66
|
-
target =
|
67
|
-
|
66
|
+
target = root_object.get_link_from_rels(pg_size_subnode.attribute('id').value)
|
67
|
+
root_object.add_to_xmls_stack("word/#{target}")
|
68
68
|
note = Note.parse(default_paragraph: default_paragraph,
|
69
69
|
default_character: default_character,
|
70
70
|
target: target,
|
@@ -72,7 +72,7 @@ module OoxmlParser
|
|
72
72
|
type: File.basename(target).sub('.xml', ''),
|
73
73
|
parent: self)
|
74
74
|
@notes << note
|
75
|
-
|
75
|
+
root_object.xmls_stack.pop
|
76
76
|
when 'footnotePr'
|
77
77
|
@footnote_properties = FootnoteProperties.new(parent: self).parse(pg_size_subnode)
|
78
78
|
end
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse styles data
|
18
18
|
# @return [Styles] result of parsing
|
19
19
|
def parse
|
20
|
-
doc = parse_xml("#{
|
20
|
+
doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
|
21
21
|
doc.xpath('w:styles/*').each do |node_child|
|
22
22
|
case node_child.name
|
23
23
|
when 'docDefaults'
|