ooxml_parser 0.27.0 → 0.30.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/chart/chart.rb +4 -4
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb +2 -2
- data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/picture/docx_blip/file_reference.rb +16 -3
- data/lib/ooxml_parser/common_parser/common_data/content_types.rb +1 -1
- data/lib/ooxml_parser/common_parser/common_data/hyperlink.rb +3 -3
- data/lib/ooxml_parser/common_parser/common_data/ooxml_document_object.rb +2 -91
- data/lib/ooxml_parser/common_parser/common_document_structure.rb +38 -0
- data/lib/ooxml_parser/common_parser/parser/ooxml_file.rb +68 -0
- data/lib/ooxml_parser/common_parser/parser.rb +32 -38
- data/lib/ooxml_parser/docx_parser/document_structure/comments.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/comments_extended.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_properties.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_settings.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/document_style.rb +8 -2
- data/lib/ooxml_parser/docx_parser/document_structure/docx_paragraph.rb +7 -18
- data/lib/ooxml_parser/docx_parser/document_structure/header_footer.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/numbering.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/note.rb +2 -2
- data/lib/ooxml_parser/docx_parser/document_structure/page_properties/page_properties.rb +3 -3
- data/lib/ooxml_parser/docx_parser/document_structure/styles.rb +1 -1
- data/lib/ooxml_parser/docx_parser/document_structure.rb +34 -36
- data/lib/ooxml_parser/docx_parser.rb +3 -2
- data/lib/ooxml_parser/pptx_parser/presentation/comment_authors.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_comments.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/presentation_theme.rb +23 -23
- data/lib/ooxml_parser/pptx_parser/presentation/slide/graphic_frame/graphic_frame.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide.rb +5 -5
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layout_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_layouts_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/slide_master_file.rb +2 -2
- data/lib/ooxml_parser/pptx_parser/presentation/slide_masters_helper.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation/table_styles.rb +1 -1
- data/lib/ooxml_parser/pptx_parser/presentation.rb +7 -8
- data/lib/ooxml_parser/pptx_parser.rb +3 -2
- data/lib/ooxml_parser/version.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/chartsheet.rb +3 -3
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_cache.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/pivot_table_definition.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/style_sheet.rb +1 -1
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet/table_part.rb +2 -2
- data/lib/ooxml_parser/xlsx_parser/workbook/worksheet.rb +11 -11
- data/lib/ooxml_parser/xlsx_parser/workbook.rb +7 -8
- data/lib/ooxml_parser/xlsx_parser.rb +3 -2
- metadata +4 -4
- data/lib/ooxml_parser/docx_parser/document_structure/docx_paragraph/style_parametres.rb +0 -30
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f8264219a82e05f855d6e850703479fdfbb25b866ee5eb5705ac5171903fd7dd
|
4
|
+
data.tar.gz: e5ef8d6182d11e432c97bc8d63a1632f270c443acf17803b6abafc502ae0681a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d666854752d50ce5a26dd6c36fa78b801e34de2938eb10e6247ca11832979eea3b23a8fc8a838e272b64df9e13f0947a747dbe322ac008ec797cd0a836c22519
|
7
|
+
data.tar.gz: 81facb1e39faa54991524a2c3b3b613588c990d6cf02be6ac063b17c6971f0f08af0bc09a95cdae99a7a4eb50d1bb1a7278bc76377c0e496786b6f86335335f2
|
@@ -82,7 +82,7 @@ module OoxmlParser
|
|
82
82
|
# Parse Chart data
|
83
83
|
# @return [Chart] result of parsing
|
84
84
|
def parse
|
85
|
-
chart_xml = parse_xml(
|
85
|
+
chart_xml = parse_xml(root_object.current_xml)
|
86
86
|
chart_xml.xpath('*').each do |chart_node|
|
87
87
|
case chart_node.name
|
88
88
|
when 'chartSpace'
|
@@ -180,8 +180,8 @@ module OoxmlParser
|
|
180
180
|
|
181
181
|
# Parse relationship of chart
|
182
182
|
def parse_relationships
|
183
|
-
file_name = File.basename(
|
184
|
-
relationship_file = "#{
|
183
|
+
file_name = File.basename(root_object.current_xml)
|
184
|
+
relationship_file = "#{root_object.unpacked_folder}" \
|
185
185
|
'/word/charts/' \
|
186
186
|
"_rels/#{file_name}.rels"
|
187
187
|
|
@@ -198,7 +198,7 @@ module OoxmlParser
|
|
198
198
|
return if chart_relationship.empty?
|
199
199
|
|
200
200
|
chart_style_file = chart_relationship.first
|
201
|
-
style_file = "#{
|
201
|
+
style_file = "#{root_object.unpacked_folder}" \
|
202
202
|
"/word/charts/#{chart_style_file}"
|
203
203
|
|
204
204
|
@style = ChartStyleFile.new(parent: self).parse(style_file)
|
data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb
CHANGED
@@ -23,9 +23,9 @@ module OoxmlParser
|
|
23
23
|
@data = DocxPicture.new(parent: self).parse(node_child)
|
24
24
|
when 'chart'
|
25
25
|
@type = :chart
|
26
|
-
|
26
|
+
root_object.add_to_xmls_stack("#{root_object.root_subfolder}/#{root_object.get_link_from_rels(node_child.attribute('id').value)}")
|
27
27
|
@data = Chart.new(parent: self).parse
|
28
|
-
|
28
|
+
root_object.xmls_stack.pop
|
29
29
|
when 'wgp'
|
30
30
|
@type = :group
|
31
31
|
@data = ShapesGrouping.new(parent: self).parse(node_child)
|
@@ -25,7 +25,7 @@ module OoxmlParser
|
|
25
25
|
return self unless @resource_id
|
26
26
|
return self if @resource_id.empty?
|
27
27
|
|
28
|
-
@path =
|
28
|
+
@path = root_object.get_link_from_rels(@resource_id)
|
29
29
|
if !@path || @path.empty?
|
30
30
|
warn "Cant find path to media file by id: #{@resource_id}"
|
31
31
|
return self
|
@@ -33,13 +33,26 @@ module OoxmlParser
|
|
33
33
|
return self if @path == 'NULL'
|
34
34
|
return self if @path.match?(URI::DEFAULT_PARSER.make_regexp)
|
35
35
|
|
36
|
-
full_path_to_file =
|
36
|
+
full_path_to_file = root_object.unpacked_folder + root_object.root_subfolder + @path.gsub('..', '')
|
37
37
|
if File.exist?(full_path_to_file)
|
38
|
-
@content = File.
|
38
|
+
@content = if File.extname(@path) == '.xlsx'
|
39
|
+
parse_ole_xlsx(full_path_to_file)
|
40
|
+
else
|
41
|
+
File.binread(full_path_to_file)
|
42
|
+
end
|
39
43
|
else
|
40
44
|
warn "Couldn't find #{full_path_to_file} file on filesystem. Possible problem in original document"
|
41
45
|
end
|
42
46
|
self
|
43
47
|
end
|
48
|
+
|
49
|
+
private
|
50
|
+
|
51
|
+
# Parse ole xlsx file
|
52
|
+
# @param full_path [String] full path to file
|
53
|
+
# @return [XLSXWorkbook]
|
54
|
+
def parse_ole_xlsx(full_path)
|
55
|
+
OoxmlParser::XlsxParser.parse_xlsx(full_path)
|
56
|
+
end
|
44
57
|
end
|
45
58
|
end
|
@@ -21,7 +21,7 @@ module OoxmlParser
|
|
21
21
|
# Parse ContentTypes object
|
22
22
|
# @return [ContentTypes] result of parsing
|
23
23
|
def parse
|
24
|
-
doc = Nokogiri::XML.parse(File.open("#{
|
24
|
+
doc = Nokogiri::XML.parse(File.open("#{root_object.unpacked_folder}/[Content_Types].xml"))
|
25
25
|
node = doc.xpath('*').first
|
26
26
|
|
27
27
|
node.xpath('*').each do |node_child|
|
@@ -46,7 +46,7 @@ module OoxmlParser
|
|
46
46
|
@url = Coordinates.new.parse_string(value.value)
|
47
47
|
when 'id'
|
48
48
|
@id = value.value
|
49
|
-
@url =
|
49
|
+
@url = root_object.get_link_from_rels(@id) unless @id.empty?
|
50
50
|
when 'tooltip'
|
51
51
|
@tooltip = value.value
|
52
52
|
when 'ref'
|
@@ -80,7 +80,7 @@ module OoxmlParser
|
|
80
80
|
else
|
81
81
|
if meaningful_id?
|
82
82
|
@action = :external_link
|
83
|
-
@url =
|
83
|
+
@url = root_object.get_link_from_rels(@id)
|
84
84
|
end
|
85
85
|
end
|
86
86
|
self
|
@@ -98,7 +98,7 @@ module OoxmlParser
|
|
98
98
|
def parse_url_for_slide_link
|
99
99
|
return unless meaningful_id?
|
100
100
|
|
101
|
-
@url =
|
101
|
+
@url = root_object.get_link_from_rels(@id).scan(/\d+/).join.to_i
|
102
102
|
end
|
103
103
|
end
|
104
104
|
end
|
@@ -28,6 +28,8 @@ module OoxmlParser
|
|
28
28
|
|
29
29
|
instance_variables.each do |current_attribute|
|
30
30
|
next if current_attribute == :@parent
|
31
|
+
next if instance_variable_get(current_attribute).is_a?(Nokogiri::XML::Element)
|
32
|
+
|
31
33
|
return false unless instance_variable_get(current_attribute) == other.instance_variable_get(current_attribute)
|
32
34
|
end
|
33
35
|
true
|
@@ -47,96 +49,5 @@ module OoxmlParser
|
|
47
49
|
end
|
48
50
|
xml
|
49
51
|
end
|
50
|
-
|
51
|
-
class << self
|
52
|
-
# @return [String] path to root subfolder
|
53
|
-
attr_accessor :root_subfolder
|
54
|
-
# @return [PresentationTheme] list of themes
|
55
|
-
attr_accessor :theme
|
56
|
-
# @return [Array<String>] stack of xmls
|
57
|
-
attr_accessor :xmls_stack
|
58
|
-
# @return [String] path to root folder
|
59
|
-
attr_accessor :path_to_folder
|
60
|
-
|
61
|
-
# Copy this file and rename to zip
|
62
|
-
# @param path [String] path to file
|
63
|
-
# @return [String] path to result zip
|
64
|
-
def copy_file_and_rename_to_zip(path)
|
65
|
-
file_name = File.basename(path)
|
66
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
67
|
-
file_path = "#{tmp_folder}/#{file_name}"
|
68
|
-
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
69
|
-
FileUtils.mkdir_p(tmp_folder)
|
70
|
-
raise "Cannot find file by path #{path}" unless File.exist?(path)
|
71
|
-
|
72
|
-
FileUtils.cp path, tmp_folder
|
73
|
-
file_path
|
74
|
-
end
|
75
|
-
|
76
|
-
# Decrypt file protected with password
|
77
|
-
# @param path [String] path to file
|
78
|
-
# @param password [String] password to file
|
79
|
-
# @return [String] path to decrypted file
|
80
|
-
def decrypt_file(path, password)
|
81
|
-
file_name = File.basename(path)
|
82
|
-
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
83
|
-
decrypted_path = "#{tmp_folder}/#{file_name}"
|
84
|
-
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
85
|
-
OoxmlDecrypt::EncryptedFile.decrypt_to_file(path, binary_password, decrypted_path)
|
86
|
-
|
87
|
-
decrypted_path
|
88
|
-
end
|
89
|
-
|
90
|
-
# Unzip specified file
|
91
|
-
# @param path_to_file [String] path to zip file
|
92
|
-
# @param destination [String] folder to extract
|
93
|
-
# @return [void]
|
94
|
-
def unzip_file(path_to_file, destination)
|
95
|
-
Zip.warn_invalid_date = false
|
96
|
-
Zip::File.open(path_to_file) do |zip_file|
|
97
|
-
raise LoadError, "There is no files in zip #{path_to_file}" if zip_file.entries.empty?
|
98
|
-
|
99
|
-
zip_file.each do |file|
|
100
|
-
file_path = File.join(destination, file.name)
|
101
|
-
FileUtils.mkdir_p(File.dirname(file_path))
|
102
|
-
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
103
|
-
end
|
104
|
-
end
|
105
|
-
end
|
106
|
-
|
107
|
-
# @return [String] dir to base of file
|
108
|
-
def dir
|
109
|
-
"#{OOXMLDocumentObject.path_to_folder}#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/"
|
110
|
-
end
|
111
|
-
|
112
|
-
# @return [String] path to current xml file
|
113
|
-
def current_xml
|
114
|
-
OOXMLDocumentObject.path_to_folder + OOXMLDocumentObject.xmls_stack.last
|
115
|
-
end
|
116
|
-
|
117
|
-
# Add file to parsing stack
|
118
|
-
# @param path [String] path of file to add to stack
|
119
|
-
# @return [void]
|
120
|
-
def add_to_xmls_stack(path)
|
121
|
-
OOXMLDocumentObject.xmls_stack << if path.include?('..')
|
122
|
-
"#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/#{path}"
|
123
|
-
elsif path.start_with?(OOXMLDocumentObject.root_subfolder)
|
124
|
-
path
|
125
|
-
else
|
126
|
-
OOXMLDocumentObject.root_subfolder + path
|
127
|
-
end
|
128
|
-
end
|
129
|
-
|
130
|
-
# Get link to file from rels file
|
131
|
-
# @param id [String] file to get
|
132
|
-
# @return [String] result
|
133
|
-
def get_link_from_rels(id)
|
134
|
-
rels_path = dir + "_rels/#{File.basename(OOXMLDocumentObject.xmls_stack.last)}.rels"
|
135
|
-
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
136
|
-
|
137
|
-
relationships = Relationships.new.parse_file(rels_path)
|
138
|
-
relationships.target_by_id(id)
|
139
|
-
end
|
140
|
-
end
|
141
52
|
end
|
142
53
|
end
|
@@ -14,12 +14,50 @@ module OoxmlParser
|
|
14
14
|
attr_accessor :default_font_style
|
15
15
|
# @return [ContentTypes] data about content types
|
16
16
|
attr_accessor :content_types
|
17
|
+
# @return [String] root sub-folder for object
|
18
|
+
attr_reader :root_subfolder
|
19
|
+
# @return [String] path to folder with unpacked document
|
20
|
+
attr_reader :unpacked_folder
|
21
|
+
# @return [Array<String>] list of xmls to parse
|
22
|
+
attr_accessor :xmls_stack
|
17
23
|
|
18
24
|
def initialize(params = {})
|
19
25
|
@default_font_size = params.fetch(:default_font_size, 18)
|
20
26
|
@default_font_typeface = params.fetch(:default_font_typeface, 'Arial')
|
21
27
|
@default_font_style = FontStyle.new
|
28
|
+
@unpacked_folder = params.fetch(:unpacked_folder, nil)
|
29
|
+
@xmls_stack = []
|
22
30
|
super(parent: nil)
|
23
31
|
end
|
32
|
+
|
33
|
+
# @return [String] path to current xml file
|
34
|
+
def current_xml
|
35
|
+
root_object.unpacked_folder + @xmls_stack.last
|
36
|
+
end
|
37
|
+
|
38
|
+
# Add file to parsing stack
|
39
|
+
# @param path [String] path of file to add to stack
|
40
|
+
# @return [void]
|
41
|
+
def add_to_xmls_stack(path)
|
42
|
+
@xmls_stack << if path.include?('..')
|
43
|
+
"#{File.dirname(@xmls_stack.last)}/#{path}"
|
44
|
+
elsif path.start_with?(@root_subfolder)
|
45
|
+
path
|
46
|
+
else
|
47
|
+
@root_subfolder + path
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
# Get link to file from rels file
|
52
|
+
# @param id [String] file to get
|
53
|
+
# @return [String] result
|
54
|
+
def get_link_from_rels(id)
|
55
|
+
dir = "#{unpacked_folder}#{File.dirname(@xmls_stack.last)}/"
|
56
|
+
rels_path = dir + "_rels/#{File.basename(@xmls_stack.last)}.rels"
|
57
|
+
raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
|
58
|
+
|
59
|
+
relationships = Relationships.new.parse_file(rels_path)
|
60
|
+
relationships.target_by_id(id)
|
61
|
+
end
|
24
62
|
end
|
25
63
|
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module OoxmlParser
|
4
|
+
# Class for actions with OOXML file
|
5
|
+
class OoxmlFile
|
6
|
+
# @return [String] path to file
|
7
|
+
attr_reader :path
|
8
|
+
|
9
|
+
def initialize(path)
|
10
|
+
@path = path
|
11
|
+
end
|
12
|
+
|
13
|
+
# Copy this file and rename to zip
|
14
|
+
# @return [String] path to result zip
|
15
|
+
def copy_file_and_rename_to_zip
|
16
|
+
file_name = File.basename(@path)
|
17
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
18
|
+
@zip_path = "#{tmp_folder}/#{file_name}"
|
19
|
+
FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
|
20
|
+
FileUtils.mkdir_p(tmp_folder)
|
21
|
+
raise "Cannot find file by path #{@path}" unless File.exist?(@path)
|
22
|
+
|
23
|
+
FileUtils.cp path, tmp_folder
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [String] path to folder with zip
|
27
|
+
def path_to_folder
|
28
|
+
@zip_path.sub(File.basename(@zip_path), '')
|
29
|
+
end
|
30
|
+
|
31
|
+
# Unzip specified file
|
32
|
+
# @return [void]
|
33
|
+
def unzip
|
34
|
+
Zip.warn_invalid_date = false
|
35
|
+
Zip::File.open(@zip_path) do |zip_file|
|
36
|
+
raise LoadError, "There is no files in zip #{@zip_path}" if zip_file.entries.empty?
|
37
|
+
|
38
|
+
zip_file.each do |file|
|
39
|
+
file_path = File.join(path_to_folder, file.name)
|
40
|
+
FileUtils.mkdir_p(File.dirname(file_path))
|
41
|
+
zip_file.extract(file, file_path) unless File.exist?(file_path)
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# @return [Symbol] file type recognized by folder structure
|
47
|
+
def format_by_folders
|
48
|
+
return :docx if Dir.exist?("#{path_to_folder}/word")
|
49
|
+
return :xlsx if Dir.exist?("#{path_to_folder}/xl")
|
50
|
+
return :pptx if Dir.exist?("#{path_to_folder}/ppt")
|
51
|
+
|
52
|
+
:zip
|
53
|
+
end
|
54
|
+
|
55
|
+
# Decrypt file protected with password
|
56
|
+
# @param password [String] password to file
|
57
|
+
# @return [OoxmlFile] decrypted file
|
58
|
+
def decrypt(password)
|
59
|
+
file_name = File.basename(@path)
|
60
|
+
tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
|
61
|
+
decrypted_path = "#{tmp_folder}/#{file_name}"
|
62
|
+
binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
|
63
|
+
OoxmlDecrypt::EncryptedFile.decrypt_to_file(@path, binary_password, decrypted_path)
|
64
|
+
|
65
|
+
OoxmlFile.new(decrypted_path)
|
66
|
+
end
|
67
|
+
end
|
68
|
+
end
|
@@ -1,52 +1,46 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'parser/encryption_checker'
|
4
|
+
require_relative 'parser/ooxml_file'
|
4
5
|
|
5
6
|
module OoxmlParser
|
6
7
|
# Basic class for OoxmlParser
|
7
8
|
class Parser
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
9
|
+
class << self
|
10
|
+
# Base method that copies and unzips a document of any type, then yields it for parsing
|
11
|
+
# @param file [OoxmlFile] file with data
|
12
|
+
# @return [CommonDocumentStructure] structure of doc
|
13
|
+
def parse_format(file)
|
14
|
+
return nil if EncryptionChecker.new(file.path).encrypted?
|
13
15
|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
16
|
+
file.copy_file_and_rename_to_zip
|
17
|
+
file.unzip
|
18
|
+
model = yield(file)
|
19
|
+
model.file_path = file.path if model
|
20
|
+
FileUtils.rm_rf(file.path_to_folder)
|
21
|
+
model
|
22
|
+
end
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
24
|
+
# Base method to parse document of any type
|
25
|
+
# @param path_to_file [String] file
|
26
|
+
# @return [CommonDocumentStructure] structure of doc
|
27
|
+
def parse(path_to_file, password: nil)
|
28
|
+
file = OoxmlFile.new(path_to_file)
|
29
|
+
file = file.decrypt(password) if password
|
30
|
+
Parser.parse_format(file) do |yielded_file|
|
31
|
+
format = yielded_file.format_by_folders
|
32
|
+
case format
|
33
|
+
when :docx
|
34
|
+
DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
|
35
|
+
when :xlsx
|
36
|
+
XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
|
37
|
+
when :pptx
|
38
|
+
Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
|
39
|
+
else
|
40
|
+
warn "#{path_to_file} is a simple zip file without OOXML content"
|
41
|
+
end
|
39
42
|
end
|
40
43
|
end
|
41
44
|
end
|
42
|
-
|
43
|
-
# Recognize folder format
|
44
|
-
# @param directory [String] path to directory
|
45
|
-
# @return [Symbol] type of document
|
46
|
-
def self.recognize_folder_format(directory = OOXMLDocumentObject.path_to_folder)
|
47
|
-
return :docx if Dir.exist?("#{directory}/word")
|
48
|
-
return :xlsx if Dir.exist?("#{directory}/xl")
|
49
|
-
return :pptx if Dir.exist?("#{directory}/ppt")
|
50
|
-
end
|
51
45
|
end
|
52
46
|
end
|
@@ -9,8 +9,8 @@ module OoxmlParser
|
|
9
9
|
|
10
10
|
def initialize(params = {})
|
11
11
|
@comments_array = []
|
12
|
-
@file = params.fetch(:file, "#{OOXMLDocumentObject.path_to_folder}word/comments.xml")
|
13
12
|
super(parent: params[:parent])
|
13
|
+
@file = params.fetch(:file, "#{root_object.unpacked_folder}word/comments.xml")
|
14
14
|
end
|
15
15
|
|
16
16
|
# @return [Comment] accessor
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse CommentsExtended object
|
18
18
|
# @return [CommentsExtended] result of parsing
|
19
19
|
def parse
|
20
|
-
file_to_parse = "#{
|
20
|
+
file_to_parse = "#{root_object.unpacked_folder}word/commentsExtended.xml"
|
21
21
|
return nil unless File.exist?(file_to_parse)
|
22
22
|
|
23
23
|
doc = parse_xml(file_to_parse)
|
@@ -8,7 +8,7 @@ module OoxmlParser
|
|
8
8
|
# Parse Document properties
|
9
9
|
# @return [DocumentProperties]
|
10
10
|
def parse
|
11
|
-
properties_file = "#{
|
11
|
+
properties_file = "#{root_object.unpacked_folder}docProps/app.xml"
|
12
12
|
unless File.exist?(properties_file)
|
13
13
|
warn "There is no 'docProps/app.xml' in docx. It may be some problem with it"
|
14
14
|
return self
|
@@ -9,7 +9,7 @@ module OoxmlParser
|
|
9
9
|
# Parse Settings object
|
10
10
|
# @return [DocumentSettings] result of parsing
|
11
11
|
def parse
|
12
|
-
settings_path = "#{
|
12
|
+
settings_path = "#{root_object.unpacked_folder}word/settings.xml"
|
13
13
|
return nil unless File.exist?(settings_path)
|
14
14
|
|
15
15
|
doc = parse_xml(settings_path)
|
@@ -21,8 +21,12 @@ module OoxmlParser
|
|
21
21
|
attr_accessor :next_style
|
22
22
|
# @return [DocxParagraphRun] run properties
|
23
23
|
attr_accessor :run_properties
|
24
|
+
# @return [Nokogiri::XML::Node] run properties node
|
25
|
+
attr_accessor :run_properties_node
|
24
26
|
# @return [ParagraphProperties] paragraph properties
|
25
27
|
attr_accessor :paragraph_properties
|
28
|
+
# @return [Nokogiri::XML::Node] paragraph properties node
|
29
|
+
attr_accessor :paragraph_properties_node
|
26
30
|
# @return [TableProperties] properties of table
|
27
31
|
attr_accessor :table_properties
|
28
32
|
# @return [Array, TableStyleProperties] list of table style properties
|
@@ -76,9 +80,11 @@ module OoxmlParser
|
|
76
80
|
when 'next'
|
77
81
|
@next_style = subnode.attribute('val').value
|
78
82
|
when 'rPr'
|
79
|
-
@
|
83
|
+
@run_properties_node = subnode
|
84
|
+
@run_properties = DocxParagraphRun.new(parent: self).parse_properties(@run_properties_node)
|
80
85
|
when 'pPr'
|
81
|
-
@
|
86
|
+
@paragraph_properties_node = subnode
|
87
|
+
@paragraph_properties = ParagraphProperties.new(parent: self).parse(@paragraph_properties_node)
|
82
88
|
when 'tblPr'
|
83
89
|
@table_properties = TableProperties.new(parent: self).parse(subnode)
|
84
90
|
when 'trPr'
|
@@ -12,7 +12,6 @@ require_relative 'docx_paragraph/inserted'
|
|
12
12
|
require_relative 'docx_paragraph/structured_document_tag'
|
13
13
|
require_relative 'docx_paragraph/frame_properties'
|
14
14
|
require_relative 'docx_paragraph/docx_formula'
|
15
|
-
require_relative 'docx_paragraph/style_parametres'
|
16
15
|
module OoxmlParser
|
17
16
|
# Class for data of DocxParagraph
|
18
17
|
class DocxParagraph < OOXMLDocumentObject
|
@@ -202,7 +201,8 @@ module OoxmlParser
|
|
202
201
|
when 'contextualSpacing'
|
203
202
|
@contextual_spacing = true
|
204
203
|
when 'pStyle'
|
205
|
-
|
204
|
+
@paragraph_style_ref = ParagraphStyleRef.new(parent: self).parse(node_child)
|
205
|
+
fill_style_data(default_char_style)
|
206
206
|
when 'ind'
|
207
207
|
@ind = DocumentStructure.default_paragraph_style.ind.dup.parse(node_child)
|
208
208
|
when 'numPr'
|
@@ -229,24 +229,13 @@ module OoxmlParser
|
|
229
229
|
self
|
230
230
|
end
|
231
231
|
|
232
|
-
#
|
233
|
-
# @param id [String] id of style to parse
|
232
|
+
# Fill data from styles
|
234
233
|
# @param character_style [DocxParagraphRun] style to parse
|
235
234
|
# @return [void]
|
236
|
-
def
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
style.xpath('w:pPr').each do |p_pr|
|
242
|
-
parse_paragraph_style(p_pr, character_style)
|
243
|
-
@style = StyleParametres.new(parent: self).parse(style)
|
244
|
-
end
|
245
|
-
style.xpath('w:rPr').each do |r_pr|
|
246
|
-
character_style.parse_properties(r_pr)
|
247
|
-
end
|
248
|
-
break
|
249
|
-
end
|
235
|
+
def fill_style_data(character_style)
|
236
|
+
@style = root_object.document_style_by_id(@paragraph_style_ref.value)
|
237
|
+
parse_paragraph_style(@style.paragraph_properties_node, character_style) if @style.paragraph_properties_node
|
238
|
+
character_style.parse_properties(@style.run_properties_node) if @style.run_properties_node
|
250
239
|
end
|
251
240
|
|
252
241
|
extend Gem::Deprecate
|
@@ -47,7 +47,7 @@ module OoxmlParser
|
|
47
47
|
def parse(node)
|
48
48
|
@id = node.attribute('id').value.to_i
|
49
49
|
parse_type(node)
|
50
|
-
doc = parse_xml(
|
50
|
+
doc = parse_xml(root_object.unpacked_folder + xml_path)
|
51
51
|
doc.search(xpath_for_search).each do |footnote|
|
52
52
|
next unless footnote.attribute('id').value.to_i == @id
|
53
53
|
|
@@ -38,7 +38,7 @@ module OoxmlParser
|
|
38
38
|
# Parse Numbering data
|
39
39
|
# @return [Numbering] result of parse
|
40
40
|
def parse
|
41
|
-
numbering_xml = "#{
|
41
|
+
numbering_xml = "#{root_object.unpacked_folder}word/numbering.xml"
|
42
42
|
return nil unless File.exist?(numbering_xml)
|
43
43
|
|
44
44
|
node = parse_xml(File.open(numbering_xml))
|
@@ -43,10 +43,10 @@ module OoxmlParser
|
|
43
43
|
# @param target [String] name of target
|
44
44
|
# @return [String] path to note xml file
|
45
45
|
def file_path(target)
|
46
|
-
file = "#{
|
46
|
+
file = "#{root_object.unpacked_folder}word/#{target}"
|
47
47
|
return file if File.exist?(file)
|
48
48
|
|
49
|
-
"#{
|
49
|
+
"#{root_object.unpacked_folder}#{target}" unless File.exist?(file)
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
@@ -63,8 +63,8 @@ module OoxmlParser
|
|
63
63
|
when 'cols'
|
64
64
|
@columns = Columns.new.parse(pg_size_subnode)
|
65
65
|
when 'headerReference', 'footerReference'
|
66
|
-
target =
|
67
|
-
|
66
|
+
target = root_object.get_link_from_rels(pg_size_subnode.attribute('id').value)
|
67
|
+
root_object.add_to_xmls_stack("word/#{target}")
|
68
68
|
note = Note.parse(default_paragraph: default_paragraph,
|
69
69
|
default_character: default_character,
|
70
70
|
target: target,
|
@@ -72,7 +72,7 @@ module OoxmlParser
|
|
72
72
|
type: File.basename(target).sub('.xml', ''),
|
73
73
|
parent: self)
|
74
74
|
@notes << note
|
75
|
-
|
75
|
+
root_object.xmls_stack.pop
|
76
76
|
when 'footnotePr'
|
77
77
|
@footnote_properties = FootnoteProperties.new(parent: self).parse(pg_size_subnode)
|
78
78
|
end
|
@@ -17,7 +17,7 @@ module OoxmlParser
|
|
17
17
|
# Parse styles data
|
18
18
|
# @return [Styles] result of parsing
|
19
19
|
def parse
|
20
|
-
doc = parse_xml("#{
|
20
|
+
doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
|
21
21
|
doc.xpath('w:styles/*').each do |node_child|
|
22
22
|
case node_child.name
|
23
23
|
when 'docDefaults'
|