ooxml_parser 0.29.0 → 0.31.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. checksums.yaml +4 -4
  2. data/lib/ooxml_parser/common_parser/common_data/alternate_content/chart/chart.rb +4 -4
  3. data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/docx_graphic.rb +2 -2
  4. data/lib/ooxml_parser/common_parser/common_data/alternate_content/drawing/graphic/picture/docx_blip/file_reference.rb +3 -10
  5. data/lib/ooxml_parser/common_parser/common_data/content_types.rb +1 -1
  6. data/lib/ooxml_parser/common_parser/common_data/coordinates.rb +9 -4
  7. data/lib/ooxml_parser/common_parser/common_data/hyperlink.rb +3 -3
  8. data/lib/ooxml_parser/common_parser/common_data/ooxml_document_object.rb +2 -91
  9. data/lib/ooxml_parser/common_parser/common_data/paragraph/paragraph_run/run_properties.rb +4 -0
  10. data/lib/ooxml_parser/common_parser/common_document_structure.rb +38 -0
  11. data/lib/ooxml_parser/common_parser/parser/ooxml_file.rb +68 -0
  12. data/lib/ooxml_parser/common_parser/parser.rb +32 -38
  13. data/lib/ooxml_parser/docx_parser/document_structure/comments.rb +1 -1
  14. data/lib/ooxml_parser/docx_parser/document_structure/comments_extended.rb +1 -1
  15. data/lib/ooxml_parser/docx_parser/document_structure/document_properties.rb +1 -1
  16. data/lib/ooxml_parser/docx_parser/document_structure/document_settings.rb +1 -1
  17. data/lib/ooxml_parser/docx_parser/document_structure/header_footer.rb +1 -1
  18. data/lib/ooxml_parser/docx_parser/document_structure/numbering.rb +2 -2
  19. data/lib/ooxml_parser/docx_parser/document_structure/page_properties/note.rb +2 -2
  20. data/lib/ooxml_parser/docx_parser/document_structure/page_properties/page_properties.rb +3 -3
  21. data/lib/ooxml_parser/docx_parser/document_structure/styles.rb +1 -1
  22. data/lib/ooxml_parser/docx_parser/document_structure.rb +10 -11
  23. data/lib/ooxml_parser/docx_parser.rb +3 -2
  24. data/lib/ooxml_parser/pptx_parser/presentation/comment_authors.rb +2 -2
  25. data/lib/ooxml_parser/pptx_parser/presentation/presentation_comments.rb +2 -2
  26. data/lib/ooxml_parser/pptx_parser/presentation/presentation_theme.rb +23 -23
  27. data/lib/ooxml_parser/pptx_parser/presentation/slide/graphic_frame/graphic_frame.rb +2 -2
  28. data/lib/ooxml_parser/pptx_parser/presentation/slide.rb +5 -5
  29. data/lib/ooxml_parser/pptx_parser/presentation/slide_layout_file.rb +2 -2
  30. data/lib/ooxml_parser/pptx_parser/presentation/slide_layouts_helper.rb +1 -1
  31. data/lib/ooxml_parser/pptx_parser/presentation/slide_master_file.rb +2 -2
  32. data/lib/ooxml_parser/pptx_parser/presentation/slide_masters_helper.rb +1 -1
  33. data/lib/ooxml_parser/pptx_parser/presentation/table_styles.rb +1 -1
  34. data/lib/ooxml_parser/pptx_parser/presentation.rb +7 -8
  35. data/lib/ooxml_parser/pptx_parser.rb +3 -2
  36. data/lib/ooxml_parser/version.rb +1 -1
  37. data/lib/ooxml_parser/xlsx_parser/workbook/chartsheet.rb +3 -3
  38. data/lib/ooxml_parser/xlsx_parser/workbook/pivot_cache.rb +1 -1
  39. data/lib/ooxml_parser/xlsx_parser/workbook/pivot_table_definition.rb +1 -1
  40. data/lib/ooxml_parser/xlsx_parser/workbook/style_sheet.rb +1 -1
  41. data/lib/ooxml_parser/xlsx_parser/workbook/worksheet/table_part.rb +2 -2
  42. data/lib/ooxml_parser/xlsx_parser/workbook/worksheet.rb +11 -11
  43. data/lib/ooxml_parser/xlsx_parser/workbook.rb +7 -8
  44. data/lib/ooxml_parser/xlsx_parser.rb +3 -2
  45. metadata +4 -3
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ae537423c384b461a5c2eb77bfe60b692ab334e1bd6ae22cc928e28230302997
4
- data.tar.gz: c8696409b7862346b631367d7fa014ceea48e18deb3a6b9832fbb4dcc57dab13
3
+ metadata.gz: 682b6146d274c90e967ce9d494c4c6df0fafe597d9dc01fc27c3b8fa854630c2
4
+ data.tar.gz: a5f7c131398910ef896a0c0b21578083e02dbe84aa6d024bf4abd4e2fab2c1bb
5
5
  SHA512:
6
- metadata.gz: 9b3655be06b3aa59bdb700ba359f0dffd6bbe54840978665870673edb2e7249c179807b102cc7b49a503c638b0508a3cb46f0183afa4be7a8b8baff93b3dbab6
7
- data.tar.gz: 36b999f281320ca0792e6504060eb65ba0f97005ed82b131dea276f6b136e6a0cca0281a3ab4a0c67ce213fe31e26263a1603bc7cf4ac10a847e264e64ccd512
6
+ metadata.gz: df07e615fd1ecc0c4436072b978f6f6ba8959a83785d88fe2a1922417c94daef2227ae4914f34683e3f9542ca35e68de2652c5cf78b2a52cf828d6f46b850aa2
7
+ data.tar.gz: 0fce9c7ca3617ef27c67acc338f8e2eaa7a2220d98de3d4367d4b4bcf89f445a1982e58fe2e3f4ec6aa1bca48e8ed533f7bed9ad0a7d64244620f47dd241b4c3
@@ -82,7 +82,7 @@ module OoxmlParser
82
82
  # Parse Chart data
83
83
  # @return [Chart] result of parsing
84
84
  def parse
85
- chart_xml = parse_xml(OOXMLDocumentObject.current_xml)
85
+ chart_xml = parse_xml(root_object.current_xml)
86
86
  chart_xml.xpath('*').each do |chart_node|
87
87
  case chart_node.name
88
88
  when 'chartSpace'
@@ -180,8 +180,8 @@ module OoxmlParser
180
180
 
181
181
  # Parse relationship of chart
182
182
  def parse_relationships
183
- file_name = File.basename(OOXMLDocumentObject.current_xml)
184
- relationship_file = "#{OOXMLDocumentObject.path_to_folder}" \
183
+ file_name = File.basename(root_object.current_xml)
184
+ relationship_file = "#{root_object.unpacked_folder}" \
185
185
  '/word/charts/' \
186
186
  "_rels/#{file_name}.rels"
187
187
 
@@ -198,7 +198,7 @@ module OoxmlParser
198
198
  return if chart_relationship.empty?
199
199
 
200
200
  chart_style_file = chart_relationship.first
201
- style_file = "#{OOXMLDocumentObject.path_to_folder}" \
201
+ style_file = "#{root_object.unpacked_folder}" \
202
202
  "/word/charts/#{chart_style_file}"
203
203
 
204
204
  @style = ChartStyleFile.new(parent: self).parse(style_file)
@@ -23,9 +23,9 @@ module OoxmlParser
23
23
  @data = DocxPicture.new(parent: self).parse(node_child)
24
24
  when 'chart'
25
25
  @type = :chart
26
- OOXMLDocumentObject.add_to_xmls_stack("#{OOXMLDocumentObject.root_subfolder}/#{OOXMLDocumentObject.get_link_from_rels(node_child.attribute('id').value)}")
26
+ root_object.add_to_xmls_stack("#{root_object.root_subfolder}/#{root_object.get_link_from_rels(node_child.attribute('id').value)}")
27
27
  @data = Chart.new(parent: self).parse
28
- OOXMLDocumentObject.xmls_stack.pop
28
+ root_object.xmls_stack.pop
29
29
  when 'wgp'
30
30
  @type = :group
31
31
  @data = ShapesGrouping.new(parent: self).parse(node_child)
@@ -25,7 +25,7 @@ module OoxmlParser
25
25
  return self unless @resource_id
26
26
  return self if @resource_id.empty?
27
27
 
28
- @path = OOXMLDocumentObject.get_link_from_rels(@resource_id)
28
+ @path = root_object.get_link_from_rels(@resource_id)
29
29
  if !@path || @path.empty?
30
30
  warn "Cant find path to media file by id: #{@resource_id}"
31
31
  return self
@@ -33,7 +33,7 @@ module OoxmlParser
33
33
  return self if @path == 'NULL'
34
34
  return self if @path.match?(URI::DEFAULT_PARSER.make_regexp)
35
35
 
36
- full_path_to_file = OOXMLDocumentObject.path_to_folder + OOXMLDocumentObject.root_subfolder + @path.gsub('..', '')
36
+ full_path_to_file = root_object.unpacked_folder + root_object.root_subfolder + @path.gsub('..', '')
37
37
  if File.exist?(full_path_to_file)
38
38
  @content = if File.extname(@path) == '.xlsx'
39
39
  parse_ole_xlsx(full_path_to_file)
@@ -52,14 +52,7 @@ module OoxmlParser
52
52
  # @param [String] full_path to file
53
53
  # @return [XLSXWorkbook]
54
54
  def parse_ole_xlsx(full_path)
55
- # TODO: Fix this ugly hack with global vars
56
- # by replacing all global variables
57
- stack = OOXMLDocumentObject.xmls_stack
58
- dir = OOXMLDocumentObject.path_to_folder
59
- result = OoxmlParser::Parser.parse(full_path)
60
- OOXMLDocumentObject.xmls_stack = stack
61
- OOXMLDocumentObject.path_to_folder = dir
62
- result
55
+ OoxmlParser::XlsxParser.parse_xlsx(full_path)
63
56
  end
64
57
  end
65
58
  end
@@ -21,7 +21,7 @@ module OoxmlParser
21
21
  # Parse ContentTypes object
22
22
  # @return [ContentTypes] result of parsing
23
23
  def parse
24
- doc = Nokogiri::XML.parse(File.open("#{OOXMLDocumentObject.path_to_folder}/[Content_Types].xml"))
24
+ doc = parse_xml("#{root_object.unpacked_folder}/[Content_Types].xml")
25
25
  node = doc.xpath('*').first
26
26
 
27
27
  node.xpath('*').each do |node_child|
@@ -3,6 +3,11 @@
3
3
  module OoxmlParser
4
4
  # Class for working with coordinates
5
5
  class Coordinates
6
+ # @return [Regexp] regexp for row name
7
+ ROW_REGEXP = /[a-z]/i.freeze
8
+ # @return [Regexp] regexp for column name
9
+ COLUMN_REGEXP = /\d/.freeze
10
+
6
11
  attr_accessor :row, :column, :list
7
12
 
8
13
  def initialize(row = nil, column = nil, list = nil)
@@ -30,10 +35,10 @@ module OoxmlParser
30
35
  range = arguments_string.split(':')
31
36
 
32
37
  difference = []
33
- symbols_from = range.first.scan(/[a-zA-z]/).join
34
- symbols_to = range.last.scan(/[a-zA-z]/).join
35
- digits_from = range.first.scan(/[0-9]/).join
36
- digits_to = range.last.scan(/[0-9]/).join
38
+ symbols_from = range.first.scan(ROW_REGEXP).join
39
+ symbols_to = range.last.scan(ROW_REGEXP).join
40
+ digits_from = range.first.scan(COLUMN_REGEXP).join
41
+ digits_to = range.last.scan(COLUMN_REGEXP).join
37
42
 
38
43
  difference[0] = [symbols_from, symbols_to] unless symbols_from == symbols_to
39
44
  difference[1] = [digits_from, digits_to] unless digits_from == digits_to
@@ -46,7 +46,7 @@ module OoxmlParser
46
46
  @url = Coordinates.new.parse_string(value.value)
47
47
  when 'id'
48
48
  @id = value.value
49
- @url = OOXMLDocumentObject.get_link_from_rels(@id) unless @id.empty?
49
+ @url = root_object.get_link_from_rels(@id) unless @id.empty?
50
50
  when 'tooltip'
51
51
  @tooltip = value.value
52
52
  when 'ref'
@@ -80,7 +80,7 @@ module OoxmlParser
80
80
  else
81
81
  if meaningful_id?
82
82
  @action = :external_link
83
- @url = OOXMLDocumentObject.get_link_from_rels(@id)
83
+ @url = root_object.get_link_from_rels(@id)
84
84
  end
85
85
  end
86
86
  self
@@ -98,7 +98,7 @@ module OoxmlParser
98
98
  def parse_url_for_slide_link
99
99
  return unless meaningful_id?
100
100
 
101
- @url = OOXMLDocumentObject.get_link_from_rels(@id).scan(/\d+/).join.to_i
101
+ @url = root_object.get_link_from_rels(@id).scan(/\d+/).join.to_i
102
102
  end
103
103
  end
104
104
  end
@@ -28,6 +28,8 @@ module OoxmlParser
28
28
 
29
29
  instance_variables.each do |current_attribute|
30
30
  next if current_attribute == :@parent
31
+ next if instance_variable_get(current_attribute).is_a?(Nokogiri::XML::Element)
32
+
31
33
  return false unless instance_variable_get(current_attribute) == other.instance_variable_get(current_attribute)
32
34
  end
33
35
  true
@@ -47,96 +49,5 @@ module OoxmlParser
47
49
  end
48
50
  xml
49
51
  end
50
-
51
- class << self
52
- # @return [String] path to root subfolder
53
- attr_accessor :root_subfolder
54
- # @return [PresentationTheme] list of themes
55
- attr_accessor :theme
56
- # @return [Array<String>] stack of xmls
57
- attr_accessor :xmls_stack
58
- # @return [String] path to root folder
59
- attr_accessor :path_to_folder
60
-
61
- # Copy this file and rename to zip
62
- # @param path [String] path to file
63
- # @return [String] path to result zip
64
- def copy_file_and_rename_to_zip(path)
65
- file_name = File.basename(path)
66
- tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
67
- file_path = "#{tmp_folder}/#{file_name}"
68
- FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
69
- FileUtils.mkdir_p(tmp_folder)
70
- raise "Cannot find file by path #{path}" unless File.exist?(path)
71
-
72
- FileUtils.cp path, tmp_folder
73
- file_path
74
- end
75
-
76
- # Decrypt file protected with password
77
- # @param path [String] path to file
78
- # @param password [String] password to file
79
- # @return [String] path to decrypted file
80
- def decrypt_file(path, password)
81
- file_name = File.basename(path)
82
- tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
83
- decrypted_path = "#{tmp_folder}/#{file_name}"
84
- binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
85
- OoxmlDecrypt::EncryptedFile.decrypt_to_file(path, binary_password, decrypted_path)
86
-
87
- decrypted_path
88
- end
89
-
90
- # Unzip specified file
91
- # @param path_to_file [String] path to zip file
92
- # @param destination [String] folder to extract
93
- # @return [void]
94
- def unzip_file(path_to_file, destination)
95
- Zip.warn_invalid_date = false
96
- Zip::File.open(path_to_file) do |zip_file|
97
- raise LoadError, "There is no files in zip #{path_to_file}" if zip_file.entries.empty?
98
-
99
- zip_file.each do |file|
100
- file_path = File.join(destination, file.name)
101
- FileUtils.mkdir_p(File.dirname(file_path))
102
- zip_file.extract(file, file_path) unless File.exist?(file_path)
103
- end
104
- end
105
- end
106
-
107
- # @return [String] dir to base of file
108
- def dir
109
- "#{OOXMLDocumentObject.path_to_folder}#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/"
110
- end
111
-
112
- # @return [String] path to current xml file
113
- def current_xml
114
- OOXMLDocumentObject.path_to_folder + OOXMLDocumentObject.xmls_stack.last
115
- end
116
-
117
- # Add file to parsing stack
118
- # @param path [String] path of file to add to stack
119
- # @return [void]
120
- def add_to_xmls_stack(path)
121
- OOXMLDocumentObject.xmls_stack << if path.include?('..')
122
- "#{File.dirname(OOXMLDocumentObject.xmls_stack.last)}/#{path}"
123
- elsif path.start_with?(OOXMLDocumentObject.root_subfolder)
124
- path
125
- else
126
- OOXMLDocumentObject.root_subfolder + path
127
- end
128
- end
129
-
130
- # Get link to file from rels file
131
- # @param id [String] file to get
132
- # @return [String] result
133
- def get_link_from_rels(id)
134
- rels_path = dir + "_rels/#{File.basename(OOXMLDocumentObject.xmls_stack.last)}.rels"
135
- raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
136
-
137
- relationships = Relationships.new.parse_file(rels_path)
138
- relationships.target_by_id(id)
139
- end
140
- end
141
52
  end
142
53
  end
@@ -47,6 +47,8 @@ module OoxmlParser
47
47
  attr_accessor :shade
48
48
  # @return [RunStyle] run style
49
49
  attr_accessor :run_style
50
+ # @return [ValuedChild] ligatures type
51
+ attr_reader :ligatures
50
52
 
51
53
  def initialize(params = {})
52
54
  @font_name = params.fetch(:font_name, '')
@@ -124,6 +126,8 @@ module OoxmlParser
124
126
  @shade = Shade.new(parent: self).parse(node_child)
125
127
  when 'rStyle'
126
128
  @run_style = RunStyle.new(parent: self).parse(node_child)
129
+ when 'ligatures'
130
+ @ligatures = ValuedChild.new(:symbol, parent: self).parse(node_child)
127
131
  end
128
132
  end
129
133
  @font_color = DocxColorScheme.new(parent: self).parse(node)
@@ -14,12 +14,50 @@ module OoxmlParser
14
14
  attr_accessor :default_font_style
15
15
  # @return [ContentTypes] data about content types
16
16
  attr_accessor :content_types
17
+ # @return [String] root sub-folder for object
18
+ attr_reader :root_subfolder
19
+ # @return [String] path to folder with unpacked document
20
+ attr_reader :unpacked_folder
21
+ # @return [Array<String>] list of xmls to parse
22
+ attr_accessor :xmls_stack
17
23
 
18
24
  def initialize(params = {})
19
25
  @default_font_size = params.fetch(:default_font_size, 18)
20
26
  @default_font_typeface = params.fetch(:default_font_typeface, 'Arial')
21
27
  @default_font_style = FontStyle.new
28
+ @unpacked_folder = params.fetch(:unpacked_folder, nil)
29
+ @xmls_stack = []
22
30
  super(parent: nil)
23
31
  end
32
+
33
+ # @return [String] path to current xml file
34
+ def current_xml
35
+ root_object.unpacked_folder + @xmls_stack.last
36
+ end
37
+
38
+ # Add file to parsing stack
39
+ # @param path [String] path of file to add to stack
40
+ # @return [void]
41
+ def add_to_xmls_stack(path)
42
+ @xmls_stack << if path.include?('..')
43
+ "#{File.dirname(@xmls_stack.last)}/#{path}"
44
+ elsif path.start_with?(@root_subfolder)
45
+ path
46
+ else
47
+ @root_subfolder + path
48
+ end
49
+ end
50
+
51
+ # Get link to file from rels file
52
+ # @param id [String] file to get
53
+ # @return [String] result
54
+ def get_link_from_rels(id)
55
+ dir = "#{unpacked_folder}#{File.dirname(@xmls_stack.last)}/"
56
+ rels_path = dir + "_rels/#{File.basename(@xmls_stack.last)}.rels"
57
+ raise LoadError, "Cannot find .rels file by path: #{rels_path}" unless File.exist?(rels_path)
58
+
59
+ relationships = Relationships.new.parse_file(rels_path)
60
+ relationships.target_by_id(id)
61
+ end
24
62
  end
25
63
  end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
+ module OoxmlParser
4
+ # Class for actions with OOXML file
5
+ class OoxmlFile
6
+ # @return [String] path to file
7
+ attr_reader :path
8
+
9
+ def initialize(path)
10
+ @path = path
11
+ end
12
+
13
+ # Copy this file and rename to zip
14
+ # @return [String] path to result zip
15
+ def copy_file_and_rename_to_zip
16
+ file_name = File.basename(@path)
17
+ tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
18
+ @zip_path = "#{tmp_folder}/#{file_name}"
19
+ FileUtils.rm_rf(tmp_folder) if File.directory?(tmp_folder)
20
+ FileUtils.mkdir_p(tmp_folder)
21
+ raise "Cannot find file by path #{@path}" unless File.exist?(@path)
22
+
23
+ FileUtils.cp path, tmp_folder
24
+ end
25
+
26
+ # @return [String] path to folder with zip
27
+ def path_to_folder
28
+ @zip_path.sub(File.basename(@zip_path), '')
29
+ end
30
+
31
+ # Unzip specified file
32
+ # @return [void]
33
+ def unzip
34
+ Zip.warn_invalid_date = false
35
+ Zip::File.open(@zip_path) do |zip_file|
36
+ raise LoadError, "There is no files in zip #{@zip_path}" if zip_file.entries.empty?
37
+
38
+ zip_file.each do |file|
39
+ file_path = File.join(path_to_folder, file.name)
40
+ FileUtils.mkdir_p(File.dirname(file_path))
41
+ zip_file.extract(file, file_path) unless File.exist?(file_path)
42
+ end
43
+ end
44
+ end
45
+
46
+ # @return [Symbol] file type recognized by folder structure
47
+ def format_by_folders
48
+ return :docx if Dir.exist?("#{path_to_folder}/word")
49
+ return :xlsx if Dir.exist?("#{path_to_folder}/xl")
50
+ return :pptx if Dir.exist?("#{path_to_folder}/ppt")
51
+
52
+ :zip
53
+ end
54
+
55
+ # Decrypt file protected with password
56
+ # @param password [String] password to file
57
+ # @return [OoxmlFile] path to decrypted file
58
+ def decrypt(password)
59
+ file_name = File.basename(@path)
60
+ tmp_folder = Dir.mktmpdir('ruby-ooxml-parser')
61
+ decrypted_path = "#{tmp_folder}/#{file_name}"
62
+ binary_password = password.encode('utf-16le').bytes.pack('c*').encode('binary')
63
+ OoxmlDecrypt::EncryptedFile.decrypt_to_file(@path, binary_password, decrypted_path)
64
+
65
+ OoxmlFile.new(decrypted_path)
66
+ end
67
+ end
68
+ end
@@ -1,52 +1,46 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'parser/encryption_checker'
4
+ require_relative 'parser/ooxml_file'
4
5
 
5
6
  module OoxmlParser
6
7
  # Basic class for OoxmlParser
7
8
  class Parser
8
- # Base method to yield parse document of any type
9
- # @param path_to_file [String] file
10
- # @return [CommonDocumentStructure] structure of doc
11
- def self.parse_format(path_to_file)
12
- return nil if EncryptionChecker.new(path_to_file).encrypted?
9
+ class << self
10
+ # Base method to yield parse document of any type
11
+ # @param [OoxmlFile] file with data
12
+ # @return [CommonDocumentStructure] structure of doc
13
+ def parse_format(file)
14
+ return nil if EncryptionChecker.new(file.path).encrypted?
13
15
 
14
- path_to_zip_file = OOXMLDocumentObject.copy_file_and_rename_to_zip(path_to_file)
15
- OOXMLDocumentObject.path_to_folder = path_to_zip_file.sub(File.basename(path_to_zip_file), '')
16
- OOXMLDocumentObject.unzip_file(path_to_zip_file, OOXMLDocumentObject.path_to_folder)
17
- model = yield
18
- model.file_path = path_to_file if model
19
- FileUtils.rm_rf(OOXMLDocumentObject.path_to_folder)
20
- model
21
- end
16
+ file.copy_file_and_rename_to_zip
17
+ file.unzip
18
+ model = yield(file)
19
+ model.file_path = file.path if model
20
+ FileUtils.rm_rf(file.path_to_folder)
21
+ model
22
+ end
22
23
 
23
- # Base method to parse document of any type
24
- # @param path_to_file [String] file
25
- # @return [CommonDocumentStructure] structure of doc
26
- def self.parse(path_to_file, password: nil)
27
- path_to_file = OOXMLDocumentObject.decrypt_file(path_to_file, password) if password
28
- Parser.parse_format(path_to_file) do
29
- format = Parser.recognize_folder_format
30
- case format
31
- when :docx
32
- DocumentStructure.new.parse
33
- when :xlsx
34
- XLSXWorkbook.new.parse
35
- when :pptx
36
- Presentation.new.parse
37
- else
38
- warn "#{path_to_file} is a simple zip file without OOXML content"
24
+ # Base method to parse document of any type
25
+ # @param path_to_file [String] file
26
+ # @return [CommonDocumentStructure] structure of doc
27
+ def parse(path_to_file, password: nil)
28
+ file = OoxmlFile.new(path_to_file)
29
+ file = file.decrypt(password) if password
30
+ Parser.parse_format(file) do |yielded_file|
31
+ format = yielded_file.format_by_folders
32
+ case format
33
+ when :docx
34
+ DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
35
+ when :xlsx
36
+ XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
37
+ when :pptx
38
+ Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
39
+ else
40
+ warn "#{path_to_file} is a simple zip file without OOXML content"
41
+ end
39
42
  end
40
43
  end
41
44
  end
42
-
43
- # Recognize folder format
44
- # @param directory [String] path to dirctory
45
- # @return [Symbol] type of document
46
- def self.recognize_folder_format(directory = OOXMLDocumentObject.path_to_folder)
47
- return :docx if Dir.exist?("#{directory}/word")
48
- return :xlsx if Dir.exist?("#{directory}/xl")
49
- return :pptx if Dir.exist?("#{directory}/ppt")
50
- end
51
45
  end
52
46
  end
@@ -9,8 +9,8 @@ module OoxmlParser
9
9
 
10
10
  def initialize(params = {})
11
11
  @comments_array = []
12
- @file = params.fetch(:file, "#{OOXMLDocumentObject.path_to_folder}word/comments.xml")
13
12
  super(parent: params[:parent])
13
+ @file = params.fetch(:file, "#{root_object.unpacked_folder}word/comments.xml")
14
14
  end
15
15
 
16
16
  # @return [Comment] accessor
@@ -17,7 +17,7 @@ module OoxmlParser
17
17
  # Parse CommentsExtended object
18
18
  # @return [CommentsExtended] result of parsing
19
19
  def parse
20
- file_to_parse = "#{OOXMLDocumentObject.path_to_folder}word/commentsExtended.xml"
20
+ file_to_parse = "#{root_object.unpacked_folder}word/commentsExtended.xml"
21
21
  return nil unless File.exist?(file_to_parse)
22
22
 
23
23
  doc = parse_xml(file_to_parse)
@@ -8,7 +8,7 @@ module OoxmlParser
8
8
  # Parse Document properties
9
9
  # @return [DocumentProperties]
10
10
  def parse
11
- properties_file = "#{OOXMLDocumentObject.path_to_folder}docProps/app.xml"
11
+ properties_file = "#{root_object.unpacked_folder}docProps/app.xml"
12
12
  unless File.exist?(properties_file)
13
13
  warn "There is no 'docProps/app.xml' in docx. It may be some problem with it"
14
14
  return self
@@ -9,7 +9,7 @@ module OoxmlParser
9
9
  # Parse Settings object
10
10
  # @return [DocumentSettings] result of parsing
11
11
  def parse
12
- settings_path = "#{OOXMLDocumentObject.path_to_folder}word/settings.xml"
12
+ settings_path = "#{root_object.unpacked_folder}word/settings.xml"
13
13
  return nil unless File.exist?(settings_path)
14
14
 
15
15
  doc = parse_xml(settings_path)
@@ -47,7 +47,7 @@ module OoxmlParser
47
47
  def parse(node)
48
48
  @id = node.attribute('id').value.to_i
49
49
  parse_type(node)
50
- doc = parse_xml(OOXMLDocumentObject.path_to_folder + xml_path)
50
+ doc = parse_xml(root_object.unpacked_folder + xml_path)
51
51
  doc.search(xpath_for_search).each do |footnote|
52
52
  next unless footnote.attribute('id').value.to_i == @id
53
53
 
@@ -38,10 +38,10 @@ module OoxmlParser
38
38
  # Parse Numbering data
39
39
  # @return [Numbering] result of parse
40
40
  def parse
41
- numbering_xml = "#{OOXMLDocumentObject.path_to_folder}word/numbering.xml"
41
+ numbering_xml = "#{root_object.unpacked_folder}word/numbering.xml"
42
42
  return nil unless File.exist?(numbering_xml)
43
43
 
44
- node = parse_xml(File.open(numbering_xml))
44
+ node = parse_xml(numbering_xml)
45
45
  node.xpath('w:numbering/*').each do |numbering_child_node|
46
46
  case numbering_child_node.name
47
47
  when 'abstractNum'
@@ -43,10 +43,10 @@ module OoxmlParser
43
43
  # @param target [String] name of target
44
44
  # @return [String] path to note xml file
45
45
  def file_path(target)
46
- file = "#{OOXMLDocumentObject.path_to_folder}word/#{target}"
46
+ file = "#{root_object.unpacked_folder}word/#{target}"
47
47
  return file if File.exist?(file)
48
48
 
49
- "#{OOXMLDocumentObject.path_to_folder}#{target}" unless File.exist?(file)
49
+ "#{root_object.unpacked_folder}#{target}" unless File.exist?(file)
50
50
  end
51
51
  end
52
52
  end
@@ -63,8 +63,8 @@ module OoxmlParser
63
63
  when 'cols'
64
64
  @columns = Columns.new.parse(pg_size_subnode)
65
65
  when 'headerReference', 'footerReference'
66
- target = OOXMLDocumentObject.get_link_from_rels(pg_size_subnode.attribute('id').value)
67
- OOXMLDocumentObject.add_to_xmls_stack("word/#{target}")
66
+ target = root_object.get_link_from_rels(pg_size_subnode.attribute('id').value)
67
+ root_object.add_to_xmls_stack("word/#{target}")
68
68
  note = Note.parse(default_paragraph: default_paragraph,
69
69
  default_character: default_character,
70
70
  target: target,
@@ -72,7 +72,7 @@ module OoxmlParser
72
72
  type: File.basename(target).sub('.xml', ''),
73
73
  parent: self)
74
74
  @notes << note
75
- OOXMLDocumentObject.xmls_stack.pop
75
+ root_object.xmls_stack.pop
76
76
  when 'footnotePr'
77
77
  @footnote_properties = FootnoteProperties.new(parent: self).parse(pg_size_subnode)
78
78
  end
@@ -17,7 +17,7 @@ module OoxmlParser
17
17
  # Parse styles data
18
18
  # @return [Styles] result of parsing
19
19
  def parse
20
- doc = parse_xml("#{OOXMLDocumentObject.path_to_folder}word/styles.xml")
20
+ doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
21
21
  doc.xpath('w:styles/*').each do |node_child|
22
22
  case node_child.name
23
23
  when 'docDefaults'
@@ -44,7 +44,7 @@ module OoxmlParser
44
44
  # @return [CommentsExtended] extended comments
45
45
  attr_accessor :comments_extended
46
46
 
47
- def initialize
47
+ def initialize(params = {})
48
48
  @elements = []
49
49
  @notes = []
50
50
  @document_properties = DocumentProperties.new
@@ -158,17 +158,16 @@ module OoxmlParser
158
158
  # @return [DocumentStructure] parsed structure
159
159
  def parse
160
160
  @content_types = ContentTypes.new(parent: self).parse
161
- OOXMLDocumentObject.root_subfolder = 'word/'
162
- OOXMLDocumentObject.xmls_stack = []
161
+ @root_subfolder = 'word/'
163
162
  @comments = []
164
163
  DocumentStructure.default_paragraph_style = DocxParagraph.new
165
164
  DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
166
- @theme = PresentationTheme.parse('word/theme/theme1.xml')
167
- @relationships = Relationships.new(parent: self).parse_file("#{OOXMLDocumentObject.path_to_folder}word/_rels/document.xml.rels")
165
+ @theme = PresentationTheme.new(parent: self).parse('word/theme/theme1.xml')
166
+ @relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}word/_rels/document.xml.rels")
168
167
  parse_styles
169
168
  number = 0
170
- OOXMLDocumentObject.add_to_xmls_stack('word/document.xml')
171
- doc = parse_xml(OOXMLDocumentObject.current_xml)
169
+ root_object.add_to_xmls_stack('word/document.xml')
170
+ doc = parse_xml(root_object.current_xml)
172
171
  doc.search('//w:document').each do |document|
173
172
  document.xpath('w:background').each do |background|
174
173
  @background = DocumentBackground.new(parent: self).parse(background)
@@ -201,12 +200,12 @@ module OoxmlParser
201
200
  end
202
201
  end
203
202
  end
204
- OOXMLDocumentObject.xmls_stack.pop
203
+ root_object.xmls_stack.pop
205
204
  @document_properties = DocumentProperties.new(parent: self).parse
206
205
  @comments = Comments.new(parent: self).parse
207
206
  @comments_extended = CommentsExtended.new(parent: self).parse
208
207
  @comments_document = Comments.new(parent: self,
209
- file: "#{OOXMLDocumentObject.path_to_folder}word/#{relationships.target_by_type('commentsDocument').first}")
208
+ file: "#{root_object.unpacked_folder}word/#{relationships.target_by_type('commentsDocument').first}")
210
209
  .parse
211
210
  @settings = DocumentSettings.new(parent: self).parse
212
211
  self
@@ -215,7 +214,7 @@ module OoxmlParser
215
214
  # Parse default style
216
215
  # @return [void]
217
216
  def parse_default_style
218
- doc = parse_xml("#{OOXMLDocumentObject.path_to_folder}word/styles.xml")
217
+ doc = parse_xml("#{root_object.unpacked_folder}word/styles.xml")
219
218
  doc.search('//w:style').each do |style|
220
219
  next if style.attribute('default').nil?
221
220
 
@@ -256,7 +255,7 @@ module OoxmlParser
256
255
 
257
256
  # Perform parsing styles.xml
258
257
  def parse_styles
259
- file = "#{OOXMLDocumentObject.path_to_folder}/word/styles.xml"
258
+ file = "#{root_object.unpacked_folder}/word/styles.xml"
260
259
  DocumentStructure.default_paragraph_style = DocxParagraph.new(parent: self)
261
260
  DocumentStructure.default_table_paragraph_style = DocxParagraph.new(parent: self)
262
261
  DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
@@ -10,8 +10,9 @@ module OoxmlParser
10
10
  # @param path_to_file [String] file path
11
11
  # @return [DocumentStructure] result of parse
12
12
  def self.parse_docx(path_to_file)
13
- Parser.parse_format(path_to_file) do
14
- DocumentStructure.new.parse
13
+ file = OoxmlFile.new(path_to_file)
14
+ Parser.parse_format(file) do |yielded_file|
15
+ DocumentStructure.new(unpacked_folder: yielded_file.path_to_folder).parse
15
16
  end
16
17
  end
17
18
  end
@@ -15,10 +15,10 @@ module OoxmlParser
15
15
  # Parse CommentAuthors object
16
16
  # @param file [Nokogiri::XML:Element] node to parse
17
17
  # @return [CommentAuthors] result of parsing
18
- def parse(file = "#{OOXMLDocumentObject.path_to_folder}/#{OOXMLDocumentObject.root_subfolder}/commentAuthors.xml")
18
+ def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/commentAuthors.xml")
19
19
  return nil unless File.exist?(file)
20
20
 
21
- document = parse_xml(File.open(file))
21
+ document = parse_xml(file)
22
22
  node = document.xpath('*').first
23
23
 
24
24
  node.xpath('*').each do |node_child|
@@ -15,10 +15,10 @@ module OoxmlParser
15
15
  # Parse PresentationComments object
16
16
  # @param file [Nokogiri::XML:Element] node to parse
17
17
  # @return [PresentationComments] result of parsing
18
- def parse(file = "#{OOXMLDocumentObject.path_to_folder}/#{OOXMLDocumentObject.root_subfolder}/comments/comment1.xml")
18
+ def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/comments/comment1.xml")
19
19
  return nil unless File.exist?(file)
20
20
 
21
- document = parse_xml(File.open(file))
21
+ document = parse_xml(file)
22
22
  node = document.xpath('*').first
23
23
 
24
24
  node.xpath('*').each do |node_child|
@@ -9,46 +9,46 @@ module OoxmlParser
9
9
  # @return [FontScheme] font scheme
10
10
  attr_accessor :font_scheme
11
11
 
12
- def initialize(name = '', color_scheme = {})
13
- @name = name
14
- @color_scheme = color_scheme
15
- super(parent: nil)
12
+ def initialize(parent: nil)
13
+ @name = ''
14
+ @color_scheme = {}
15
+ super
16
16
  end
17
17
 
18
18
  # Parse PresentationTheme
19
19
  # @param file [String] path to file to parse
20
20
  # @return [PresentationTheme] result of parsing
21
- def self.parse(file)
22
- OOXMLDocumentObject.theme = PresentationTheme.new
23
- OOXMLDocumentObject.add_to_xmls_stack(file)
24
- unless File.exist?(OOXMLDocumentObject.current_xml)
25
- OOXMLDocumentObject.xmls_stack.pop
21
+ def parse(file)
22
+ root_object.add_to_xmls_stack(file)
23
+ unless File.exist?(root_object.current_xml)
24
+ root_object.xmls_stack.pop
26
25
  return
27
26
  end
28
- doc = OOXMLDocumentObject.theme.parse_xml(OOXMLDocumentObject.current_xml)
27
+ doc = parse_xml(root_object.current_xml)
28
+
29
29
  doc.xpath('a:theme').each do |theme_node|
30
- OOXMLDocumentObject.theme.name = theme_node.attribute('name').value if theme_node.attribute('name')
30
+ @name = theme_node.attribute('name').value if theme_node.attribute('name')
31
31
  theme_node.xpath('a:themeElements/*').each do |theme_element_node|
32
32
  case theme_element_node.name
33
33
  when 'clrScheme'
34
34
  theme_element_node.xpath('*').each do |color_scheme_element|
35
- OOXMLDocumentObject.theme.color_scheme[color_scheme_element.name.to_sym] = ThemeColor.new.parse(color_scheme_element)
35
+ @color_scheme[color_scheme_element.name.to_sym] = ThemeColor.new.parse(color_scheme_element)
36
36
  end
37
- OOXMLDocumentObject.theme.color_scheme[:background1] = OOXMLDocumentObject.theme.color_scheme[:lt1]
38
- OOXMLDocumentObject.theme.color_scheme[:background2] = OOXMLDocumentObject.theme.color_scheme[:lt2]
39
- OOXMLDocumentObject.theme.color_scheme[:bg1] = OOXMLDocumentObject.theme.color_scheme[:lt1]
40
- OOXMLDocumentObject.theme.color_scheme[:bg2] = OOXMLDocumentObject.theme.color_scheme[:lt2]
41
- OOXMLDocumentObject.theme.color_scheme[:text1] = OOXMLDocumentObject.theme.color_scheme[:dk1]
42
- OOXMLDocumentObject.theme.color_scheme[:text2] = OOXMLDocumentObject.theme.color_scheme[:dk2]
43
- OOXMLDocumentObject.theme.color_scheme[:tx1] = OOXMLDocumentObject.theme.color_scheme[:dk1]
44
- OOXMLDocumentObject.theme.color_scheme[:tx2] = OOXMLDocumentObject.theme.color_scheme[:dk2]
37
+ @color_scheme[:background1] = @color_scheme[:lt1]
38
+ @color_scheme[:background2] = @color_scheme[:lt2]
39
+ @color_scheme[:bg1] = @color_scheme[:lt1]
40
+ @color_scheme[:bg2] = @color_scheme[:lt2]
41
+ @color_scheme[:text1] = @color_scheme[:dk1]
42
+ @color_scheme[:text2] = @color_scheme[:dk2]
43
+ @color_scheme[:tx1] = @color_scheme[:dk1]
44
+ @color_scheme[:tx2] = @color_scheme[:dk2]
45
45
  when 'fontScheme'
46
- OOXMLDocumentObject.theme.font_scheme = FontScheme.new(parent: self).parse(theme_element_node)
46
+ @font_scheme = FontScheme.new(parent: self).parse(theme_element_node)
47
47
  end
48
48
  end
49
49
  end
50
- OOXMLDocumentObject.xmls_stack.pop
51
- OOXMLDocumentObject.theme
50
+ root_object.xmls_stack.pop
51
+ self
52
52
  end
53
53
  end
54
54
  end
@@ -30,9 +30,9 @@ module OoxmlParser
30
30
  when 'tbl'
31
31
  graphic_data << Table.new(parent: self).parse(graphic_node_child)
32
32
  when 'chart'
33
- OOXMLDocumentObject.add_to_xmls_stack(OOXMLDocumentObject.get_link_from_rels(graphic_node_child.attribute('id').value))
33
+ root_object.add_to_xmls_stack(root_object.get_link_from_rels(graphic_node_child.attribute('id').value))
34
34
  graphic_data << Chart.new(parent: self).parse
35
- OOXMLDocumentObject.xmls_stack.pop
35
+ root_object.xmls_stack.pop
36
36
  when 'oleObj'
37
37
  graphic_data << OleObject.new(parent: self).parse(graphic_node_child)
38
38
  end
@@ -51,9 +51,9 @@ module OoxmlParser
51
51
  # Parse Slide object
52
52
  # @return [Slide] result of parsing
53
53
  def parse
54
- OOXMLDocumentObject.add_to_xmls_stack(@xml_path)
54
+ root_object.add_to_xmls_stack(@xml_path)
55
55
  @name = File.basename(@xml_path, '.*')
56
- node = parse_xml(OOXMLDocumentObject.current_xml)
56
+ node = parse_xml(root_object.current_xml)
57
57
  node.xpath('//p:sld/*').each do |node_child|
58
58
  case node_child.name
59
59
  when 'cSld'
@@ -66,8 +66,8 @@ module OoxmlParser
66
66
  @alternate_content = PresentationAlternateContent.new(parent: self).parse(node_child)
67
67
  end
68
68
  end
69
- OOXMLDocumentObject.xmls_stack.pop
70
- @relationships = Relationships.new(parent: self).parse_file("#{OOXMLDocumentObject.path_to_folder}#{File.dirname(@xml_path)}/_rels/#{@name}.xml.rels")
69
+ root_object.xmls_stack.pop
70
+ @relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}#{File.dirname(@xml_path)}/_rels/#{@name}.xml.rels")
71
71
  parse_note
72
72
  self
73
73
  end
@@ -79,7 +79,7 @@ module OoxmlParser
79
79
  notes_target = @relationships.target_by_type('notes')
80
80
  return nil if notes_target.empty?
81
81
 
82
- @note = PresentationNotes.new(parent: self).parse("#{OOXMLDocumentObject.path_to_folder}#{File.dirname(@xml_path)}/#{notes_target.first}")
82
+ @note = PresentationNotes.new(parent: self).parse("#{root_object.unpacked_folder}#{File.dirname(@xml_path)}/#{notes_target.first}")
83
83
  end
84
84
  end
85
85
  end
@@ -10,7 +10,7 @@ module OoxmlParser
10
10
  # @param file [String] path to file to parse
11
11
  # @return [SlideLayoutFile] result of parsing
12
12
  def parse(file)
13
- OOXMLDocumentObject.add_to_xmls_stack(file.gsub(OOXMLDocumentObject.path_to_folder, ''))
13
+ root_object.add_to_xmls_stack(file.gsub(root_object.unpacked_folder, ''))
14
14
  doc = parse_xml(file)
15
15
  doc.xpath('p:sldLayout/*').each do |node_child|
16
16
  case node_child.name
@@ -18,7 +18,7 @@ module OoxmlParser
18
18
  @common_slide_data = CommonSlideData.new(parent: self).parse(node_child)
19
19
  end
20
20
  end
21
- OOXMLDocumentObject.xmls_stack.pop
21
+ root_object.xmls_stack.pop
22
22
  self
23
23
  end
24
24
  end
@@ -5,7 +5,7 @@ module OoxmlParser
5
5
  module SlideLayoutsHelper
6
6
  # @return [Array<String>] list of slide layouts files
7
7
  def slide_layouts_files
8
- Dir["#{OOXMLDocumentObject.path_to_folder}ppt/slideLayouts/*.xml"]
8
+ Dir["#{root_object.unpacked_folder}ppt/slideLayouts/*.xml"]
9
9
  end
10
10
 
11
11
  private
@@ -10,7 +10,7 @@ module OoxmlParser
10
10
  # @param file [String] path to file to parse
11
11
  # @return [SlideMasterFile] result of parsing
12
12
  def parse(file)
13
- OOXMLDocumentObject.add_to_xmls_stack(file.gsub(OOXMLDocumentObject.path_to_folder, ''))
13
+ root_object.add_to_xmls_stack(file.gsub(root_object.unpacked_folder, ''))
14
14
  doc = parse_xml(file)
15
15
  doc.xpath('p:sldMaster/*').each do |node_child|
16
16
  case node_child.name
@@ -18,7 +18,7 @@ module OoxmlParser
18
18
  @common_slide_data = CommonSlideData.new(parent: self).parse(node_child)
19
19
  end
20
20
  end
21
- OOXMLDocumentObject.xmls_stack.pop
21
+ root_object.xmls_stack.pop
22
22
  self
23
23
  end
24
24
  end
@@ -5,7 +5,7 @@ module OoxmlParser
5
5
  module SlideMastersHelper
6
6
  # @return [Array<String>] list of slide masters files
7
7
  def slide_masters_files
8
- Dir["#{OOXMLDocumentObject.path_to_folder}ppt/slideMasters/*.xml"]
8
+ Dir["#{root_object.unpacked_folder}ppt/slideMasters/*.xml"]
9
9
  end
10
10
 
11
11
  private
@@ -14,7 +14,7 @@ module OoxmlParser
14
14
  # Parse TableStyles object
15
15
  # @param file [Nokogiri::XML:Element] node to parse
16
16
  # @return [TableStyles] result of parsing
17
- def parse(file = "#{OOXMLDocumentObject.path_to_folder}/#{OOXMLDocumentObject.root_subfolder}/tableStyles.xml")
17
+ def parse(file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/tableStyles.xml")
18
18
  return nil unless File.exist?(file)
19
19
 
20
20
  document = parse_xml(file)
@@ -43,11 +43,10 @@ module OoxmlParser
43
43
  # @return [Presentation] parsed presentation
44
44
  def parse
45
45
  @content_types = ContentTypes.new(parent: self).parse
46
- OOXMLDocumentObject.root_subfolder = 'ppt/'
47
- OOXMLDocumentObject.xmls_stack = []
48
- OOXMLDocumentObject.add_to_xmls_stack('ppt/presentation.xml')
49
- doc = parse_xml(OOXMLDocumentObject.current_xml)
50
- @theme = PresentationTheme.parse('ppt/theme/theme1.xml')
46
+ @root_subfolder = 'ppt/'
47
+ root_object.add_to_xmls_stack('ppt/presentation.xml')
48
+ doc = parse_xml(root_object.current_xml)
49
+ @theme = PresentationTheme.new(parent: self).parse('ppt/theme/theme1.xml')
51
50
  @table_styles = TableStyles.new(parent: self).parse
52
51
  @comment_authors = CommentAuthors.new(parent: self).parse
53
52
  @comments = PresentationComments.new(parent: self).parse
@@ -60,13 +59,13 @@ module OoxmlParser
60
59
  presentation_node_child.xpath('p:sldId').each do |silde_id_node|
61
60
  slide_id = silde_id_node.attr('r:id')
62
61
  @slides << Slide.new(parent: self,
63
- xml_path: "#{OOXMLDocumentObject.root_subfolder}/#{OOXMLDocumentObject.get_link_from_rels(slide_id)}")
62
+ xml_path: "#{root_object.root_subfolder}/#{root_object.get_link_from_rels(slide_id)}")
64
63
  .parse
65
64
  end
66
65
  end
67
66
  end
68
- OOXMLDocumentObject.xmls_stack.pop
69
- @relationships = Relationships.new(parent: self).parse_file("#{OOXMLDocumentObject.path_to_folder}/ppt/_rels/presentation.xml.rels")
67
+ root_object.xmls_stack.pop
68
+ @relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}/ppt/_rels/presentation.xml.rels")
70
69
  parse_slide_layouts
71
70
  parse_slide_masters
72
71
  self
@@ -9,8 +9,9 @@ module OoxmlParser
9
9
  # @param path_to_file [String] file path
10
10
  # @return [Presentation] result of parse
11
11
  def self.parse_pptx(path_to_file)
12
- Parser.parse_format(path_to_file) do
13
- Presentation.new.parse
12
+ file = OoxmlFile.new(path_to_file)
13
+ Parser.parse_format(file) do |yielded_file|
14
+ Presentation.new(unpacked_folder: yielded_file.path_to_folder).parse
14
15
  end
15
16
  end
16
17
  end
@@ -4,6 +4,6 @@ module OoxmlParser
4
4
  # This module holds the RuboCop version information.
5
5
  module Version
6
6
  # [String] Version of Gem
7
- STRING = '0.29.0'
7
+ STRING = '0.31.0'
8
8
  end
9
9
  end
@@ -15,8 +15,8 @@ module OoxmlParser
15
15
  # @param file [String] file to parse
16
16
  # @return [Chartsheet] result of parsing
17
17
  def parse(file)
18
- OOXMLDocumentObject.add_to_xmls_stack(OOXMLDocumentObject.root_subfolder + file)
19
- doc = parse_xml(OOXMLDocumentObject.current_xml)
18
+ root_object.add_to_xmls_stack(root_object.root_subfolder + file)
19
+ doc = parse_xml(root_object.current_xml)
20
20
  node = doc.xpath('//xmlns:chartsheet').first
21
21
  node.xpath('*').each do |node_child|
22
22
  case node_child.name
@@ -26,7 +26,7 @@ module OoxmlParser
26
26
  end
27
27
  end
28
28
  end
29
- OOXMLDocumentObject.xmls_stack.pop
29
+ root_object.xmls_stack.pop
30
30
  self
31
31
  end
32
32
  end
@@ -33,7 +33,7 @@ module OoxmlParser
33
33
  # @return [PivotCacheDefinition] pivot cache definition for current pivot cache
34
34
  def parse_pivot_cache_definition
35
35
  definition_file = root_object.relationships.target_by_id(id)
36
- full_file_path = "#{OOXMLDocumentObject.path_to_folder}/xl/#{definition_file}"
36
+ full_file_path = "#{root_object.unpacked_folder}/xl/#{definition_file}"
37
37
  @pivot_cache_definition = PivotCacheDefinition.new(parent: root_object)
38
38
  .parse(full_file_path)
39
39
  end
@@ -55,7 +55,7 @@ module OoxmlParser
55
55
  # @param [String] file path
56
56
  # @return [PivotTableDefinition] result of parsing
57
57
  def parse(file)
58
- doc = Nokogiri::XML.parse(File.open("#{OOXMLDocumentObject.path_to_folder}/#{file}"))
58
+ doc = parse_xml("#{root_object.unpacked_folder}/#{file}")
59
59
  node = doc.xpath('//xmlns:pivotTableDefinition').first
60
60
  node.attributes.each do |key, value|
61
61
  case key
@@ -32,7 +32,7 @@ module OoxmlParser
32
32
  # Parse StyleSheet object
33
33
  # @return [StyleSheet] result of parsing
34
34
  def parse
35
- doc = parse_xml("#{OOXMLDocumentObject.path_to_folder}/#{OOXMLDocumentObject.root_subfolder}/styles.xml")
35
+ doc = parse_xml("#{root_object.unpacked_folder}/#{root_object.root_subfolder}/styles.xml")
36
36
  doc.root.xpath('*').each do |node_child|
37
37
  case node_child.name
38
38
  when 'numFmts'
@@ -19,8 +19,8 @@ module OoxmlParser
19
19
  # @param node [Nokogiri::XML:Element] node to parse
20
20
  # @return [TablePart] result of parsing
21
21
  def parse(node)
22
- link_to_table_part_xml = OOXMLDocumentObject.get_link_from_rels(node.attribute('id').value)
23
- doc = parse_xml(OOXMLDocumentObject.path_to_folder + link_to_table_part_xml.gsub('..', 'xl'))
22
+ link_to_table_part_xml = root_object.get_link_from_rels(node.attribute('id').value)
23
+ doc = parse_xml(root_object.unpacked_folder + link_to_table_part_xml.gsub('..', 'xl'))
24
24
  table_node = doc.xpath('xmlns:table').first
25
25
  table_node.attributes.each do |key, value|
26
26
  case key
@@ -61,9 +61,9 @@ module OoxmlParser
61
61
  # Perform parsing of relationships
62
62
  # @return [nil]
63
63
  def parse_relationships
64
- OOXMLDocumentObject.add_to_xmls_stack("#{OOXMLDocumentObject.root_subfolder}/worksheets/_rels/#{@xml_name}.rels")
65
- @relationships = Relationships.new(parent: self).parse_file(OOXMLDocumentObject.current_xml) if File.exist?(OOXMLDocumentObject.current_xml)
66
- OOXMLDocumentObject.xmls_stack.pop
64
+ root_object.add_to_xmls_stack("#{root_object.root_subfolder}/worksheets/_rels/#{@xml_name}.rels")
65
+ @relationships = Relationships.new(parent: self).parse_file(root_object.current_xml) if File.exist?(root_object.current_xml)
66
+ root_object.xmls_stack.pop
67
67
  end
68
68
 
69
69
  # @return [True, false] if structure contain any user data
@@ -79,7 +79,7 @@ module OoxmlParser
79
79
 
80
80
  # Parse list of drawings in file
81
81
  def parse_drawing
82
- drawing_node = parse_xml(OOXMLDocumentObject.current_xml)
82
+ drawing_node = parse_xml(root_object.current_xml)
83
83
  drawing_node.xpath('xdr:wsDr/*').each do |drawing_node_child|
84
84
  @drawings << XlsxDrawing.new(parent: self).parse(drawing_node_child)
85
85
  end
@@ -91,8 +91,8 @@ module OoxmlParser
91
91
  def parse(path_to_xml_file)
92
92
  @xml_name = File.basename path_to_xml_file
93
93
  parse_relationships
94
- OOXMLDocumentObject.add_to_xmls_stack("#{OOXMLDocumentObject.root_subfolder}/worksheets/#{File.basename(path_to_xml_file)}")
95
- doc = parse_xml(OOXMLDocumentObject.current_xml)
94
+ root_object.add_to_xmls_stack("#{root_object.root_subfolder}/worksheets/#{File.basename(path_to_xml_file)}")
95
+ doc = parse_xml(root_object.current_xml)
96
96
  sheet = doc.search('//xmlns:worksheet').first
97
97
  sheet.xpath('*').each do |worksheet_node_child|
98
98
  case worksheet_node_child.name
@@ -107,11 +107,11 @@ module OoxmlParser
107
107
  @merge << merge_node.attribute('ref').value.to_s
108
108
  end
109
109
  when 'drawing'
110
- path_to_drawing = OOXMLDocumentObject.get_link_from_rels(worksheet_node_child.attribute('id').value)
110
+ path_to_drawing = root_object.get_link_from_rels(worksheet_node_child.attribute('id').value)
111
111
  unless path_to_drawing.nil?
112
- OOXMLDocumentObject.add_to_xmls_stack(path_to_drawing)
112
+ root_object.add_to_xmls_stack(path_to_drawing)
113
113
  parse_drawing
114
- OOXMLDocumentObject.xmls_stack.pop
114
+ root_object.xmls_stack.pop
115
115
  end
116
116
  when 'hyperlinks'
117
117
  worksheet_node_child.xpath('xmlns:hyperlink').each do |hyperlink_node|
@@ -150,7 +150,7 @@ module OoxmlParser
150
150
  end
151
151
  end
152
152
  parse_comments
153
- OOXMLDocumentObject.xmls_stack.pop
153
+ root_object.xmls_stack.pop
154
154
  self
155
155
  end
156
156
 
@@ -175,7 +175,7 @@ module OoxmlParser
175
175
  comments_target = relationships.target_by_type('comment')
176
176
  return if comments_target.empty?
177
177
 
178
- comments_file = "#{OOXMLDocumentObject.path_to_folder}/#{OOXMLDocumentObject.root_subfolder}/#{comments_target.first.gsub('..', '')}"
178
+ comments_file = "#{root_object.unpacked_folder}/#{root_object.root_subfolder}/#{comments_target.first.gsub('..', '')}"
179
179
  @comments = ExcelComments.new(parent: self).parse(comments_file)
180
180
  end
181
181
  end
@@ -109,7 +109,7 @@ module OoxmlParser
109
109
  shared_strings_target = relationships.target_by_type('sharedString')
110
110
  return if shared_strings_target.empty?
111
111
 
112
- shared_string_file = "#{OOXMLDocumentObject.path_to_folder}/xl/#{shared_strings_target.first}"
112
+ shared_string_file = "#{root_object.unpacked_folder}/xl/#{shared_strings_target.first}"
113
113
  @shared_strings_table = SharedStringTable.new(parent: self).parse(shared_string_file)
114
114
  end
115
115
 
@@ -117,13 +117,12 @@ module OoxmlParser
117
117
  # @return [XLSXWorkbook]
118
118
  def parse
119
119
  @content_types = ContentTypes.new(parent: self).parse
120
- @relationships = Relationships.new(parent: self).parse_file("#{OOXMLDocumentObject.path_to_folder}xl/_rels/workbook.xml.rels")
120
+ @relationships = Relationships.new(parent: self).parse_file("#{root_object.unpacked_folder}xl/_rels/workbook.xml.rels")
121
121
  parse_shared_strings
122
- OOXMLDocumentObject.xmls_stack = []
123
- OOXMLDocumentObject.root_subfolder = 'xl/'
124
- OOXMLDocumentObject.add_to_xmls_stack('xl/workbook.xml')
125
- @doc = Nokogiri::XML.parse(File.open(OOXMLDocumentObject.current_xml))
126
- @theme = PresentationTheme.parse("xl/#{link_to_theme_xml}") if link_to_theme_xml
122
+ @root_subfolder = 'xl/'
123
+ root_object.add_to_xmls_stack('xl/workbook.xml')
124
+ @doc = parse_xml(root_object.current_xml)
125
+ @theme = PresentationTheme.new(parent: self).parse("xl/#{link_to_theme_xml}") if link_to_theme_xml
127
126
  @style_sheet = StyleSheet.new(parent: self).parse
128
127
  @doc.xpath('xmlns:workbook/xmlns:sheets/xmlns:sheet').each do |sheet|
129
128
  @sheets << Sheet.new(parent: self).parse(sheet)
@@ -139,7 +138,7 @@ module OoxmlParser
139
138
  parse_pivot_table
140
139
  parse_defined_names
141
140
  parse_workbook_protection
142
- OOXMLDocumentObject.xmls_stack.pop
141
+ root_object.xmls_stack.pop
143
142
  self
144
143
  end
145
144
 
@@ -9,8 +9,9 @@ module OoxmlParser
9
9
  # @param path_to_file [String] file path
10
10
  # @return [XLSXWorkbook] result of parse
11
11
  def self.parse_xlsx(path_to_file)
12
- Parser.parse_format(path_to_file) do
13
- XLSXWorkbook.new.parse
12
+ file = OoxmlFile.new(path_to_file)
13
+ Parser.parse_format(file) do |yielded_file|
14
+ XLSXWorkbook.new(unpacked_folder: yielded_file.path_to_folder).parse
14
15
  end
15
16
  end
16
17
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ooxml_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.29.0
4
+ version: 0.31.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ONLYOFFICE
@@ -10,7 +10,7 @@ authors:
10
10
  autorequire:
11
11
  bindir: bin
12
12
  cert_chain: []
13
- date: 2022-09-05 00:00:00.000000000 Z
13
+ date: 2022-09-27 00:00:00.000000000 Z
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
16
16
  name: nokogiri
@@ -363,6 +363,7 @@ files:
363
363
  - lib/ooxml_parser/common_parser/common_document_structure.rb
364
364
  - lib/ooxml_parser/common_parser/parser.rb
365
365
  - lib/ooxml_parser/common_parser/parser/encryption_checker.rb
366
+ - lib/ooxml_parser/common_parser/parser/ooxml_file.rb
366
367
  - lib/ooxml_parser/configuration.rb
367
368
  - lib/ooxml_parser/docx_parser.rb
368
369
  - lib/ooxml_parser/docx_parser/document_structure.rb
@@ -607,7 +608,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
607
608
  - !ruby/object:Gem::Version
608
609
  version: '0'
609
610
  requirements: []
610
- rubygems_version: 3.3.21
611
+ rubygems_version: 3.3.22
611
612
  signing_key:
612
613
  specification_version: 4
613
614
  summary: OoxmlParser Gem