ooxml_parser 0.28.0 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ddd7d84f5f19a2b12fad76d455ebc614f9eb1b7a5c6f96f3585e649dc2569c4a
4
- data.tar.gz: ca87a76367cb58ed7d02c7be6bcdb2ffe1e6635cccccff5a5ca33f44e44ed60b
3
+ metadata.gz: ae537423c384b461a5c2eb77bfe60b692ab334e1bd6ae22cc928e28230302997
4
+ data.tar.gz: c8696409b7862346b631367d7fa014ceea48e18deb3a6b9832fbb4dcc57dab13
5
5
  SHA512:
6
- metadata.gz: 8e85e11b6a5faf18784cc9f3e9b216a1aad962aaf2be54f6dc35d1e3c8fbb73e9b4ed86270e796f01260fcdf2404729c2a36b5b9d133020916f142e12f32b903
7
- data.tar.gz: 5e3c2129a515ce309a33e5f8c75847cde732a6ce4108f36bb8c3222d3b02f48b42bef3faa9eecaafc063f691e58d39e020f1eda3e44767b9452813ab8d3ca087
6
+ metadata.gz: 9b3655be06b3aa59bdb700ba359f0dffd6bbe54840978665870673edb2e7249c179807b102cc7b49a503c638b0508a3cb46f0183afa4be7a8b8baff93b3dbab6
7
+ data.tar.gz: 36b999f281320ca0792e6504060eb65ba0f97005ed82b131dea276f6b136e6a0cca0281a3ab4a0c67ce213fe31e26263a1603bc7cf4ac10a847e264e64ccd512
@@ -36,7 +36,7 @@ module OoxmlParser
36
36
  full_path_to_file = OOXMLDocumentObject.path_to_folder + OOXMLDocumentObject.root_subfolder + @path.gsub('..', '')
37
37
  if File.exist?(full_path_to_file)
38
38
  @content = if File.extname(@path) == '.xlsx'
39
- OoxmlParser::Parser.parse(full_path_to_file)
39
+ parse_ole_xlsx(full_path_to_file)
40
40
  else
41
41
  File.binread(full_path_to_file)
42
42
  end
@@ -45,5 +45,21 @@ module OoxmlParser
45
45
  end
46
46
  self
47
47
  end
48
+
49
+ private
50
+
51
+ # Parse ole xlsx file
52
+ # @param [String] full_path to file
53
+ # @return [XLSXWorkbook]
54
+ def parse_ole_xlsx(full_path)
55
+ # TODO: Fix this ugly hack with global vars
56
+ # by replacing all global variables
57
+ stack = OOXMLDocumentObject.xmls_stack
58
+ dir = OOXMLDocumentObject.path_to_folder
59
+ result = OoxmlParser::Parser.parse(full_path)
60
+ OOXMLDocumentObject.xmls_stack = stack
61
+ OOXMLDocumentObject.path_to_folder = dir
62
+ result
63
+ end
48
64
  end
49
65
  end
@@ -29,7 +29,7 @@ module OoxmlParser
29
29
  format = Parser.recognize_folder_format
30
30
  case format
31
31
  when :docx
32
- DocumentStructure.parse
32
+ DocumentStructure.new.parse
33
33
  when :xlsx
34
34
  XLSXWorkbook.new.parse
35
35
  when :pptx
@@ -21,8 +21,12 @@ module OoxmlParser
21
21
  attr_accessor :next_style
22
22
  # @return [DocxParagraphRun] run properties
23
23
  attr_accessor :run_properties
24
+ # @return [Nokogiri::XML::Node] run properties node
25
+ attr_accessor :run_properties_node
24
26
  # @return [ParagraphProperties] paragraph properties
25
27
  attr_accessor :paragraph_properties
28
+ # @return [Nokogiri::XML::Node] paragraph properties node
29
+ attr_accessor :paragraph_properties_node
26
30
  # @return [TableProperties] properties of table
27
31
  attr_accessor :table_properties
28
32
  # @return [Array, TableStyleProperties] list of table style properties
@@ -76,9 +80,11 @@ module OoxmlParser
76
80
  when 'next'
77
81
  @next_style = subnode.attribute('val').value
78
82
  when 'rPr'
79
- @run_properties = DocxParagraphRun.new(parent: self).parse_properties(subnode)
83
+ @run_properties_node = subnode
84
+ @run_properties = DocxParagraphRun.new(parent: self).parse_properties(@run_properties_node)
80
85
  when 'pPr'
81
- @paragraph_properties = ParagraphProperties.new(parent: self).parse(subnode)
86
+ @paragraph_properties_node = subnode
87
+ @paragraph_properties = ParagraphProperties.new(parent: self).parse(@paragraph_properties_node)
82
88
  when 'tblPr'
83
89
  @table_properties = TableProperties.new(parent: self).parse(subnode)
84
90
  when 'trPr'
@@ -12,7 +12,6 @@ require_relative 'docx_paragraph/inserted'
12
12
  require_relative 'docx_paragraph/structured_document_tag'
13
13
  require_relative 'docx_paragraph/frame_properties'
14
14
  require_relative 'docx_paragraph/docx_formula'
15
- require_relative 'docx_paragraph/style_parametres'
16
15
  module OoxmlParser
17
16
  # Class for data of DocxParagraph
18
17
  class DocxParagraph < OOXMLDocumentObject
@@ -202,7 +201,8 @@ module OoxmlParser
202
201
  when 'contextualSpacing'
203
202
  @contextual_spacing = true
204
203
  when 'pStyle'
205
- parse_paragraph_style_xml(node_child.attribute('val').value, default_char_style)
204
+ @paragraph_style_ref = ParagraphStyleRef.new(parent: self).parse(node_child)
205
+ fill_style_data(default_char_style)
206
206
  when 'ind'
207
207
  @ind = DocumentStructure.default_paragraph_style.ind.dup.parse(node_child)
208
208
  when 'numPr'
@@ -229,24 +229,13 @@ module OoxmlParser
229
229
  self
230
230
  end
231
231
 
232
- # Parse style xml
233
- # @param id [String] id of style to parse
232
+ # Fill data from styles
234
233
  # @param character_style [DocxParagraphRun] style to parse
235
234
  # @return [void]
236
- def parse_paragraph_style_xml(id, character_style)
237
- doc = parse_xml("#{OOXMLDocumentObject.path_to_folder}word/styles.xml")
238
- doc.search('//w:style').each do |style|
239
- next unless style.attribute('styleId').value == id
240
-
241
- style.xpath('w:pPr').each do |p_pr|
242
- parse_paragraph_style(p_pr, character_style)
243
- @style = StyleParametres.new(parent: self).parse(style)
244
- end
245
- style.xpath('w:rPr').each do |r_pr|
246
- character_style.parse_properties(r_pr)
247
- end
248
- break
249
- end
235
+ def fill_style_data(character_style)
236
+ @style = root_object.document_style_by_id(@paragraph_style_ref.value)
237
+ parse_paragraph_style(@style.paragraph_properties_node, character_style) if @style.paragraph_properties_node
238
+ character_style.parse_properties(@style.run_properties_node) if @style.run_properties_node
250
239
  end
251
240
 
252
241
  extend Gem::Deprecate
@@ -156,61 +156,60 @@ module OoxmlParser
156
156
 
157
157
  # Parse docx file
158
158
  # @return [DocumentStructure] parsed structure
159
- def self.parse
160
- doc_structure = DocumentStructure.new
161
- doc_structure.content_types = ContentTypes.new(parent: doc_structure).parse
159
+ def parse
160
+ @content_types = ContentTypes.new(parent: self).parse
162
161
  OOXMLDocumentObject.root_subfolder = 'word/'
163
162
  OOXMLDocumentObject.xmls_stack = []
164
163
  @comments = []
165
164
  DocumentStructure.default_paragraph_style = DocxParagraph.new
166
- DocumentStructure.default_run_style = DocxParagraphRun.new(parent: doc_structure)
167
- doc_structure.theme = PresentationTheme.parse('word/theme/theme1.xml')
168
- doc_structure.relationships = Relationships.new(parent: self).parse_file("#{OOXMLDocumentObject.path_to_folder}word/_rels/document.xml.rels")
169
- doc_structure.parse_styles
165
+ DocumentStructure.default_run_style = DocxParagraphRun.new(parent: self)
166
+ @theme = PresentationTheme.parse('word/theme/theme1.xml')
167
+ @relationships = Relationships.new(parent: self).parse_file("#{OOXMLDocumentObject.path_to_folder}word/_rels/document.xml.rels")
168
+ parse_styles
170
169
  number = 0
171
170
  OOXMLDocumentObject.add_to_xmls_stack('word/document.xml')
172
- doc = doc_structure.parse_xml(OOXMLDocumentObject.current_xml)
171
+ doc = parse_xml(OOXMLDocumentObject.current_xml)
173
172
  doc.search('//w:document').each do |document|
174
173
  document.xpath('w:background').each do |background|
175
- doc_structure.background = DocumentBackground.new(parent: doc_structure).parse(background)
174
+ @background = DocumentBackground.new(parent: self).parse(background)
176
175
  end
177
176
  document.xpath('w:body').each do |body|
178
177
  body.xpath('*').each do |element|
179
178
  case element.name
180
179
  when 'p'
181
180
  child = element.child
182
- unless child.nil? && doc_structure.elements.last.instance_of?(Table)
183
- paragraph_style = DocumentStructure.default_paragraph_style.dup.parse(element, number, DocumentStructure.default_run_style, parent: doc_structure)
181
+ unless child.nil? && @elements.last.instance_of?(Table)
182
+ paragraph_style = DocumentStructure.default_paragraph_style.dup.parse(element, number, DocumentStructure.default_run_style, parent: self)
184
183
  number += 1
185
- doc_structure.elements << paragraph_style.dup
184
+ @elements << paragraph_style.dup
186
185
  end
187
186
  when 'tbl'
188
- table = Table.new(parent: doc_structure).parse(element,
189
- number,
190
- TableProperties.new)
187
+ table = Table.new(parent: self).parse(element,
188
+ number,
189
+ TableProperties.new)
191
190
  number += 1
192
- doc_structure.elements << table
191
+ @elements << table
193
192
  when 'sdt'
194
- doc_structure.elements << StructuredDocumentTag.new(parent: doc_structure).parse(element)
193
+ @elements << StructuredDocumentTag.new(parent: self).parse(element)
195
194
  end
196
195
  end
197
196
  body.xpath('w:sectPr').each do |sect_pr|
198
- doc_structure.page_properties = PageProperties.new(parent: doc_structure).parse(sect_pr,
199
- DocumentStructure.default_paragraph_style,
200
- DocumentStructure.default_run_style)
201
- doc_structure.notes = doc_structure.page_properties.notes # keep copy of notes to compatibility with previous docx models
197
+ @page_properties = PageProperties.new(parent: self).parse(sect_pr,
198
+ DocumentStructure.default_paragraph_style,
199
+ DocumentStructure.default_run_style)
200
+ @notes = page_properties.notes # keep copy of notes for compatibility with previous docx models
202
201
  end
203
202
  end
204
203
  end
205
204
  OOXMLDocumentObject.xmls_stack.pop
206
- doc_structure.document_properties = DocumentProperties.new(parent: doc_structure).parse
207
- doc_structure.comments = Comments.new(parent: doc_structure).parse
208
- doc_structure.comments_extended = CommentsExtended.new(parent: doc_structure).parse
209
- doc_structure.comments_document = Comments.new(parent: doc_structure,
210
- file: "#{OOXMLDocumentObject.path_to_folder}word/#{doc_structure.relationships.target_by_type('commentsDocument').first}")
211
- .parse
212
- doc_structure.settings = DocumentSettings.new(parent: doc_structure).parse
213
- doc_structure
205
+ @document_properties = DocumentProperties.new(parent: self).parse
206
+ @comments = Comments.new(parent: self).parse
207
+ @comments_extended = CommentsExtended.new(parent: self).parse
208
+ @comments_document = Comments.new(parent: self,
209
+ file: "#{OOXMLDocumentObject.path_to_folder}word/#{relationships.target_by_type('commentsDocument').first}")
210
+ .parse
211
+ @settings = DocumentSettings.new(parent: self).parse
212
+ self
214
213
  end
215
214
 
216
215
  # Parse default style
@@ -11,7 +11,7 @@ module OoxmlParser
11
11
  # @return [DocumentStructure] result of parse
12
12
  def self.parse_docx(path_to_file)
13
13
  Parser.parse_format(path_to_file) do
14
- DocumentStructure.parse
14
+ DocumentStructure.new.parse
15
15
  end
16
16
  end
17
17
  end
@@ -4,6 +4,6 @@ module OoxmlParser
4
4
  # This module holds the OoxmlParser version information.
5
5
  module Version
6
6
  # [String] Version of Gem
7
- STRING = '0.28.0'
7
+ STRING = '0.29.0'
8
8
  end
9
9
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ooxml_parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.28.0
4
+ version: 0.29.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - ONLYOFFICE
@@ -423,7 +423,6 @@ files:
423
423
  - lib/ooxml_parser/docx_parser/document_structure/docx_paragraph/sdt/sdt_content.rb
424
424
  - lib/ooxml_parser/docx_parser/document_structure/docx_paragraph/sdt/sdt_properties.rb
425
425
  - lib/ooxml_parser/docx_parser/document_structure/docx_paragraph/structured_document_tag.rb
426
- - lib/ooxml_parser/docx_parser/document_structure/docx_paragraph/style_parametres.rb
427
426
  - lib/ooxml_parser/docx_parser/document_structure/header_footer.rb
428
427
  - lib/ooxml_parser/docx_parser/document_structure/numbering.rb
429
428
  - lib/ooxml_parser/docx_parser/document_structure/numbering/abstract_numbering.rb
@@ -1,30 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module OoxmlParser
4
- # Style Parameter Data
5
- class StyleParametres < OOXMLDocumentObject
6
- attr_accessor :q_format, :hidden, :name
7
-
8
- def initialize(params = {})
9
- @name = params[:name]
10
- @q_format = params.fetch(:q_format, false)
11
- @hidden = params.fetch(:hidden, false)
12
- super(parent: params[:parent])
13
- end
14
-
15
- # Parse StyleParametres data
16
- # @param [Nokogiri::XML:Element] node with StyleParametres data
17
- # @return [StyleParametres] value of Columns data
18
- def parse(node)
19
- node.xpath('*').each do |node_child|
20
- case node_child.name
21
- when 'name'
22
- @name = node_child.attribute('val').value
23
- when 'qFormat'
24
- @q_format = option_enabled?(node_child)
25
- end
26
- end
27
- self
28
- end
29
- end
30
- end