docx 0.12.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 16df363293a0fcb4945e4ab7aa968f7138f2d0e38d25bcf689a5c5142279e520
4
- data.tar.gz: '089d8ef9c78a7ae13d980d3a78ce729b05fe6dff1fefe8bbdfe4488a6b7b0f91'
3
+ metadata.gz: 236ded16324395b50579ba21fdc84891251ffc75b9bd1f5c65c4d125c6689c2f
4
+ data.tar.gz: d994c54de22f1829928d4fe60087d8f4667905d4e13e88e9b607a9ae3ac34077
5
5
  SHA512:
6
- metadata.gz: 8a23b154632578c3a702d3e16b6bbc3d763f062860ea9cf8db6c6814e89c1e7d99073fa28b7f90276c883d5c1f47be9d688d6f05177c06dbfc3bc452ccadc371
7
- data.tar.gz: a8a735936caad5c75f56cace55e980df01bb25af795d5b8d040b735b14510898b220da3b8f40c52b74c68dc371c9a2d25bc2791d44a54a74dde9db727d3432ae
6
+ metadata.gz: a5a969c0011ac31aba461a485474015b230a76d2c449275f90a5267f4ee9b38ba698090561fea0cade728502be7db6e306c8d9a29cc662400122ed138a209e18
7
+ data.tar.gz: f0e12812fd5d488162a4d263a41d095e31ab0500824ae99cc58f4fafe82ef4468b44fa68bcf0db86b3ecd305cd263602334dfbea81fa14622d7303060e461779
data/README.md CHANGED
@@ -154,6 +154,15 @@ doc.paragraphs.each do |p|
154
154
  end
155
155
  end
156
156
 
157
+ # Substitute a placeholder even when Word has split it across several runs
158
+ # (e.g. "{{first_name}}" stored as "{{fi", "rst_na", "me}}"). Paragraph#substitute
159
+ # matches across run boundaries, where the per-run TextRun#substitute above cannot.
160
+ # Accepts a String or a Regexp (capture-group backreferences work in the replacement).
161
+ doc.paragraphs.each do |p|
162
+ p.substitute('{{first_name}}', 'Jane')
163
+ p.substitute(/\{\{(\w+)\}\}/, 'value of \1')
164
+ end
165
+
157
166
  # Substitute text with access to captures, note block arg is a MatchData, a bit
158
167
  # different than String.gsub. https://ruby-doc.org/3.3.7/MatchData.html
159
168
  doc.paragraphs.each do |p|
@@ -65,6 +65,59 @@ module Docx
65
65
  text_runs.each { |tr| yield(tr) }
66
66
  end
67
67
 
68
# Substitute text within the paragraph, even when a match spans multiple
# text runs (e.g. a "{{placeholder}}" that Word split across several runs,
# such as "{{fi", "rst_na", "me}}"). The per-run TextRun#substitute cannot
# match those, but this can, because it joins the runs first.
#
# The matched region is collapsed into the first run it touches: that run
# receives the replacement plus any unmatched text from the other spanned
# runs, and those other runs are emptied. Runs outside the match are left
# untouched.
#
# +pattern+ may be a String or a Regexp. A String is matched literally
# (regexp metacharacters such as "[", "$" or "*" have no special meaning).
# +replacement+ follows String#sub semantics: it may be a String containing
# back-references (\0-\9, \k<name>, \&, \`, \', \+, \\) or a Hash keyed by
# the matched text. Back-references are resolved against the match in the
# whole paragraph text, so look-behind/look-ahead patterns work too.
#
# Every non-empty match is replaced, scanning left to right; empty matches
# are skipped. Text inserted by a replacement is never re-scanned.
#
# Returns +self+.
#
#   # given a paragraph reading "Hello {{first_name}}!"
#   paragraph.substitute('{{first_name}}', 'Jane') # text is now "Hello Jane!"
#   paragraph.substitute(/\{\{(\w+)\}\}/, 'value of \1')
#
# See https://github.com/ruby-docx/docx/issues/147
def substitute(pattern, replacement)
  # String#match compiles a String argument as regexp source, so a literal
  # such as '[[name]]' would be misread as a character class. Escape it so
  # Strings are matched literally, exactly as String#sub treats them.
  matcher = pattern.is_a?(Regexp) ? pattern : Regexp.new(Regexp.escape(pattern))
  search_from = 0

  loop do
    runs = text_runs
    break if runs.empty?

    texts = runs.map(&:text)
    full_text = texts.join

    match = full_text.match(matcher, search_from)
    break unless match

    match_start, match_end = match.offset(0) # match_end is exclusive
    if match_start == match_end
      # An empty match has nothing to replace: step over it and keep
      # scanning rather than abandoning the remaining matches.
      search_from = match_start + 1
      next
    end

    # offsets[i] is the index in full_text at which run i begins
    cursor = 0
    offsets = texts.map do |text|
      run_start = cursor
      cursor += text.length
      run_start
    end
    first = offsets.rindex { |offset| offset <= match_start }
    last = offsets.rindex { |offset| offset < match_end }

    combined = texts[first..last].join
    local_start = match_start - offsets[first]
    local_end = match_end - offsets[first]

    # Build the replacement from the MatchData itself; re-running the
    # pattern on the isolated slice would lose the surrounding context.
    replaced = expand_substitution(replacement, match)
    runs[first].text = "#{combined[0...local_start]}#{replaced}#{combined[local_end..-1]}"
    ((first + 1)..last).each { |index| runs[index].text = '' }

    # advance past the inserted replacement so it is not re-matched
    search_from = match_start + replaced.length
  end
  self
end

# Expands +replacement+ for a single +match+ the way String#sub would:
# a Hash is looked up by the matched text, and in a String the
# back-references \0-\9, \k<name>, \&, \`, \', \+ and \\ are resolved
# from +match+. Any other backslash sequence is left as written.
private def expand_substitution(replacement, match)
  return replacement[match[0]].to_s if replacement.is_a?(Hash)

  replacement.gsub(/\\(?:(\d)|k<([^>]+)>|([&`'+\\]))/) do
    escape = Regexp.last_match
    if escape[1]
      match[escape[1].to_i].to_s # unmatched / missing group => ""
    elsif escape[2]
      match[escape[2]].to_s
    else
      case escape[3]
      when '&' then match[0]
      when '`' then match.pre_match
      when "'" then match.post_match
      when '+' then match.captures.compact.last.to_s
      else '\\'
      end
    end
  end
end
120
+
68
121
  def aligned_left?
69
122
  ['left', nil].include?(alignment)
70
123
  end
@@ -87,7 +140,7 @@ module Docx
87
140
 
88
141
  def font_color
89
142
  color_tag = @node.xpath('w:r//w:rPr//w:color').first
90
- color_tag ? color_tag.attributes['val'].value : nil
143
+ color_tag ? color_tag.attributes['val']&.value : nil
91
144
  end
92
145
 
93
146
  def style
@@ -13,14 +13,16 @@ module Docx
13
13
  'tbl'
14
14
  end
15
15
 
16
- def initialize(node)
16
+ def initialize(node, document_properties = {}, doc = nil)
17
17
  @node = node
18
18
  @properties_tag = 'tblGrid'
19
+ @document_properties = document_properties
20
+ @document = doc
19
21
  end
20
22
 
21
23
  # Array of row
22
24
  def rows
23
- @node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node) }
25
+ @node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node, @document_properties, @document) }
24
26
  end
25
27
 
26
28
  def row_count
@@ -31,7 +33,7 @@ module Docx
31
33
  def columns
32
34
  columns_containers = []
33
35
  (0..(column_count-1)).each do |i|
34
- columns_containers[i] = Containers::TableColumn.new @node.xpath("w:tr//w:tc[#{i+1}]")
36
+ columns_containers[i] = Containers::TableColumn.new(@node.xpath("w:tr//w:tc[#{i+1}]"), @document_properties, @document)
35
37
  end
36
38
  columns_containers
37
39
  end
@@ -12,9 +12,11 @@ module Docx
12
12
  'tc'
13
13
  end
14
14
 
15
- def initialize(node)
15
+ def initialize(node, document_properties = {}, doc = nil)
16
16
  @node = node
17
17
  @properties_tag = 'tcPr'
18
+ @document_properties = document_properties
19
+ @document = doc
18
20
  end
19
21
 
20
22
  # Return text of paragraph's cell
@@ -24,7 +26,7 @@ module Docx
24
26
 
25
27
  # Array of paragraphs contained within cell
26
28
  def paragraphs
27
- @node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node) }
29
+ @node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node, @document_properties, @document) }
28
30
  end
29
31
 
30
32
  # Iterate over each text run within a paragraph's cell
@@ -12,10 +12,12 @@ module Docx
12
12
  'w:gridCol'
13
13
  end
14
14
 
15
- def initialize(cell_nodes)
15
+ def initialize(cell_nodes, document_properties = {}, doc = nil)
16
16
  @node = ''
17
17
  @properties_tag = ''
18
- @cells = cell_nodes.map { |c_node| Containers::TableCell.new(c_node) }
18
+ @document_properties = document_properties
19
+ @document = doc
20
+ @cells = cell_nodes.map { |c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
19
21
  end
20
22
 
21
23
  # Array of cells contained within row
@@ -12,14 +12,16 @@ module Docx
12
12
  'tr'
13
13
  end
14
14
 
15
- def initialize(node)
15
+ def initialize(node, document_properties = {}, doc = nil)
16
16
  @node = node
17
17
  @properties_tag = ''
18
+ @document_properties = document_properties
19
+ @document = doc
18
20
  end
19
21
 
20
22
  # Array of cells contained within row
21
23
  def cells
22
- @node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node) }
24
+ @node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
23
25
  end
24
26
 
25
27
  end
data/lib/docx/document.rb CHANGED
@@ -27,6 +27,9 @@ module Docx
27
27
  def initialize(path_or_io, options = {})
28
28
  @replace = {}
29
29
 
30
+ # accept path-like objects (e.g. Pathname, File) by using their path (#101)
31
+ path_or_io = path_or_io.to_path if path_or_io.respond_to?(:to_path)
32
+
30
33
  # if path-or_io is string && does not contain a null byte
31
34
  if (path_or_io.instance_of?(String) && !/\u0000/.match?(path_or_io))
32
35
  @zip = Zip::File.open(path_or_io)
@@ -40,6 +43,7 @@ module Docx
40
43
  @document_xml = document.get_input_stream.read
41
44
  @doc = Nokogiri::XML(@document_xml)
42
45
  load_styles
46
+ load_rels
43
47
  load_headers
44
48
  load_footers
45
49
  yield(self) if block_given?
@@ -100,7 +104,11 @@ module Docx
100
104
  # Hyperlink targets are extracted from the document.xml.rels file
101
105
  def hyperlinks
102
106
  hyperlink_relationships.each_with_object({}) do |rel, hash|
103
- hash[rel.attributes['Id'].value] = rel.attributes['Target'].value
107
+ id = rel.attributes['Id']
108
+ target = rel.attributes['Target']
109
+ next unless id && target
110
+
111
+ hash[id.value] = target.value
104
112
  end
105
113
  end
106
114
 
@@ -180,7 +188,7 @@ module Docx
180
188
  end
181
189
 
182
190
  def default_paragraph_style
183
- @styles.at_xpath("w:styles/w:style[@w:type='paragraph' and @w:default='1']/w:name/@w:val").value
191
+ @styles&.at_xpath("w:styles/w:style[@w:type='paragraph' and @w:default='1']/w:name/@w:val")&.value
184
192
  end
185
193
 
186
194
  def style_name_of(style_id)
@@ -226,18 +234,22 @@ module Docx
226
234
  def load_styles
227
235
  @styles_xml = @zip.read('word/styles.xml')
228
236
  @styles = Nokogiri::XML(@styles_xml)
229
- load_rels
230
237
  rescue Errno::ENOENT => e
231
238
  warn e.message
232
239
  nil
233
240
  end
234
241
 
242
+ # Loaded independently of styles so that a document without word/styles.xml
243
+ # still initializes @rels (see #158).
235
244
  def load_rels
236
245
  rels_entry = @zip.glob('word/_rels/document*.xml.rels').first
237
246
  raise Errno::ENOENT unless rels_entry
238
247
 
239
248
  @rels_xml = rels_entry.get_input_stream.read
240
249
  @rels = Nokogiri::XML(@rels_xml)
250
+ rescue Errno::ENOENT => e
251
+ warn e.message
252
+ nil
241
253
  end
242
254
 
243
255
  #--
@@ -267,7 +279,7 @@ module Docx
267
279
  end
268
280
 
269
281
  def parse_table_from(t_node)
270
- Elements::Containers::Table.new(t_node)
282
+ Elements::Containers::Table.new(t_node, document_properties, self)
271
283
  end
272
284
  end
273
285
  end
data/lib/docx/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Docx #:nodoc:
4
- VERSION = '0.12.0'
4
+ VERSION = '0.13.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Hunt