docx 0.11.0 → 0.13.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bba1301a8a8e00e7fd9ee748af89280af2348120628c9a3d806dbcef78b17bb4
4
- data.tar.gz: 699e0f10746445987737d090a0cf11886251f98385224dd0c9ec543b59676735
3
+ metadata.gz: 236ded16324395b50579ba21fdc84891251ffc75b9bd1f5c65c4d125c6689c2f
4
+ data.tar.gz: d994c54de22f1829928d4fe60087d8f4667905d4e13e88e9b607a9ae3ac34077
5
5
  SHA512:
6
- metadata.gz: 52fd4f0aeafbf5a9ce0f5c3b4b75b8235fa291806a95dcb9455c993fe322b949aea7780a56a92ce78c2d51248eab4c6366e3511eda11991a739f53cd052460b7
7
- data.tar.gz: 598be461460ae0aee15f4f13028850497df9a9bb30d47594f3ce678725095a8dd3b72ebc5cc1bb32825f10efef8c4cf27a7c54736c2875b344e0374af8daa92b
6
+ metadata.gz: a5a969c0011ac31aba461a485474015b230a76d2c449275f90a5267f4ee9b38ba698090561fea0cade728502be7db6e306c8d9a29cc662400122ed138a209e18
7
+ data.tar.gz: f0e12812fd5d488162a4d263a41d095e31ab0500824ae99cc58f4fafe82ef4468b44fa68bcf0db86b3ecd305cd263602334dfbea81fa14622d7303060e461779
data/README.md CHANGED
@@ -5,13 +5,13 @@
5
5
  [![Coverage Status](https://coveralls.io/repos/github/ruby-docx/docx/badge.svg?branch=master)](https://coveralls.io/github/ruby-docx/docx?branch=master)
6
6
  [![Gitter](https://badges.gitter.im/ruby-docx/community.svg)](https://gitter.im/ruby-docx/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
7
7
 
8
- A ruby library/gem for interacting with `.docx` files. currently capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, reading tables/rows/columns/cells and saving the document.
8
+ A Ruby library/gem for interacting with `.docx` files. Current capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, reading and writing headers/footers, reading tables/rows/columns/cells, and saving the document.
9
9
 
10
10
  ## Usage
11
11
 
12
12
  ### Prerequisites
13
13
 
14
- - Ruby 2.6 or later
14
+ - Ruby 2.7 or later
15
15
 
16
16
  ### Install
17
17
 
@@ -63,6 +63,26 @@ doc = Docx::Document.open(buffer)
63
63
  # Everything about reading is the same as shown above
64
64
  ```
65
65
 
66
+ ### Reading headers and footers
67
+
68
+ ``` ruby
69
+ require 'docx'
70
+
71
+ doc = Docx::Document.open('example.docx')
72
+
73
+ # Headers and footers are returned as hashes keyed by their file name
74
+ # (e.g. "header1", "footer1"), with Nokogiri documents as values.
75
+ doc.headers.each do |name, header|
76
+ puts name
77
+ puts header.text
78
+ end
79
+
80
+ doc.footers.each do |name, footer|
81
+ puts name
82
+ puts footer.text
83
+ end
84
+ ```
85
+
66
86
  ### Rendering html
67
87
  ``` ruby
68
88
  require 'docx'
@@ -116,7 +136,11 @@ doc = Docx::Document.open('example.docx')
116
136
  doc.bookmarks['example_bookmark'].insert_text_after("Hello world.")
117
137
 
118
138
  # Insert multiple lines of text at our bookmark
119
- doc.bookmarks['example_bookmark_2'].insert_multiple_lines_after(['Hello', 'World', 'foo'])
139
+ doc.bookmarks['example_bookmark_2'].insert_multiple_lines(['Hello', 'World', 'foo'])
140
+
141
+ # Bookmarks placed in headers and footers are included too, and edits to them
142
+ # are saved along with the document.
143
+ doc.bookmarks['header_bookmark'].insert_text_after("Hello from the header.")
120
144
 
121
145
  # Remove paragraphs
122
146
  doc.paragraphs.each do |p|
@@ -130,6 +154,15 @@ doc.paragraphs.each do |p|
130
154
  end
131
155
  end
132
156
 
157
+ # Substitute a placeholder even when Word has split it across several runs
158
+ # (e.g. "{{first_name}}" stored as "{{fi", "rst_na", "me}}"). Paragraph#substitute
159
+ # matches across run boundaries, where the per-run TextRun#substitute above cannot.
160
+ # Accepts a String or a Regexp (capture-group backreferences work in the replacement).
161
+ doc.paragraphs.each do |p|
162
+ p.substitute('{{first_name}}', 'Jane')
163
+ p.substitute(/\{\{(\w+)\}\}/, 'value of \1')
164
+ end
165
+
133
166
  # Substitute text with access to captures, note block arg is a MatchData, a bit
134
167
  # different than String.gsub. https://ruby-doc.org/3.3.7/MatchData.html
135
168
  doc.paragraphs.each do |p|
@@ -65,6 +65,59 @@ module Docx
65
65
  text_runs.each { |tr| yield(tr) }
66
66
  end
67
67
 
68
+ # Substitute text within the paragraph, even when a match spans multiple
69
+ # text runs (e.g. a "{{placeholder}}" that Word split across several runs,
70
+ # such as "{{fi", "rst_na", "me}}"). The per-run TextRun#substitute cannot
71
+ # match those, but this can, because it joins the runs first.
72
+ #
73
+ # The matched region is collapsed into the first run it touches, so that
74
+ # run's formatting is kept while the other spanned runs are emptied; runs
75
+ # outside the match are left untouched.
76
+ #
77
+ # +pattern+ may be a String or a Regexp; +replacement+ follows String#sub
78
+ # semantics, so capture-group backreferences (e.g. '\1') work with a Regexp.
79
+ #
80
+ # # given a paragraph reading "Hello {{first_name}}!"
81
+ # paragraph.substitute('{{first_name}}', 'Jane') # => "Hello Jane!"
82
+ # paragraph.substitute(/\{\{(\w+)\}\}/, 'value of \1')
83
+ #
84
+ # See https://github.com/ruby-docx/docx/issues/147
85
+ def substitute(pattern, replacement)
86
+ search_from = 0
87
+ loop do
88
+ runs = text_runs
89
+ break if runs.empty?
90
+
91
+ offsets = []
92
+ cursor = 0
93
+ runs.each do |run|
94
+ offsets << cursor
95
+ cursor += run.text.length
96
+ end
97
+ full_text = runs.map(&:text).join
98
+
99
+ match = full_text.match(pattern, search_from)
100
+ break unless match
101
+ break if match.end(0) == match.begin(0) # ignore empty matches
102
+
103
+ match_start = match.begin(0)
104
+ match_end = match.end(0) # exclusive
105
+ first = offsets.rindex { |offset| offset <= match_start }
106
+ last = offsets.rindex { |offset| offset < match_end }
107
+
108
+ combined = runs[first..last].map(&:text).join
109
+ local_start = match_start - offsets[first]
110
+ local_end = match_end - offsets[first]
111
+ replaced = combined[local_start...local_end].sub(pattern, replacement)
112
+ runs[first].text = combined[0...local_start] + replaced + combined[local_end..-1]
113
+ ((first + 1)..last).each { |index| runs[index].text = '' }
114
+
115
+ # advance past the inserted replacement so it is not re-matched
116
+ search_from = match_start + replaced.length
117
+ end
118
+ self
119
+ end
120
+
68
121
  def aligned_left?
69
122
  ['left', nil].include?(alignment)
70
123
  end
@@ -87,7 +140,7 @@ module Docx
87
140
 
88
141
  def font_color
89
142
  color_tag = @node.xpath('w:r//w:rPr//w:color').first
90
- color_tag ? color_tag.attributes['val'].value : nil
143
+ color_tag ? color_tag.attributes['val']&.value : nil
91
144
  end
92
145
 
93
146
  def style
@@ -13,14 +13,16 @@ module Docx
13
13
  'tbl'
14
14
  end
15
15
 
16
- def initialize(node)
16
+ def initialize(node, document_properties = {}, doc = nil)
17
17
  @node = node
18
18
  @properties_tag = 'tblGrid'
19
+ @document_properties = document_properties
20
+ @document = doc
19
21
  end
20
22
 
21
23
  # Array of row
22
24
  def rows
23
- @node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node) }
25
+ @node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node, @document_properties, @document) }
24
26
  end
25
27
 
26
28
  def row_count
@@ -31,7 +33,7 @@ module Docx
31
33
  def columns
32
34
  columns_containers = []
33
35
  (0..(column_count-1)).each do |i|
34
- columns_containers[i] = Containers::TableColumn.new @node.xpath("w:tr//w:tc[#{i+1}]")
36
+ columns_containers[i] = Containers::TableColumn.new(@node.xpath("w:tr//w:tc[#{i+1}]"), @document_properties, @document)
35
37
  end
36
38
  columns_containers
37
39
  end
@@ -12,9 +12,11 @@ module Docx
12
12
  'tc'
13
13
  end
14
14
 
15
- def initialize(node)
15
+ def initialize(node, document_properties = {}, doc = nil)
16
16
  @node = node
17
17
  @properties_tag = 'tcPr'
18
+ @document_properties = document_properties
19
+ @document = doc
18
20
  end
19
21
 
20
22
  # Return text of paragraph's cell
@@ -24,7 +26,7 @@ module Docx
24
26
 
25
27
  # Array of paragraphs contained within cell
26
28
  def paragraphs
27
- @node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node) }
29
+ @node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node, @document_properties, @document) }
28
30
  end
29
31
 
30
32
  # Iterate over each text run within a paragraph's cell
@@ -12,10 +12,12 @@ module Docx
12
12
  'w:gridCol'
13
13
  end
14
14
 
15
- def initialize(cell_nodes)
15
+ def initialize(cell_nodes, document_properties = {}, doc = nil)
16
16
  @node = ''
17
17
  @properties_tag = ''
18
- @cells = cell_nodes.map { |c_node| Containers::TableCell.new(c_node) }
18
+ @document_properties = document_properties
19
+ @document = doc
20
+ @cells = cell_nodes.map { |c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
19
21
  end
20
22
 
21
23
  # Array of cells contained within row
@@ -12,14 +12,16 @@ module Docx
12
12
  'tr'
13
13
  end
14
14
 
15
- def initialize(node)
15
+ def initialize(node, document_properties = {}, doc = nil)
16
16
  @node = node
17
17
  @properties_tag = ''
18
+ @document_properties = document_properties
19
+ @document = doc
18
20
  end
19
21
 
20
22
  # Array of cells contained within row
21
23
  def cells
22
- @node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node) }
24
+ @node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
23
25
  end
24
26
 
25
27
  end
data/lib/docx/document.rb CHANGED
@@ -27,6 +27,9 @@ module Docx
27
27
  def initialize(path_or_io, options = {})
28
28
  @replace = {}
29
29
 
30
+ # accept path-like objects (e.g. Pathname, File) by using their path (#101)
31
+ path_or_io = path_or_io.to_path if path_or_io.respond_to?(:to_path)
32
+
30
33
  # if path_or_io is a String that does not contain a null byte
31
34
  if (path_or_io.instance_of?(String) && !/\u0000/.match?(path_or_io))
32
35
  @zip = Zip::File.open(path_or_io)
@@ -40,6 +43,7 @@ module Docx
40
43
  @document_xml = document.get_input_stream.read
41
44
  @doc = Nokogiri::XML(@document_xml)
42
45
  load_styles
46
+ load_rels
43
47
  load_headers
44
48
  load_footers
45
49
  yield(self) if block_given?
@@ -70,6 +74,9 @@ module Docx
70
74
  def bookmarks
71
75
  bkmrks_hsh = {}
72
76
  bkmrks_ary = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node }
77
+ # also scan headers and footers so their bookmarks can be read and edited
78
+ bkmrks_ary += headers.values.flat_map { |h| h.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }
79
+ bkmrks_ary += footers.values.flat_map { |f| f.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }
73
80
  # auto-generated by office 2010
74
81
  bkmrks_ary.reject! { |b| b.name == '_GoBack' }
75
82
  bkmrks_ary.each { |b| bkmrks_hsh[b.name] = b }
@@ -97,7 +104,11 @@ module Docx
97
104
  # Hyperlink targets are extracted from the document.xml.rels file
98
105
  def hyperlinks
99
106
  hyperlink_relationships.each_with_object({}) do |rel, hash|
100
- hash[rel.attributes['Id'].value] = rel.attributes['Target'].value
107
+ id = rel.attributes['Id']
108
+ target = rel.attributes['Target']
109
+ next unless id && target
110
+
111
+ hash[id.value] = target.value
101
112
  end
102
113
  end
103
114
 
@@ -177,7 +188,7 @@ module Docx
177
188
  end
178
189
 
179
190
  def default_paragraph_style
180
- @styles.at_xpath("w:styles/w:style[@w:type='paragraph' and @w:default='1']/w:name/@w:val").value
191
+ @styles&.at_xpath("w:styles/w:style[@w:type='paragraph' and @w:default='1']/w:name/@w:val")&.value
181
192
  end
182
193
 
183
194
  def style_name_of(style_id)
@@ -223,18 +234,22 @@ module Docx
223
234
  def load_styles
224
235
  @styles_xml = @zip.read('word/styles.xml')
225
236
  @styles = Nokogiri::XML(@styles_xml)
226
- load_rels
227
237
  rescue Errno::ENOENT => e
228
238
  warn e.message
229
239
  nil
230
240
  end
231
241
 
242
+ # Loaded independently of styles so that a document without word/styles.xml
243
+ # still initializes @rels (see #158).
232
244
  def load_rels
233
245
  rels_entry = @zip.glob('word/_rels/document*.xml.rels').first
234
246
  raise Errno::ENOENT unless rels_entry
235
247
 
236
248
  @rels_xml = rels_entry.get_input_stream.read
237
249
  @rels = Nokogiri::XML(@rels_xml)
250
+ rescue Errno::ENOENT => e
251
+ warn e.message
252
+ nil
238
253
  end
239
254
 
240
255
  #--
@@ -264,7 +279,7 @@ module Docx
264
279
  end
265
280
 
266
281
  def parse_table_from(t_node)
267
- Elements::Containers::Table.new(t_node)
282
+ Elements::Containers::Table.new(t_node, document_properties, self)
268
283
  end
269
284
  end
270
285
  end
data/lib/docx/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Docx #:nodoc:
4
- VERSION = '0.11.0'
4
+ VERSION = '0.13.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: docx
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.11.0
4
+ version: 0.13.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christopher Hunt