docx 0.11.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +36 -3
- data/lib/docx/containers/paragraph.rb +54 -1
- data/lib/docx/containers/table.rb +5 -3
- data/lib/docx/containers/table_cell.rb +4 -2
- data/lib/docx/containers/table_column.rb +4 -2
- data/lib/docx/containers/table_row.rb +4 -2
- data/lib/docx/document.rb +19 -4
- data/lib/docx/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 236ded16324395b50579ba21fdc84891251ffc75b9bd1f5c65c4d125c6689c2f
|
|
4
|
+
data.tar.gz: d994c54de22f1829928d4fe60087d8f4667905d4e13e88e9b607a9ae3ac34077
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a5a969c0011ac31aba461a485474015b230a76d2c449275f90a5267f4ee9b38ba698090561fea0cade728502be7db6e306c8d9a29cc662400122ed138a209e18
|
|
7
|
+
data.tar.gz: f0e12812fd5d488162a4d263a41d095e31ab0500824ae99cc58f4fafe82ef4468b44fa68bcf0db86b3ecd305cd263602334dfbea81fa14622d7303060e461779
|
data/README.md
CHANGED
|
@@ -5,13 +5,13 @@
|
|
|
5
5
|
[](https://coveralls.io/github/ruby-docx/docx?branch=master)
|
|
6
6
|
[](https://gitter.im/ruby-docx/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge)
|
|
7
7
|
|
|
8
|
-
A ruby library/gem for interacting with `.docx` files. currently capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, reading tables/rows/columns/cells and saving the document.
|
|
8
|
+
A ruby library/gem for interacting with `.docx` files. Current capabilities include reading paragraphs/bookmarks, inserting text at bookmarks, reading and writing headers/footers, reading tables/rows/columns/cells and saving the document.
|
|
9
9
|
|
|
10
10
|
## Usage
|
|
11
11
|
|
|
12
12
|
### Prerequisites
|
|
13
13
|
|
|
14
|
-
- Ruby 2.
|
|
14
|
+
- Ruby 2.7 or later
|
|
15
15
|
|
|
16
16
|
### Install
|
|
17
17
|
|
|
@@ -63,6 +63,26 @@ doc = Docx::Document.open(buffer)
|
|
|
63
63
|
# Everything about reading is the same as shown above
|
|
64
64
|
```
|
|
65
65
|
|
|
66
|
+
### Reading headers and footers
|
|
67
|
+
|
|
68
|
+
``` ruby
|
|
69
|
+
require 'docx'
|
|
70
|
+
|
|
71
|
+
doc = Docx::Document.open('example.docx')
|
|
72
|
+
|
|
73
|
+
# Headers and footers are returned as hashes keyed by their file name
|
|
74
|
+
# (e.g. "header1", "footer1"), with Nokogiri documents as values.
|
|
75
|
+
doc.headers.each do |name, header|
|
|
76
|
+
puts name
|
|
77
|
+
puts header.text
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
doc.footers.each do |name, footer|
|
|
81
|
+
puts name
|
|
82
|
+
puts footer.text
|
|
83
|
+
end
|
|
84
|
+
```
|
|
85
|
+
|
|
66
86
|
### Rendering html
|
|
67
87
|
``` ruby
|
|
68
88
|
require 'docx'
|
|
@@ -116,7 +136,11 @@ doc = Docx::Document.open('example.docx')
|
|
|
116
136
|
doc.bookmarks['example_bookmark'].insert_text_after("Hello world.")
|
|
117
137
|
|
|
118
138
|
# Insert multiple lines of text at our bookmark
|
|
119
|
-
doc.bookmarks['example_bookmark_2'].
|
|
139
|
+
doc.bookmarks['example_bookmark_2'].insert_multiple_lines(['Hello', 'World', 'foo'])
|
|
140
|
+
|
|
141
|
+
# Bookmarks placed in headers and footers are included too, and edits to them
|
|
142
|
+
# are saved along with the document.
|
|
143
|
+
doc.bookmarks['header_bookmark'].insert_text_after("Hello from the header.")
|
|
120
144
|
|
|
121
145
|
# Remove paragraphs
|
|
122
146
|
doc.paragraphs.each do |p|
|
|
@@ -130,6 +154,15 @@ doc.paragraphs.each do |p|
|
|
|
130
154
|
end
|
|
131
155
|
end
|
|
132
156
|
|
|
157
|
+
# Substitute a placeholder even when Word has split it across several runs
|
|
158
|
+
# (e.g. "{{first_name}}" stored as "{{fi", "rst_na", "me}}"). Paragraph#substitute
|
|
159
|
+
# matches across run boundaries, where the per-run TextRun#substitute above cannot.
|
|
160
|
+
# Accepts a String or a Regexp (capture-group backreferences work in the replacement).
|
|
161
|
+
doc.paragraphs.each do |p|
|
|
162
|
+
p.substitute('{{first_name}}', 'Jane')
|
|
163
|
+
p.substitute(/\{\{(\w+)\}\}/, 'value of \1')
|
|
164
|
+
end
|
|
165
|
+
|
|
133
166
|
# Substitute text with access to captures, note block arg is a MatchData, a bit
|
|
134
167
|
# different than String.gsub. https://ruby-doc.org/3.3.7/MatchData.html
|
|
135
168
|
doc.paragraphs.each do |p|
|
|
@@ -65,6 +65,59 @@ module Docx
|
|
|
65
65
|
text_runs.each { |tr| yield(tr) }
|
|
66
66
|
end
|
|
67
67
|
|
|
68
|
+
# Substitute text within the paragraph, even when a match spans multiple
|
|
69
|
+
# text runs (e.g. a "{{placeholder}}" that Word split across several runs,
|
|
70
|
+
# such as "{{fi", "rst_na", "me}}"). The per-run TextRun#substitute cannot
|
|
71
|
+
# match those, but this can, because it joins the runs first.
|
|
72
|
+
#
|
|
73
|
+
# The matched region is collapsed into the first run it touches, so that
|
|
74
|
+
# run's formatting is kept while the other spanned runs are emptied; runs
|
|
75
|
+
# outside the match are left untouched.
|
|
76
|
+
#
|
|
77
|
+
# +pattern+ may be a String or a Regexp; +replacement+ follows String#sub
|
|
78
|
+
# semantics, so capture-group backreferences (e.g. '\1') work with a Regexp.
|
|
79
|
+
#
|
|
80
|
+
# # given a paragraph reading "Hello {{first_name}}!"
|
|
81
|
+
# paragraph.substitute('{{first_name}}', 'Jane') # => "Hello Jane!"
|
|
82
|
+
# paragraph.substitute(/\{\{(\w+)\}\}/, 'value of \1')
|
|
83
|
+
#
|
|
84
|
+
# See https://github.com/ruby-docx/docx/issues/147
|
|
85
|
+
def substitute(pattern, replacement)
|
|
86
|
+
search_from = 0
|
|
87
|
+
loop do
|
|
88
|
+
runs = text_runs
|
|
89
|
+
break if runs.empty?
|
|
90
|
+
|
|
91
|
+
offsets = []
|
|
92
|
+
cursor = 0
|
|
93
|
+
runs.each do |run|
|
|
94
|
+
offsets << cursor
|
|
95
|
+
cursor += run.text.length
|
|
96
|
+
end
|
|
97
|
+
full_text = runs.map(&:text).join
|
|
98
|
+
|
|
99
|
+
match = full_text.match(pattern, search_from)
|
|
100
|
+
break unless match
|
|
101
|
+
break if match.end(0) == match.begin(0) # ignore empty matches
|
|
102
|
+
|
|
103
|
+
match_start = match.begin(0)
|
|
104
|
+
match_end = match.end(0) # exclusive
|
|
105
|
+
first = offsets.rindex { |offset| offset <= match_start }
|
|
106
|
+
last = offsets.rindex { |offset| offset < match_end }
|
|
107
|
+
|
|
108
|
+
combined = runs[first..last].map(&:text).join
|
|
109
|
+
local_start = match_start - offsets[first]
|
|
110
|
+
local_end = match_end - offsets[first]
|
|
111
|
+
replaced = combined[local_start...local_end].sub(pattern, replacement)
|
|
112
|
+
runs[first].text = combined[0...local_start] + replaced + combined[local_end..-1]
|
|
113
|
+
((first + 1)..last).each { |index| runs[index].text = '' }
|
|
114
|
+
|
|
115
|
+
# advance past the inserted replacement so it is not re-matched
|
|
116
|
+
search_from = match_start + replaced.length
|
|
117
|
+
end
|
|
118
|
+
self
|
|
119
|
+
end
|
|
120
|
+
|
|
68
121
|
def aligned_left?
|
|
69
122
|
['left', nil].include?(alignment)
|
|
70
123
|
end
|
|
@@ -87,7 +140,7 @@ module Docx
|
|
|
87
140
|
|
|
88
141
|
def font_color
|
|
89
142
|
color_tag = @node.xpath('w:r//w:rPr//w:color').first
|
|
90
|
-
color_tag ? color_tag.attributes['val']
|
|
143
|
+
color_tag ? color_tag.attributes['val']&.value : nil
|
|
91
144
|
end
|
|
92
145
|
|
|
93
146
|
def style
|
|
@@ -13,14 +13,16 @@ module Docx
|
|
|
13
13
|
'tbl'
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def initialize(node)
|
|
16
|
+
def initialize(node, document_properties = {}, doc = nil)
|
|
17
17
|
@node = node
|
|
18
18
|
@properties_tag = 'tblGrid'
|
|
19
|
+
@document_properties = document_properties
|
|
20
|
+
@document = doc
|
|
19
21
|
end
|
|
20
22
|
|
|
21
23
|
# Array of row
|
|
22
24
|
def rows
|
|
23
|
-
@node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node) }
|
|
25
|
+
@node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node, @document_properties, @document) }
|
|
24
26
|
end
|
|
25
27
|
|
|
26
28
|
def row_count
|
|
@@ -31,7 +33,7 @@ module Docx
|
|
|
31
33
|
def columns
|
|
32
34
|
columns_containers = []
|
|
33
35
|
(0..(column_count-1)).each do |i|
|
|
34
|
-
columns_containers[i] = Containers::TableColumn.new
|
|
36
|
+
columns_containers[i] = Containers::TableColumn.new(@node.xpath("w:tr//w:tc[#{i+1}]"), @document_properties, @document)
|
|
35
37
|
end
|
|
36
38
|
columns_containers
|
|
37
39
|
end
|
|
@@ -12,9 +12,11 @@ module Docx
|
|
|
12
12
|
'tc'
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def initialize(node)
|
|
15
|
+
def initialize(node, document_properties = {}, doc = nil)
|
|
16
16
|
@node = node
|
|
17
17
|
@properties_tag = 'tcPr'
|
|
18
|
+
@document_properties = document_properties
|
|
19
|
+
@document = doc
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
# Return text of paragraph's cell
|
|
@@ -24,7 +26,7 @@ module Docx
|
|
|
24
26
|
|
|
25
27
|
# Array of paragraphs contained within cell
|
|
26
28
|
def paragraphs
|
|
27
|
-
@node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node) }
|
|
29
|
+
@node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node, @document_properties, @document) }
|
|
28
30
|
end
|
|
29
31
|
|
|
30
32
|
# Iterate over each text run within a paragraph's cell
|
|
@@ -12,10 +12,12 @@ module Docx
|
|
|
12
12
|
'w:gridCol'
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def initialize(cell_nodes)
|
|
15
|
+
def initialize(cell_nodes, document_properties = {}, doc = nil)
|
|
16
16
|
@node = ''
|
|
17
17
|
@properties_tag = ''
|
|
18
|
-
@
|
|
18
|
+
@document_properties = document_properties
|
|
19
|
+
@document = doc
|
|
20
|
+
@cells = cell_nodes.map { |c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
|
|
19
21
|
end
|
|
20
22
|
|
|
21
23
|
# Array of cells contained within column
|
|
@@ -12,14 +12,16 @@ module Docx
|
|
|
12
12
|
'tr'
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def initialize(node)
|
|
15
|
+
def initialize(node, document_properties = {}, doc = nil)
|
|
16
16
|
@node = node
|
|
17
17
|
@properties_tag = ''
|
|
18
|
+
@document_properties = document_properties
|
|
19
|
+
@document = doc
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
# Array of cells contained within row
|
|
21
23
|
def cells
|
|
22
|
-
@node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node) }
|
|
24
|
+
@node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
|
|
23
25
|
end
|
|
24
26
|
|
|
25
27
|
end
|
data/lib/docx/document.rb
CHANGED
|
@@ -27,6 +27,9 @@ module Docx
|
|
|
27
27
|
def initialize(path_or_io, options = {})
|
|
28
28
|
@replace = {}
|
|
29
29
|
|
|
30
|
+
# accept path-like objects (e.g. Pathname, File) by using their path (#101)
|
|
31
|
+
path_or_io = path_or_io.to_path if path_or_io.respond_to?(:to_path)
|
|
32
|
+
|
|
30
33
|
# if path_or_io is a String and does not contain a null byte
|
|
31
34
|
if (path_or_io.instance_of?(String) && !/\u0000/.match?(path_or_io))
|
|
32
35
|
@zip = Zip::File.open(path_or_io)
|
|
@@ -40,6 +43,7 @@ module Docx
|
|
|
40
43
|
@document_xml = document.get_input_stream.read
|
|
41
44
|
@doc = Nokogiri::XML(@document_xml)
|
|
42
45
|
load_styles
|
|
46
|
+
load_rels
|
|
43
47
|
load_headers
|
|
44
48
|
load_footers
|
|
45
49
|
yield(self) if block_given?
|
|
@@ -70,6 +74,9 @@ module Docx
|
|
|
70
74
|
def bookmarks
|
|
71
75
|
bkmrks_hsh = {}
|
|
72
76
|
bkmrks_ary = @doc.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node }
|
|
77
|
+
# also scan headers and footers so their bookmarks can be read and edited
|
|
78
|
+
bkmrks_ary += headers.values.flat_map { |h| h.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }
|
|
79
|
+
bkmrks_ary += footers.values.flat_map { |f| f.xpath('//w:bookmarkStart').map { |b_node| parse_bookmark_from b_node } }
|
|
73
80
|
# auto-generated by office 2010
|
|
74
81
|
bkmrks_ary.reject! { |b| b.name == '_GoBack' }
|
|
75
82
|
bkmrks_ary.each { |b| bkmrks_hsh[b.name] = b }
|
|
@@ -97,7 +104,11 @@ module Docx
|
|
|
97
104
|
# Hyperlink targets are extracted from the document.xml.rels file
|
|
98
105
|
def hyperlinks
|
|
99
106
|
hyperlink_relationships.each_with_object({}) do |rel, hash|
|
|
100
|
-
|
|
107
|
+
id = rel.attributes['Id']
|
|
108
|
+
target = rel.attributes['Target']
|
|
109
|
+
next unless id && target
|
|
110
|
+
|
|
111
|
+
hash[id.value] = target.value
|
|
101
112
|
end
|
|
102
113
|
end
|
|
103
114
|
|
|
@@ -177,7 +188,7 @@ module Docx
|
|
|
177
188
|
end
|
|
178
189
|
|
|
179
190
|
def default_paragraph_style
|
|
180
|
-
@styles
|
|
191
|
+
@styles&.at_xpath("w:styles/w:style[@w:type='paragraph' and @w:default='1']/w:name/@w:val")&.value
|
|
181
192
|
end
|
|
182
193
|
|
|
183
194
|
def style_name_of(style_id)
|
|
@@ -223,18 +234,22 @@ module Docx
|
|
|
223
234
|
def load_styles
|
|
224
235
|
@styles_xml = @zip.read('word/styles.xml')
|
|
225
236
|
@styles = Nokogiri::XML(@styles_xml)
|
|
226
|
-
load_rels
|
|
227
237
|
rescue Errno::ENOENT => e
|
|
228
238
|
warn e.message
|
|
229
239
|
nil
|
|
230
240
|
end
|
|
231
241
|
|
|
242
|
+
# Loaded independently of styles so that a document without word/styles.xml
|
|
243
|
+
# still initializes @rels (see #158).
|
|
232
244
|
def load_rels
|
|
233
245
|
rels_entry = @zip.glob('word/_rels/document*.xml.rels').first
|
|
234
246
|
raise Errno::ENOENT unless rels_entry
|
|
235
247
|
|
|
236
248
|
@rels_xml = rels_entry.get_input_stream.read
|
|
237
249
|
@rels = Nokogiri::XML(@rels_xml)
|
|
250
|
+
rescue Errno::ENOENT => e
|
|
251
|
+
warn e.message
|
|
252
|
+
nil
|
|
238
253
|
end
|
|
239
254
|
|
|
240
255
|
#--
|
|
@@ -264,7 +279,7 @@ module Docx
|
|
|
264
279
|
end
|
|
265
280
|
|
|
266
281
|
def parse_table_from(t_node)
|
|
267
|
-
Elements::Containers::Table.new(t_node)
|
|
282
|
+
Elements::Containers::Table.new(t_node, document_properties, self)
|
|
268
283
|
end
|
|
269
284
|
end
|
|
270
285
|
end
|
data/lib/docx/version.rb
CHANGED