docx 0.12.0 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/lib/docx/containers/paragraph.rb +54 -1
- data/lib/docx/containers/table.rb +5 -3
- data/lib/docx/containers/table_cell.rb +4 -2
- data/lib/docx/containers/table_column.rb +4 -2
- data/lib/docx/containers/table_row.rb +4 -2
- data/lib/docx/document.rb +16 -4
- data/lib/docx/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 236ded16324395b50579ba21fdc84891251ffc75b9bd1f5c65c4d125c6689c2f
|
|
4
|
+
data.tar.gz: d994c54de22f1829928d4fe60087d8f4667905d4e13e88e9b607a9ae3ac34077
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a5a969c0011ac31aba461a485474015b230a76d2c449275f90a5267f4ee9b38ba698090561fea0cade728502be7db6e306c8d9a29cc662400122ed138a209e18
|
|
7
|
+
data.tar.gz: f0e12812fd5d488162a4d263a41d095e31ab0500824ae99cc58f4fafe82ef4468b44fa68bcf0db86b3ecd305cd263602334dfbea81fa14622d7303060e461779
|
data/README.md
CHANGED
|
@@ -154,6 +154,15 @@ doc.paragraphs.each do |p|
|
|
|
154
154
|
end
|
|
155
155
|
end
|
|
156
156
|
|
|
157
|
+
# Substitute a placeholder even when Word has split it across several runs
|
|
158
|
+
# (e.g. "{{first_name}}" stored as "{{fi", "rst_na", "me}}"). Paragraph#substitute
|
|
159
|
+
# matches across run boundaries, where the per-run TextRun#substitute above cannot.
|
|
160
|
+
# Accepts a String or a Regexp (capture-group backreferences work in the replacement).
|
|
161
|
+
doc.paragraphs.each do |p|
|
|
162
|
+
p.substitute('{{first_name}}', 'Jane')
|
|
163
|
+
p.substitute(/\{\{(\w+)\}\}/, 'value of \1')
|
|
164
|
+
end
|
|
165
|
+
|
|
157
166
|
# Substitute text with access to captures, note block arg is a MatchData, a bit
|
|
158
167
|
# different than String.gsub. https://ruby-doc.org/3.3.7/MatchData.html
|
|
159
168
|
doc.paragraphs.each do |p|
|
|
@@ -65,6 +65,59 @@ module Docx
|
|
|
65
65
|
text_runs.each { |tr| yield(tr) }
|
|
66
66
|
end
|
|
67
67
|
|
|
68
|
+
# Substitute text within the paragraph, even when a match spans multiple
# text runs (e.g. a "{{placeholder}}" that Word split across several runs,
# such as "{{fi", "rst_na", "me}}"). The per-run TextRun#substitute cannot
# match those, but this can, because it searches the joined text of all runs.
#
# The replacement is written into the first run the match touches, so that
# run's formatting is kept. Runs lying wholly inside the match are emptied,
# and any text that follows the match in the last spanned run stays in that
# run, so it keeps its own formatting. Runs outside the match are untouched.
#
# +pattern+ may be a String (matched literally, metacharacters included) or
# a Regexp. +replacement+ follows String#sub semantics: back-references such
# as '\1', '\k<name>', '\0' / '\&' are expanded, and a Hash is looked up by
# the matched text. Look-around assertions in +pattern+ see the whole
# paragraph text. Empty matches are skipped.
#
#   # given a paragraph reading "Hello {{first_name}}!"
#   paragraph.substitute('{{first_name}}', 'Jane') # => "Hello Jane!"
#   paragraph.substitute(/\{\{(\w+)\}\}/, 'value of \1')
#
# Returns the paragraph itself.
#
# See https://github.com/ruby-docx/docx/issues/147
def substitute(pattern, replacement)
  # String#match would compile a String as a regexp while String#sub treats
  # it literally; normalise once so a String pattern is always literal.
  regexp = pattern.is_a?(Regexp) ? pattern : Regexp.new(Regexp.escape(pattern))
  search_from = 0

  loop do
    runs = text_runs
    break if runs.empty?

    texts = runs.map(&:text)
    offsets = []
    texts.inject(0) do |cursor, text|
      offsets << cursor
      cursor + text.length
    end
    full_text = texts.join

    match = regexp.match(full_text, search_from)
    break unless match

    match_start = match.begin(0)
    match_end = match.end(0) # exclusive
    if match_start == match_end
      # skip the empty match but keep looking for later non-empty ones
      search_from = match_end + 1
      next
    end

    first = offsets.rindex { |offset| offset <= match_start }
    last = offsets.rindex { |offset| offset < match_end }

    # Expand from the MatchData of the full-text search rather than
    # re-matching the isolated substring, which loses look-around context.
    replaced = expand_substitution(match, replacement)
    head = texts[first][0...(match_start - offsets[first])]
    tail = texts[last][(match_end - offsets[last])..-1]

    if first == last
      runs[first].text = head + replaced + tail
    else
      runs[first].text = head + replaced
      ((first + 1)...last).each { |index| runs[index].text = '' }
      runs[last].text = tail
    end

    # advance past the inserted replacement so it is not re-matched
    search_from = match_start + replaced.length
  end
  self
end

# Builds the replacement text for +match+ the way String#sub would:
# a Hash is looked up by the matched text, and in a String the sequences
# \0-\9, \k<name>, \&, \`, \' and \\ are expanded.
private def expand_substitution(match, replacement)
  return replacement[match[0]].to_s if replacement.is_a?(Hash)

  replacement.gsub(/\\(\d|k<\w+>|[&`'\\])/) do
    token = Regexp.last_match(1)
    case token
    when '&' then match[0]
    when '`' then match.pre_match
    when "'" then match.post_match
    when '\\' then '\\'
    when /\A\d\z/ then match[token.to_i].to_s
    else match[token[2..-2]].to_s # \k<name>
    end
  end
end
|
|
120
|
+
|
|
68
121
|
# True when the paragraph is left-aligned; a missing alignment (nil)
# also counts as left.
def aligned_left?
  case alignment
  when 'left', nil then true
  else false
  end
end
|
|
@@ -87,7 +140,7 @@ module Docx
|
|
|
87
140
|
|
|
88
141
|
def font_color
|
|
89
142
|
color_tag = @node.xpath('w:r//w:rPr//w:color').first
|
|
90
|
-
color_tag ? color_tag.attributes['val']
|
|
143
|
+
color_tag ? color_tag.attributes['val']&.value : nil
|
|
91
144
|
end
|
|
92
145
|
|
|
93
146
|
def style
|
|
@@ -13,14 +13,16 @@ module Docx
|
|
|
13
13
|
'tbl'
|
|
14
14
|
end
|
|
15
15
|
|
|
16
|
-
def initialize(node)
|
|
16
|
+
def initialize(node, document_properties = {}, doc = nil)
|
|
17
17
|
@node = node
|
|
18
18
|
@properties_tag = 'tblGrid'
|
|
19
|
+
@document_properties = document_properties
|
|
20
|
+
@document = doc
|
|
19
21
|
end
|
|
20
22
|
|
|
21
23
|
# Array of row
|
|
22
24
|
def rows
|
|
23
|
-
@node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node) }
|
|
25
|
+
@node.xpath('w:tr').map {|r_node| Containers::TableRow.new(r_node, @document_properties, @document) }
|
|
24
26
|
end
|
|
25
27
|
|
|
26
28
|
def row_count
|
|
@@ -31,7 +33,7 @@ module Docx
|
|
|
31
33
|
def columns
|
|
32
34
|
columns_containers = []
|
|
33
35
|
(0..(column_count-1)).each do |i|
|
|
34
|
-
columns_containers[i] = Containers::TableColumn.new
|
|
36
|
+
columns_containers[i] = Containers::TableColumn.new(@node.xpath("w:tr//w:tc[#{i+1}]"), @document_properties, @document)
|
|
35
37
|
end
|
|
36
38
|
columns_containers
|
|
37
39
|
end
|
|
@@ -12,9 +12,11 @@ module Docx
|
|
|
12
12
|
'tc'
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def initialize(node)
|
|
15
|
+
def initialize(node, document_properties = {}, doc = nil)
|
|
16
16
|
@node = node
|
|
17
17
|
@properties_tag = 'tcPr'
|
|
18
|
+
@document_properties = document_properties
|
|
19
|
+
@document = doc
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
# Return text of paragraph's cell
|
|
@@ -24,7 +26,7 @@ module Docx
|
|
|
24
26
|
|
|
25
27
|
# Array of paragraphs contained within cell
|
|
26
28
|
def paragraphs
|
|
27
|
-
@node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node) }
|
|
29
|
+
@node.xpath('w:p').map {|p_node| Containers::Paragraph.new(p_node, @document_properties, @document) }
|
|
28
30
|
end
|
|
29
31
|
|
|
30
32
|
# Iterate over each text run within a paragraph's cell
|
|
@@ -12,10 +12,12 @@ module Docx
|
|
|
12
12
|
'w:gridCol'
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def initialize(cell_nodes)
|
|
15
|
+
def initialize(cell_nodes, document_properties = {}, doc = nil)
|
|
16
16
|
@node = ''
|
|
17
17
|
@properties_tag = ''
|
|
18
|
-
@
|
|
18
|
+
@document_properties = document_properties
|
|
19
|
+
@document = doc
|
|
20
|
+
@cells = cell_nodes.map { |c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
|
|
19
21
|
end
|
|
20
22
|
|
|
21
23
|
# Array of cells contained within column
|
|
@@ -12,14 +12,16 @@ module Docx
|
|
|
12
12
|
'tr'
|
|
13
13
|
end
|
|
14
14
|
|
|
15
|
-
def initialize(node)
|
|
15
|
+
def initialize(node, document_properties = {}, doc = nil)
|
|
16
16
|
@node = node
|
|
17
17
|
@properties_tag = ''
|
|
18
|
+
@document_properties = document_properties
|
|
19
|
+
@document = doc
|
|
18
20
|
end
|
|
19
21
|
|
|
20
22
|
# Array of cells contained within row
|
|
21
23
|
def cells
|
|
22
|
-
@node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node) }
|
|
24
|
+
@node.xpath('w:tc').map {|c_node| Containers::TableCell.new(c_node, @document_properties, @document) }
|
|
23
25
|
end
|
|
24
26
|
|
|
25
27
|
end
|
data/lib/docx/document.rb
CHANGED
|
@@ -27,6 +27,9 @@ module Docx
|
|
|
27
27
|
def initialize(path_or_io, options = {})
|
|
28
28
|
@replace = {}
|
|
29
29
|
|
|
30
|
+
# accept path-like objects (e.g. Pathname, File) by using their path (#101)
|
|
31
|
+
path_or_io = path_or_io.to_path if path_or_io.respond_to?(:to_path)
|
|
32
|
+
|
|
30
33
|
# if path_or_io is a String and does not contain a null byte
|
|
31
34
|
if (path_or_io.instance_of?(String) && !/\u0000/.match?(path_or_io))
|
|
32
35
|
@zip = Zip::File.open(path_or_io)
|
|
@@ -40,6 +43,7 @@ module Docx
|
|
|
40
43
|
@document_xml = document.get_input_stream.read
|
|
41
44
|
@doc = Nokogiri::XML(@document_xml)
|
|
42
45
|
load_styles
|
|
46
|
+
load_rels
|
|
43
47
|
load_headers
|
|
44
48
|
load_footers
|
|
45
49
|
yield(self) if block_given?
|
|
@@ -100,7 +104,11 @@ module Docx
|
|
|
100
104
|
# Maps each hyperlink relationship Id to its Target, as read from the
# document.xml.rels file. Relationships lacking an Id or a Target attribute
# are left out.
def hyperlinks
  links = {}
  hyperlink_relationships.each do |relationship|
    id_attr = relationship.attributes['Id']
    target_attr = relationship.attributes['Target']
    links[id_attr.value] = target_attr.value if id_attr && target_attr
  end
  links
end
|
|
106
114
|
|
|
@@ -180,7 +188,7 @@ module Docx
|
|
|
180
188
|
end
|
|
181
189
|
|
|
182
190
|
def default_paragraph_style
|
|
183
|
-
@styles
|
|
191
|
+
@styles&.at_xpath("w:styles/w:style[@w:type='paragraph' and @w:default='1']/w:name/@w:val")&.value
|
|
184
192
|
end
|
|
185
193
|
|
|
186
194
|
def style_name_of(style_id)
|
|
@@ -226,18 +234,22 @@ module Docx
|
|
|
226
234
|
def load_styles
|
|
227
235
|
@styles_xml = @zip.read('word/styles.xml')
|
|
228
236
|
@styles = Nokogiri::XML(@styles_xml)
|
|
229
|
-
load_rels
|
|
230
237
|
rescue Errno::ENOENT => e
|
|
231
238
|
warn e.message
|
|
232
239
|
nil
|
|
233
240
|
end
|
|
234
241
|
|
|
242
|
+
# Reads the first zip entry matching word/_rels/document*.xml.rels into
# @rels_xml and parses it into @rels.
#
# Loaded independently of styles so that a document without word/styles.xml
# still initializes @rels (see #158).
#
# When the entry is missing, a warning naming the missing part is written to
# stderr, @rels is left unset and nil is returned.
def load_rels
  rels_glob = 'word/_rels/document*.xml.rels'
  rels_entry = @zip.glob(rels_glob).first
  # pass the glob so the warning says which package part was not found
  raise Errno::ENOENT, rels_glob unless rels_entry

  @rels_xml = rels_entry.get_input_stream.read
  @rels = Nokogiri::XML(@rels_xml)
rescue Errno::ENOENT => e
  warn e.message
  nil
end
|
|
242
254
|
|
|
243
255
|
#--
|
|
@@ -267,7 +279,7 @@ module Docx
|
|
|
267
279
|
end
|
|
268
280
|
|
|
269
281
|
def parse_table_from(t_node)
|
|
270
|
-
Elements::Containers::Table.new(t_node)
|
|
282
|
+
Elements::Containers::Table.new(t_node, document_properties, self)
|
|
271
283
|
end
|
|
272
284
|
end
|
|
273
285
|
end
|
data/lib/docx/version.rb
CHANGED