swordfish 0.0.5 → 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/swordfish/document.rb +8 -2
- data/lib/swordfish/formats/docx.rb +45 -11
- data/lib/swordfish/nodes/paragraph.rb +1 -1
- data/lib/swordfish/nodes/text.rb +14 -8
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
OGRlYTY4Yjg5NmY5N2QzMWU2Yzg0ZjAxMDAwM2VmZjUwNThhNDMzOA==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
NGUzNTViOGMwMTJmZmFjOGE4YTA1NzU3MGQwYTMyY2I3YmYzZjhlNQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
YWI3YmFiZGJiYzk3NTRiNTAwNjljZmQ5ODA2NjY0MjMzNjljNTVmYWU3ZmUx
|
10
|
+
OGYwMTljZjU3YjY3MmYzYjcwYmE3NGYzZDc3ZThlNmNmOWJlZDA2YTg2NGFm
|
11
|
+
YzQwMmY0ZmZkMDM4NmQ1OTE4OGZmMjdkODEwZjE3NTUxYWFhZmI=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTAxYmU1NDBjNzIwNWNhZWRjY2UzMjIwMGQ5ZTk0ZjgwMmVhMzQ5YjA0Yjhh
|
14
|
+
NWEyYzQxZDMzYzEyOWViYjQ2Y2RjZjg3OTkzMjRkYTc0NWZmOGIyMzVlNWZj
|
15
|
+
M2ZlN2U3MDE0YjZhYzkwMjA0MDQ2Y2FmNDViNDMwMmU1NDM5Mzk=
|
data/lib/swordfish/document.rb
CHANGED
@@ -157,11 +157,16 @@ module Swordfish
|
|
157
157
|
# The 'run' is the basic unit of text in Office OpenXML. A paragraph, table cell, or other
|
158
158
|
# block element may contain one or more runs, and each run has an associated set of styles.
|
159
159
|
texts = []
|
160
|
-
node
|
160
|
+
# A complex field is a special type of node spanning multiple runs, where most of the runs
|
161
|
+
# designate a special control flow rather than normal text.
|
162
|
+
complex_field = nil
|
163
|
+
|
164
|
+
nodes = node.is_a?(Array) ? node : node.children
|
165
|
+
nodes.each_with_index do |run_xml, idx|
|
161
166
|
case run_xml.name
|
162
167
|
when 'r'
|
163
|
-
|
164
|
-
|
168
|
+
if run_xml.xpath('./w:t').length > 0 && complex_field.nil?
|
169
|
+
# A True run node
|
165
170
|
# Only examine the run if it includes text codes. The run may also include
|
166
171
|
# things like comment nodes, which should be ignored.
|
167
172
|
text = Swordfish::Node::Text.new
|
@@ -177,6 +182,36 @@ module Swordfish
|
|
177
182
|
@swordfish_doc.images[image.original_name] = read_image(image.original_name)
|
178
183
|
texts << image
|
179
184
|
end
|
185
|
+
elsif run_xml.xpath('./w:fldChar').length > 0 || complex_field
|
186
|
+
# A complex field
|
187
|
+
case
|
188
|
+
when run_xml.xpath('./w:fldChar').length > 0 && run_xml.xpath('./w:fldChar')[0]['w:fldCharType'] == 'begin'
|
189
|
+
# Start the complex field
|
190
|
+
complex_field = true
|
191
|
+
when run_xml.xpath('./w:instrText').length > 0
|
192
|
+
# An instruction run, defining the complex field's behavior
|
193
|
+
instruction = run_xml.xpath('./w:instrText')[0].content
|
194
|
+
if instruction =~ /^\s*HYPERLINK/
|
195
|
+
# A hyperlink
|
196
|
+
complex_field = Swordfish::Node::Hyperlink.new
|
197
|
+
complex_field.href = instruction.match(/^\s*HYPERLINK "([^"]+)"/).captures[0]
|
198
|
+
else
|
199
|
+
# Anything else
|
200
|
+
complex_field = Swordfish::Node::Text.new
|
201
|
+
end
|
202
|
+
when run_xml.xpath('./w:t').length > 0 && complex_field.children.length.zero?
|
203
|
+
# The textual content
|
204
|
+
complex_field.append(_node_parse_runs(nodes.to_a[idx..-1]))
|
205
|
+
when run_xml.xpath('./w:fldChar').length > 0 && run_xml.xpath('./w:fldChar')[0]['w:fldCharType'] == 'end'
|
206
|
+
# End the complex field
|
207
|
+
if complex_field
|
208
|
+
texts << complex_field
|
209
|
+
complex_field = nil
|
210
|
+
else
|
211
|
+
# Handle the case where _node_parse_runs gets called from within a complex field
|
212
|
+
return texts
|
213
|
+
end
|
214
|
+
end
|
180
215
|
end
|
181
216
|
when 'hyperlink'
|
182
217
|
# Hyperlink nodes are placed amongst other run nodes, but
|
@@ -189,16 +224,15 @@ module Swordfish
|
|
189
224
|
end
|
190
225
|
end
|
191
226
|
# Clean up runs by merging them if they have identical styles
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
to_delete << text
|
198
|
-
end
|
227
|
+
texts = texts.reduce([]) do |memo, run|
|
228
|
+
if memo.length > 0 && memo.last.is_a?(Swordfish::Node::Text) && run.is_a?(Swordfish::Node::Text) && memo.last.style == run.style
|
229
|
+
memo.last.content += run.content
|
230
|
+
else
|
231
|
+
memo << run
|
199
232
|
end
|
233
|
+
memo
|
200
234
|
end
|
201
|
-
|
235
|
+
|
202
236
|
texts
|
203
237
|
end
|
204
238
|
|
data/lib/swordfish/nodes/text.rb
CHANGED
@@ -11,15 +11,21 @@ module Swordfish
|
|
11
11
|
|
12
12
|
def to_html
|
13
13
|
@content ||= ""
|
14
|
+
@content.gsub!(/[[:space:]]/, ' ')
|
15
|
+
leading_space = !!@content.lstrip! # If there is a leading or trailing space,
|
16
|
+
trailing_space = !!@content.rstrip! # shift it outside of any formatting tags
|
14
17
|
html = CGI::escapeHTML(@content)
|
15
|
-
html
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
18
|
+
if html.length > 0
|
19
|
+
html = "<i>#{html}</i>" if @style.italic?
|
20
|
+
html = "<b>#{html}</b>" if @style.bold?
|
21
|
+
html = "<u>#{html}</u>" if @style.underline?
|
22
|
+
html = "<strike>#{html}</strike>" if @style.strikethrough?
|
23
|
+
html = "<sup>#{html}</sup>" if @style.superscript?
|
24
|
+
html = "<sub>#{html}</sub>" if @style.subscript?
|
25
|
+
html = "<strong>#{html}</strong>" if @style.strong?
|
26
|
+
html = "<em>#{html}</em>" if @style.emphasis?
|
27
|
+
end
|
28
|
+
html = "#{' ' if leading_space}#{html}#{' ' if trailing_space}"
|
23
29
|
html
|
24
30
|
end
|
25
31
|
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swordfish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Posthumus
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-06-
|
11
|
+
date: 2014-06-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|