swordfish 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- NDJiOGRjNGUyYjg3OWRjZWYwNTJhNWI5YTQ0NTgyMWUxMTdlMWJiOQ==
4
+ OGRlYTY4Yjg5NmY5N2QzMWU2Yzg0ZjAxMDAwM2VmZjUwNThhNDMzOA==
5
5
  data.tar.gz: !binary |-
6
- ZGJkYTgxMDUzZGRiZTU3M2I1MTEwMjJhNGE1NzZmMzlhNGNhZTFlOQ==
6
+ NGUzNTViOGMwMTJmZmFjOGE4YTA1NzU3MGQwYTMyY2I3YmYzZjhlNQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- YmY4YzdlODI5NGUwYzFjOTliZmJmZmRlZjc4Njc4YzlhMmQ3MGZiYjA5NGZj
10
- NmE4YjdlNTYxNmM1MzM1MWFmMjUxOTE2MDhlMGI1NTFkZTdlMWI5ZTQwOTA1
11
- MjIyZGI1NzY4YzcyNGU1NWI3MDllNTIwOGZkNmU0OWE4OTc5OGU=
9
+ YWI3YmFiZGJiYzk3NTRiNTAwNjljZmQ5ODA2NjY0MjMzNjljNTVmYWU3ZmUx
10
+ OGYwMTljZjU3YjY3MmYzYjcwYmE3NGYzZDc3ZThlNmNmOWJlZDA2YTg2NGFm
11
+ YzQwMmY0ZmZkMDM4NmQ1OTE4OGZmMjdkODEwZjE3NTUxYWFhZmI=
12
12
  data.tar.gz: !binary |-
13
- MjgzM2YxMTMzNDlkMDJlN2JhNWI4NzM4M2U4Y2IyOGI0MjkyOGY1MDEzZjNk
14
- MjYzNGYzNzBlYzY2YzdhZGExM2M4MDMzMDBmYjYxYTRlMzNlMWQ4MmY0YWIy
15
- MTllYzM2MDQwNGZiMjEwMjdmZjZhNjMxNGFlMDNhNWMzN2UwMjg=
13
+ YTAxYmU1NDBjNzIwNWNhZWRjY2UzMjIwMGQ5ZTk0ZjgwMmVhMzQ5YjA0Yjhh
14
+ NWEyYzQxZDMzYzEyOWViYjQ2Y2RjZjg3OTkzMjRkYTc0NWZmOGIyMzVlNWZj
15
+ M2ZlN2U3MDE0YjZhYzkwMjA0MDQ2Y2FmNDViNDMwMmU1NDM5Mzk=
@@ -56,8 +56,14 @@ module Swordfish
56
56
  end
57
57
  end
58
58
 
59
- def to_html
60
- @nodes.map(&:to_html).join
59
+ def to_html(opts = {})
60
+ html = @nodes.map(&:to_html).join
61
+
62
+ if opts[:pretty]
63
+ Nokogiri::HTML(html).to_html
64
+ else
65
+ html
66
+ end
61
67
  end
62
68
 
63
69
  private
@@ -157,11 +157,16 @@ module Swordfish
157
157
  # The 'run' is the basic unit of text in Office OpenXML. A paragraph, table cell, or other
158
158
  # block element may contain one or more runs, and each run has an associated set of styles.
159
159
  texts = []
160
- node.children.each do |run_xml|
160
+ # A complex field is a special type of node spanning multiple runs, where most of the runs
161
+ # designate a special control flow rather than normal text.
162
+ complex_field = nil
163
+
164
+ nodes = node.is_a?(Array) ? node : node.children
165
+ nodes.each_with_index do |run_xml, idx|
161
166
  case run_xml.name
162
167
  when 'r'
163
- # A true run node
164
- if run_xml.xpath('./w:t').length > 0
168
+ if run_xml.xpath('./w:t').length > 0 && complex_field.nil?
169
+ # A True run node
165
170
  # Only examine the run if it includes text codes. The run may also include
166
171
  # things like comment nodes, which should be ignored.
167
172
  text = Swordfish::Node::Text.new
@@ -177,6 +182,36 @@ module Swordfish
177
182
  @swordfish_doc.images[image.original_name] = read_image(image.original_name)
178
183
  texts << image
179
184
  end
185
+ elsif run_xml.xpath('./w:fldChar').length > 0 || complex_field
186
+ # A complex field
187
+ case
188
+ when run_xml.xpath('./w:fldChar').length > 0 && run_xml.xpath('./w:fldChar')[0]['w:fldCharType'] == 'begin'
189
+ # Start the complex field
190
+ complex_field = true
191
+ when run_xml.xpath('./w:instrText').length > 0
192
+ # An instruction run, defining the complex field's behavior
193
+ instruction = run_xml.xpath('./w:instrText')[0].content
194
+ if instruction =~ /^\s*HYPERLINK/
195
+ # A hyperlink
196
+ complex_field = Swordfish::Node::Hyperlink.new
197
+ complex_field.href = instruction.match(/^\s*HYPERLINK "([^"]+)"/).captures[0]
198
+ else
199
+ # Anything else
200
+ complex_field = Swordfish::Node::Text.new
201
+ end
202
+ when run_xml.xpath('./w:t').length > 0 && complex_field.children.length.zero?
203
+ # The textual content
204
+ complex_field.append(_node_parse_runs(nodes.to_a[idx..-1]))
205
+ when run_xml.xpath('./w:fldChar').length > 0 && run_xml.xpath('./w:fldChar')[0]['w:fldCharType'] == 'end'
206
+ # End the complex field
207
+ if complex_field
208
+ texts << complex_field
209
+ complex_field = nil
210
+ else
211
+ # Handle the case where _node_parse_runs gets called from within a complex field
212
+ return texts
213
+ end
214
+ end
180
215
  end
181
216
  when 'hyperlink'
182
217
  # Hyperlink nodes are placed amongst other run nodes, but
@@ -189,16 +224,15 @@ module Swordfish
189
224
  end
190
225
  end
191
226
  # Clean up runs by merging them if they have identical styles
192
- to_delete = []
193
- texts.each_with_index do |text, idx|
194
- if idx > 0
195
- if text.is_a?(Swordfish::Node::Text) && texts[idx-1].is_a?(Swordfish::Node::Text) && text.style == texts[idx-1].style
196
- texts[idx-1].content += text.content
197
- to_delete << text
198
- end
227
+ texts = texts.reduce([]) do |memo, run|
228
+ if memo.length > 0 && memo.last.is_a?(Swordfish::Node::Text) && run.is_a?(Swordfish::Node::Text) && memo.last.style == run.style
229
+ memo.last.content += run.content
230
+ else
231
+ memo << run
199
232
  end
233
+ memo
200
234
  end
201
- texts.reject! {|t| to_delete.include?(t) }
235
+
202
236
  texts
203
237
  end
204
238
 
@@ -12,7 +12,7 @@ module Swordfish
12
12
  @children.map(&:to_html).join
13
13
  else
14
14
  text = @children.map(&:to_html).join
15
- "<p>#{text}</p>" unless text.length.zero?
15
+ "<p>#{text}</p>" unless text =~ /^[[:space:]]*$/
16
16
  end
17
17
  end
18
18
 
@@ -11,15 +11,21 @@ module Swordfish
11
11
 
12
12
  def to_html
13
13
  @content ||= ""
14
+ @content.gsub!(/[[:space:]]/, ' ')
15
+ leading_space = !!@content.lstrip! # If there is a leading or trailing space,
16
+ trailing_space = !!@content.rstrip! # shift it outside of any formatting tags
14
17
  html = CGI::escapeHTML(@content)
15
- html = "<i>#{html}</i>" if @style.italic?
16
- html = "<b>#{html}</b>" if @style.bold?
17
- html = "<u>#{html}</u>" if @style.underline?
18
- html = "<strike>#{html}</strike>" if @style.strikethrough?
19
- html = "<sup>#{html}</sup>" if @style.superscript?
20
- html = "<sub>#{html}</sub>" if @style.subscript?
21
- html = "<strong>#{html}</strong>" if @style.strong?
22
- html = "<em>#{html}</em>" if @style.emphasis?
18
+ if html.length > 0
19
+ html = "<i>#{html}</i>" if @style.italic?
20
+ html = "<b>#{html}</b>" if @style.bold?
21
+ html = "<u>#{html}</u>" if @style.underline?
22
+ html = "<strike>#{html}</strike>" if @style.strikethrough?
23
+ html = "<sup>#{html}</sup>" if @style.superscript?
24
+ html = "<sub>#{html}</sub>" if @style.subscript?
25
+ html = "<strong>#{html}</strong>" if @style.strong?
26
+ html = "<em>#{html}</em>" if @style.emphasis?
27
+ end
28
+ html = "#{' ' if leading_space}#{html}#{' ' if trailing_space}"
23
29
  html
24
30
  end
25
31
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swordfish
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Posthumus
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-06-03 00:00:00.000000000 Z
11
+ date: 2014-06-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler