swordfish 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MGMzODc2Y2FlNzUzMzdiMGNmMjlmODA1MjUwYjg3MWJhMzYwZWViOQ==
4
+ NDQyYjhkZjNiNTRjNTE3ZWRlYmFmNWMzOGI5Nzg0OTU3MjNhMWRkOQ==
5
5
  data.tar.gz: !binary |-
6
- ODgzMmMyNDBkNTEzZDg1M2NkNWFiY2RlN2VlZDBjYmE2M2I3NTNlYw==
6
+ YjA5YWZmZDMyMWY4NzgxMmQ1MWVhNTU4YmQ0MDA4YmU4NDUyOTRhNg==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- ZjY4M2JiYmZlZGM1MTMxY2FlYWI2MjVlOTg1YmU0OTMxZDZmOTY1MWRmZjc3
10
- ZTFiYTJiZTAyODJiZjA0OWM1MjAzNDE0YWQyODVjYTY4ZjYwYzlmNmFmM2Y1
11
- MzE5NThjZDQwMGRmNDJjNjc4YzI3NmZkNWM3Y2I3Nzc4ZjQ2MWQ=
9
+ OTg0NzAwNTFjMzhhNDYzNWY3MjgyMzQ3MDdkMDVhNjE1NWYzMTc0ZjFhZWJj
10
+ YzM1M2YzNzQ4YjEyZDg2YTQwOWY3OGJjY2JhZmE4MGUyZGQ1NmEwNzkwMDk1
11
+ N2FjMzQ2MjRjNDcwYWY3YmVmODhkMGNhMjEwMTc2OTRmYzFmODY=
12
12
  data.tar.gz: !binary |-
13
- OTRhZmZkYjQzOWQ5NGIxOGQ1NmNiNGE1MWUwNTBjYTIyMWFmOWUxNDdkN2U1
14
- MzBmYWZhM2UwOGYxNmFkN2M0ZjY1ZWI1Y2MzZjczMzgxMGU5MmY2M2MwZWVh
15
- N2QxYjIzMTkzYTdkODkzMDg2Mjg3OWMwYTcwZGQ0ZmI4Yzk1MjM=
13
+ NzRhNWRiOWFjMzAwNGNlZjQ0OTZiZWM3MDYwM2NkYzNlMDQ0YWNjYTk0MGJk
14
+ Zjk3NmZhYzA0NmVhYWJjYjFkZTg0MDY0OGY1MzFmMjM5MTE3ZjgwNGU5NTVk
15
+ NmEwZWZiOGU3MGFkMTk3MTZjOGJmMGZlZGQ4YjMzMjBjNjkyM2M=
@@ -8,17 +8,20 @@ require 'swordfish/nodes/hyperlink'
8
8
  require 'swordfish/nodes/table'
9
9
  require 'swordfish/nodes/table_row'
10
10
  require 'swordfish/nodes/table_cell'
11
+ require 'swordfish/nodes/image'
11
12
 
12
13
  # Swordfish::Document is the internal representation of a parsed document.
13
14
 
14
15
  module Swordfish
15
16
  class Document
16
17
 
17
- attr_reader :nodes # An array of all top-level elements in the document
18
+ attr_reader :nodes # An array of all top-level elements in the document
19
+ attr_accessor :images # Stored image assets
18
20
 
19
21
  # On initialization, set the nodes list to an empty array
20
22
  def initialize
21
23
  @nodes = []
24
+ @images = {}
22
25
  end
23
26
 
24
27
  # Pass in a node and append it to the nodes array
@@ -30,8 +33,36 @@ module Swordfish
30
33
  end
31
34
  end
32
35
 
36
+ # Retrieve an image by name
37
+ def get_image(name)
38
+ @images[name]
39
+ end
40
+
41
+ # Save an image to a specified destination file path
42
+ def save_image(image, dest)
43
+ @images[image].open
44
+ File.open(dest, 'w') { |f| f.write(@images[image].read) }
45
+ @images[image].close
46
+ end
47
+
48
+ # Change the value that an image should report its source to be
49
+ def update_image_path(original_name, new_path)
50
+ find_nodes_by_type(Swordfish::Node::Image).each do |image_node|
51
+ if image_node.original_name == original_name
52
+ image_node.path = new_path
53
+ end
54
+ end
55
+ end
56
+
33
57
  def to_html
34
58
  @nodes.map(&:to_html).join
35
59
  end
60
+
61
+ private
62
+
63
+ # Return all nodes of a given type
64
+ def find_nodes_by_type(klass)
65
+ @nodes.collect{|n| n.find_nodes_by_type(klass)}.flatten
66
+ end
36
67
  end
37
68
  end
@@ -8,6 +8,8 @@ module Swordfish
8
8
  class DOCX
9
9
 
10
10
  attr_reader :swordfish_doc # The Swordfish::Document corresponding to the parsed document
11
+ attr_reader :docx_archive # The source archive
12
+ attr_reader :namespaces # A hash of XML namespaces used in this doc
11
13
 
12
14
  # Parse a document and return a Swordfish::Document object
13
15
  def self.open(filepath)
@@ -20,11 +22,12 @@ module Swordfish
20
22
  relationships = docx_archive.read 'word/_rels/document.xml.rels'
21
23
 
22
24
  # Parse the XML files and generate the Swordfish::Document
23
- swordfish_docx = new document, styles, numbering, relationships
25
+ swordfish_docx = new docx_archive, document, styles, numbering, relationships
24
26
  swordfish_docx.swordfish_doc
25
27
  end
26
28
 
27
- def initialize(document_xml, styles_xml, numbering_xml, relationships_xml)
29
+ def initialize(archive, document_xml, styles_xml, numbering_xml, relationships_xml)
30
+ @docx_archive = archive
28
31
  @swordfish_doc = Swordfish::Document.new
29
32
  parse_styles styles_xml
30
33
  parse_numbering numbering_xml
@@ -45,6 +48,7 @@ module Swordfish
45
48
  # Parse the document structure XML
46
49
  def parse(document_xml)
47
50
  @xml = Nokogiri::XML(document_xml)
51
+ @namespaces = @xml.collect_namespaces
48
52
 
49
53
  # Iterate over each element node and dispatch it to the appropriate parser
50
54
  @xml.xpath('//w:body').children.each do |node|
@@ -138,6 +142,14 @@ module Swordfish
138
142
  end
139
143
  end
140
144
 
145
+ # Extract an image resource as a tempfile
146
+ def read_image(image_name)
147
+ tempfile = Tempfile.new(image_name)
148
+ tempfile.write @docx_archive.get_input_stream("word/media/#{image_name}").read
149
+ tempfile.close
150
+ tempfile
151
+ end
152
+
141
153
  # NODE PARSERS
142
154
  # Each of the methods below (beginning with '_node') are specialized parsers for handling
143
155
  # a particular type of XML element.
@@ -151,13 +163,22 @@ module Swordfish
151
163
  case run_xml.name
152
164
  when 'r'
153
165
  # A true run node
154
- text = Swordfish::Node::Text.new
155
166
  if run_xml.xpath('./w:t').length > 0
156
167
  # Only examine the run if it includes text codes. The run may also include
157
168
  # things like comment nodes, which should be ignored.
169
+ text = Swordfish::Node::Text.new
158
170
  text.content = run_xml.xpath('./w:t')[0].content
159
171
  get_styles_for_node(text, run_xml.xpath('./w:rPr')[0])
160
172
  texts << text
173
+ elsif run_xml.xpath('.//pic:pic', :pic => @namespaces['xmlns:pic']).length > 0
174
+ # An image run
175
+ image = Swordfish::Node::Image.new
176
+ relationship_id = run_xml.xpath('.//pic:pic/pic:blipFill/a:blip', :pic => @namespaces['xmlns:pic'], :a => @namespaces['xmlns:a'])[0]['r:embed'] rescue nil
177
+ if relationship_id
178
+ image.original_name = @relationships[relationship_id].split('/').last
179
+ @swordfish_doc.images[image.original_name] = read_image(image.original_name)
180
+ texts << image
181
+ end
161
182
  end
162
183
  when 'hyperlink'
163
184
  # Hyperlink nodes are placed amongst other run nodes, but
@@ -60,6 +60,13 @@ module Swordfish
60
60
  end
61
61
  end
62
62
 
63
+ # Find all nodes of a given type among this node and its descendants
64
+ def find_nodes_by_type(klass)
65
+ nodes = @children.collect{|n| n.find_nodes_by_type(klass)}.flatten
66
+ nodes << self if self.is_a?(klass)
67
+ nodes.compact
68
+ end
69
+
63
70
  end
64
71
 
65
72
  class BadContentError < Exception
@@ -0,0 +1,26 @@
1
+ # An image node
2
+ # Actual image data is stored at the document level, and can be
3
+ # retrieved by calling get_image(image_image) on the document
4
+ # object.
5
+
6
+ module Swordfish
7
+ module Node
8
+ class Image < Base
9
+
10
+ # @original_name holds the name of the file as it is reported by the source document
11
+ attr_accessor :original_name
12
+ # @path holds a new name for the image that must be assigned explicitly
13
+ attr_accessor :path
14
+
15
+ # Override Base append because an image node should never have children
16
+ def append(node)
17
+ raise BadContentError
18
+ end
19
+
20
+ def to_html
21
+ "<img src='#{@path ? @path : @original_name}'>"
22
+ end
23
+
24
+ end
25
+ end
26
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: swordfish
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Martin Posthumus
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-05-27 00:00:00.000000000 Z
11
+ date: 2014-06-01 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -65,6 +65,7 @@ files:
65
65
  - lib/swordfish/formats/docx.rb
66
66
  - lib/swordfish/nodes/base.rb
67
67
  - lib/swordfish/nodes/hyperlink.rb
68
+ - lib/swordfish/nodes/image.rb
68
69
  - lib/swordfish/nodes/list.rb
69
70
  - lib/swordfish/nodes/list_item.rb
70
71
  - lib/swordfish/nodes/paragraph.rb