swordfish 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/swordfish/document.rb +32 -1
- data/lib/swordfish/formats/docx.rb +24 -3
- data/lib/swordfish/nodes/base.rb +7 -0
- data/lib/swordfish/nodes/image.rb +26 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
NDQyYjhkZjNiNTRjNTE3ZWRlYmFmNWMzOGI5Nzg0OTU3MjNhMWRkOQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
YjA5YWZmZDMyMWY4NzgxMmQ1MWVhNTU4YmQ0MDA4YmU4NDUyOTRhNg==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
OTg0NzAwNTFjMzhhNDYzNWY3MjgyMzQ3MDdkMDVhNjE1NWYzMTc0ZjFhZWJj
|
10
|
+
YzM1M2YzNzQ4YjEyZDg2YTQwOWY3OGJjY2JhZmE4MGUyZGQ1NmEwNzkwMDk1
|
11
|
+
N2FjMzQ2MjRjNDcwYWY3YmVmODhkMGNhMjEwMTc2OTRmYzFmODY=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
NzRhNWRiOWFjMzAwNGNlZjQ0OTZiZWM3MDYwM2NkYzNlMDQ0YWNjYTk0MGJk
|
14
|
+
Zjk3NmZhYzA0NmVhYWJjYjFkZTg0MDY0OGY1MzFmMjM5MTE3ZjgwNGU5NTVk
|
15
|
+
NmEwZWZiOGU3MGFkMTk3MTZjOGJmMGZlZGQ4YjMzMjBjNjkyM2M=
|
data/lib/swordfish/document.rb
CHANGED
@@ -8,17 +8,20 @@ require 'swordfish/nodes/hyperlink'
|
|
8
8
|
require 'swordfish/nodes/table'
|
9
9
|
require 'swordfish/nodes/table_row'
|
10
10
|
require 'swordfish/nodes/table_cell'
|
11
|
+
require 'swordfish/nodes/image'
|
11
12
|
|
12
13
|
# Swordfish::Document is the internal representation of a parsed document.
|
13
14
|
|
14
15
|
module Swordfish
|
15
16
|
class Document
|
16
17
|
|
17
|
-
attr_reader :nodes
|
18
|
+
attr_reader :nodes # An array of all top-level elements in the document
|
19
|
+
attr_accessor :images # Stored image assets
|
18
20
|
|
19
21
|
# On initialization, set the nodes list to an empty array
|
20
22
|
def initialize
|
21
23
|
@nodes = []
|
24
|
+
@images = {}
|
22
25
|
end
|
23
26
|
|
24
27
|
# Pass in a node and append it to the nodes array
|
@@ -30,8 +33,36 @@ module Swordfish
|
|
30
33
|
end
|
31
34
|
end
|
32
35
|
|
36
|
+
# Retrieve an image by name
|
37
|
+
def get_image(name)
|
38
|
+
@images[name]
|
39
|
+
end
|
40
|
+
|
41
|
+
# Save an image to a specified directory
|
42
|
+
def save_image(image, dest)
|
43
|
+
@images[image].open
|
44
|
+
File.open(dest, 'w') { |f| f.write(@images[image].read) }
|
45
|
+
@images[image].close
|
46
|
+
end
|
47
|
+
|
48
|
+
# Change the value that an image should report its source to be
|
49
|
+
def update_image_path(original_name, new_path)
|
50
|
+
find_nodes_by_type(Swordfish::Node::Image).each do |image_node|
|
51
|
+
if image_node.original_name == original_name
|
52
|
+
image_node.path = new_path
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
33
57
|
def to_html
|
34
58
|
@nodes.map(&:to_html).join
|
35
59
|
end
|
60
|
+
|
61
|
+
private
|
62
|
+
|
63
|
+
# Return all nodes of a given type
|
64
|
+
def find_nodes_by_type(klass)
|
65
|
+
@nodes.collect{|n| n.find_nodes_by_type(klass)}.flatten
|
66
|
+
end
|
36
67
|
end
|
37
68
|
end
|
@@ -8,6 +8,8 @@ module Swordfish
|
|
8
8
|
class DOCX
|
9
9
|
|
10
10
|
attr_reader :swordfish_doc # The Swordfish::Document corresponding to the parsed document
|
11
|
+
attr_reader :docx_archive # The source archive
|
12
|
+
attr_reader :namespaces # A hash of XML namespaces used in this doc
|
11
13
|
|
12
14
|
# Parse a document and return a Swordfish::Document object
|
13
15
|
def self.open(filepath)
|
@@ -20,11 +22,12 @@ module Swordfish
|
|
20
22
|
relationships = docx_archive.read 'word/_rels/document.xml.rels'
|
21
23
|
|
22
24
|
# Parse the XML files and generate the Swordfish::Document
|
23
|
-
swordfish_docx = new document, styles, numbering, relationships
|
25
|
+
swordfish_docx = new docx_archive, document, styles, numbering, relationships
|
24
26
|
swordfish_docx.swordfish_doc
|
25
27
|
end
|
26
28
|
|
27
|
-
def initialize(document_xml, styles_xml, numbering_xml, relationships_xml)
|
29
|
+
def initialize(archive, document_xml, styles_xml, numbering_xml, relationships_xml)
|
30
|
+
@docx_archive = archive
|
28
31
|
@swordfish_doc = Swordfish::Document.new
|
29
32
|
parse_styles styles_xml
|
30
33
|
parse_numbering numbering_xml
|
@@ -45,6 +48,7 @@ module Swordfish
|
|
45
48
|
# Parse the document structure XML
|
46
49
|
def parse(document_xml)
|
47
50
|
@xml = Nokogiri::XML(document_xml)
|
51
|
+
@namespaces = @xml.collect_namespaces
|
48
52
|
|
49
53
|
# Iterate over each element node and dispatch it to the appropriate parser
|
50
54
|
@xml.xpath('//w:body').children.each do |node|
|
@@ -138,6 +142,14 @@ module Swordfish
|
|
138
142
|
end
|
139
143
|
end
|
140
144
|
|
145
|
+
# Extract an image resource as a tempfile
|
146
|
+
def read_image(image_name)
|
147
|
+
tempfile = Tempfile.new(image_name)
|
148
|
+
tempfile.write @docx_archive.get_input_stream("word/media/#{image_name}").read
|
149
|
+
tempfile.close
|
150
|
+
tempfile
|
151
|
+
end
|
152
|
+
|
141
153
|
# NODE PARSERS
|
142
154
|
# Each of the methods below (beginning with '_node') are specialized parsers for handling
|
143
155
|
# a particular type of XML element.
|
@@ -151,13 +163,22 @@ module Swordfish
|
|
151
163
|
case run_xml.name
|
152
164
|
when 'r'
|
153
165
|
# A true run node
|
154
|
-
text = Swordfish::Node::Text.new
|
155
166
|
if run_xml.xpath('./w:t').length > 0
|
156
167
|
# Only examine the run if it includes text codes. The run may also include
|
157
168
|
# things like comment nodes, which should be ignored.
|
169
|
+
text = Swordfish::Node::Text.new
|
158
170
|
text.content = run_xml.xpath('./w:t')[0].content
|
159
171
|
get_styles_for_node(text, run_xml.xpath('./w:rPr')[0])
|
160
172
|
texts << text
|
173
|
+
elsif run_xml.xpath('.//pic:pic', :pic => @namespaces['xmlns:pic']).length > 0
|
174
|
+
# An image run
|
175
|
+
image = Swordfish::Node::Image.new
|
176
|
+
relationship_id = run_xml.xpath('.//pic:pic/pic:blipFill/a:blip', :pic => @namespaces['xmlns:pic'], :a => @namespaces['xmlns:a'])[0]['r:embed'] rescue nil
|
177
|
+
if relationship_id
|
178
|
+
image.original_name = @relationships[relationship_id].split('/').last
|
179
|
+
@swordfish_doc.images[image.original_name] = read_image(image.original_name)
|
180
|
+
texts << image
|
181
|
+
end
|
161
182
|
end
|
162
183
|
when 'hyperlink'
|
163
184
|
# Hyperlink nodes are placed amongst other run nodes, but
|
data/lib/swordfish/nodes/base.rb
CHANGED
@@ -60,6 +60,13 @@ module Swordfish
|
|
60
60
|
end
|
61
61
|
end
|
62
62
|
|
63
|
+
# Find all descendant nodes of a given type
|
64
|
+
def find_nodes_by_type(klass)
|
65
|
+
nodes = @children.collect{|n| n.find_nodes_by_type(klass)}.flatten
|
66
|
+
nodes << self if self.is_a?(klass)
|
67
|
+
nodes.compact
|
68
|
+
end
|
69
|
+
|
63
70
|
end
|
64
71
|
|
65
72
|
class BadContentError < Exception
|
@@ -0,0 +1,26 @@
|
|
1
|
+
# An image node
|
2
|
+
# Actual image data is stored at the document level, and can be
|
3
|
+
# retrieved by calling get_image(image_name) on the document
|
4
|
+
# object.
|
5
|
+
|
6
|
+
module Swordfish
|
7
|
+
module Node
|
8
|
+
class Image < Base
|
9
|
+
|
10
|
+
# @original_name holds the name of the file as it is reported by the source document
|
11
|
+
attr_accessor :original_name
|
12
|
+
# @path holds a new name for the image that must be assigned explicitly
|
13
|
+
attr_accessor :path
|
14
|
+
|
15
|
+
# Override Base append because an image node should never have children
|
16
|
+
def append(node)
|
17
|
+
raise BadContentError
|
18
|
+
end
|
19
|
+
|
20
|
+
def to_html
|
21
|
+
"<img src='#{@path ? @path : @original_name}'>"
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: swordfish
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Martin Posthumus
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-06-01 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -65,6 +65,7 @@ files:
|
|
65
65
|
- lib/swordfish/formats/docx.rb
|
66
66
|
- lib/swordfish/nodes/base.rb
|
67
67
|
- lib/swordfish/nodes/hyperlink.rb
|
68
|
+
- lib/swordfish/nodes/image.rb
|
68
69
|
- lib/swordfish/nodes/list.rb
|
69
70
|
- lib/swordfish/nodes/list_item.rb
|
70
71
|
- lib/swordfish/nodes/paragraph.rb
|