swordfish 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,15 @@
1
+ ---
2
+ !binary "U0hBMQ==":
3
+ metadata.gz: !binary |-
4
+ NzVjN2FlM2EwNzY2ZWQxOTJiZjdiZmVlZTZiZDdhMTE2MjYyNWQ1Ng==
5
+ data.tar.gz: !binary |-
6
+ MTU0ZjQzMTRiOTkzMGU5NDdkMDk0MzAyZTc5NTkyNTBiNzQwOGNiMA==
7
+ SHA512:
8
+ metadata.gz: !binary |-
9
+ ZWM2M2IzZjJmMmExMGYxNGU3MTllNmE2ZjQ2YTdlZDhiOTE0YWU3YjYwYzBl
10
+ MjBiY2ZiYTE0MWQ2OWRlYTkyYzE3ZTg1Y2I4NDIwOWYxNDY3MDk1ZWM0NjYw
11
+ YzcxYmVjZjEzNWEyNjI5NmU5OWM1Y2IyZTg3YzBhNWFhNjliNjg=
12
+ data.tar.gz: !binary |-
13
+ NmI2NGQ4ZjIxOWI2YzEzNWI5OTEwMzE4NzEyYjRmZjg2MmJjOTcwMDQ1OWYx
14
+ NzZjZjBjZmEyMzhhMTcwNjEyMzE1M2RkNzM2ZWQwZTE1YzhiMjVhNWRmOTQx
15
+ NWJlZTdkMTY1MDg0YTY5NDcwMGUzODdkM2I4ODFmOWEzMWM1MjE=
data/README.md ADDED
@@ -0,0 +1,4 @@
1
+ swordfish
2
+ =====
3
+
4
+ Document parser
@@ -0,0 +1,37 @@
1
+ require 'swordfish/stylesheet'
2
+ require 'swordfish/nodes/base'
3
+ require 'swordfish/nodes/text'
4
+ require 'swordfish/nodes/paragraph'
5
+ require 'swordfish/nodes/list'
6
+ require 'swordfish/nodes/list_item'
7
+ require 'swordfish/nodes/hyperlink'
8
+ require 'swordfish/nodes/table'
9
+ require 'swordfish/nodes/table_row'
10
+ require 'swordfish/nodes/table_cell'
11
+
12
+ # Swordfish::Document is the internal representation of a parsed document.
13
+
14
module Swordfish
  # The internal representation of a parsed document: an ordered list of
  # top-level nodes that can be rendered to HTML.
  class Document

    attr_reader :nodes # An array of all top-level elements in the document

    # On initialization, set the nodes list to an empty array
    def initialize
      @nodes = []
    end

    # Pass in a node and append it to the nodes array.
    #
    # The node is validated by ancestry (it must descend from
    # Swordfish::Node::Base) rather than by comparing class names, so an
    # unrelated class that merely shares its name with a node type is
    # rejected, and subclasses of the node types are accepted wherever
    # they are defined.
    #
    # Returns the updated nodes array.
    # Raises ArgumentError if the object is not a Swordfish node.
    def append(node)
      raise ArgumentError, "Object is not a node" unless node.is_a?(Swordfish::Node::Base)
      @nodes << node
    end

    # Render every top-level node in document order and concatenate the
    # results into a single HTML string.
    def to_html
      @nodes.map(&:to_html).join
    end
  end
end
@@ -0,0 +1,274 @@
1
+ require 'zip'
2
+ require 'nokogiri'
3
+ require 'swordfish/document'
4
+
5
+ # Swordfish::DOCX defines a parser for .docx (Office OpenXML) formats
6
+
7
module Swordfish
  # Parser for .docx (Office OpenXML) documents. A .docx file is a zip archive
  # of XML parts; this class reads the parts it needs and builds the
  # corresponding Swordfish::Document.
  class DOCX

    # Run-property elements that act as simple on/off toggles, mapped to the
    # Swordfish style each one enables.
    TOGGLE_STYLES = {
      'i'      => :italic,
      'b'      => :bold,
      'u'      => :underline,
      'strike' => :strikethrough
    }.freeze

    # Values of w:val that explicitly switch a toggle property off
    # ("none" is the "no underline" value of w:u).
    DISABLED_TOGGLE_VALUES = %w[0 false off none].freeze

    # Values of w:vertAlign/@w:val mapped to the Swordfish style they enable.
    VERTICAL_ALIGN_STYLES = {
      'superscript' => :superscript,
      'subscript'   => :subscript
    }.freeze

    attr_reader :swordfish_doc # The Swordfish::Document corresponding to the parsed document

    # Parse the file at the given path and return a Swordfish::Document object.
    #
    # The archive is opened with the block form of Zip::File.open so that the
    # file handle is always closed, even if parsing raises. Only
    # word/document.xml is required; the styles, numbering and relationships
    # parts are read if present (a document without lists has no
    # numbering part, for example).
    def self.open(filepath)
      Zip::File.open(filepath) do |archive|
        read_optional = lambda do |entry_name|
          archive.find_entry(entry_name) && archive.read(entry_name)
        end

        document      = archive.read 'word/document.xml'
        styles        = read_optional.call 'word/styles.xml'
        numbering     = read_optional.call 'word/numbering.xml'
        relationships = read_optional.call 'word/_rels/document.xml.rels'

        # Parse the XML files and generate the Swordfish::Document
        new(document, styles, numbering, relationships).swordfish_doc
      end
    end

    # Build the Swordfish::Document from the raw XML of each part. The
    # auxiliary parts are parsed first because the main document parser looks
    # up numbering schemes and relationship targets while walking the body.
    # Any of styles_xml, numbering_xml and relationships_xml may be nil.
    def initialize(document_xml, styles_xml, numbering_xml, relationships_xml)
      @swordfish_doc = Swordfish::Document.new
      parse_styles styles_xml
      parse_numbering numbering_xml
      parse_relationships relationships_xml
      parse document_xml
    end

    private

    # Take the contents of the build buffer and flush them into the Swordfish::Document object.
    # This buffer is needed for certain docx constructs that consist of multiple top-level
    # elements but correspond to a single Swordfish::Node, such as lists.
    def flush
      @swordfish_doc.append(@buffer) if @buffer
      @buffer = nil
    end

    # Parse the document structure XML
    def parse(document_xml)
      @xml = Nokogiri::XML(document_xml)

      # Iterate over each element node and dispatch it to the appropriate parser
      @xml.xpath('//w:body').children.each do |node|
        case node.name
        when 'p'
          if node.xpath('.//w:numPr').empty?
            # Regular paragraph
            flush
            @swordfish_doc.append _node_parse_paragraph(node)
          else
            # List paragraph
            # (Don't flush because we need to first ensure the list is fully parsed)
            _node_parse_list(node)
          end
        when 'tbl'
          flush
          @swordfish_doc.append _node_parse_table(node)
        end
      end

      # A list at the very end of the body is still sitting in the buffer
      flush
    end

    # Parse styles out of a docx element property node (*Pr) and stylize the Swordfish::Node.
    # Does nothing if the element has no property node.
    def get_styles_for_node(swordfish_node, xml_nodeset)
      return unless xml_nodeset
      xml_nodeset.children.each do |style_node|
        if TOGGLE_STYLES.key?(style_node.name)
          swordfish_node.stylize TOGGLE_STYLES[style_node.name] if toggle_enabled?(style_node)
        elsif style_node.name == 'vertAlign'
          style = VERTICAL_ALIGN_STYLES[style_node['w:val']]
          swordfish_node.stylize style if style
        end
      end
    end

    # True unless the toggle element carries a w:val that switches it off.
    # An element with no w:val at all means "on".
    def toggle_enabled?(style_node)
      !DISABLED_TOGGLE_VALUES.include?(style_node['w:val'])
    end

    # Parse the document styles XML (not yet implemented; styles are currently ignored)
    def parse_styles(styles_xml)
    end

    # Parse the abstract numbering XML (defining things such as list numbering)
    def parse_numbering(numbering_xml)
      # The XML maps a numbering ID (numId) to an abstract numbering schema ID (abstractNumId).
      # The abstract numbering schema defines display formats for each level of indentation (lvl).
      # This function will load up the relevant data into the @numbering instance variable in the
      # form of a nested hash: @numbering[numbering ID][indentation level] = number format.
      @numbering = {}
      return if numbering_xml.nil?

      xml = Nokogiri::XML(numbering_xml)
      xml.xpath("//w:num").each do |num|
        abstract_ref = num.xpath("./w:abstractNumId")[0]
        next unless abstract_ref

        abstract_id = abstract_ref['w:val'].to_i
        levels = {}
        xml.xpath("//w:abstractNum[@w:abstractNumId='#{abstract_id}']/w:lvl").each do |level_format|
          format = level_format.xpath("./w:numFmt")[0]
          levels[level_format['w:ilvl'].to_i] = format['w:val'] if format
        end
        @numbering[num['w:numId'].to_i] = levels
      end
    end

    # Parse the relationships XML (defining things such as internal references and external links)
    def parse_relationships(relationships_xml)
      # The XML contains a list of relationships identified by an id. Each relationship includes
      # a target attribute designating the reference. This function will load up the relevant
      # data into the @relationships instance variable in the form of a hash:
      # @relationships[relationship ID] = target URI.
      @relationships = {}
      return if relationships_xml.nil?

      # A CSS selector is used rather than XPath: the original author found
      # that XPath did not match here (presumably because the Relationship
      # elements live in an unprefixed default namespace).
      Nokogiri::XML(relationships_xml).css("Relationship").each do |rel|
        @relationships[rel['Id']] = rel['Target']
      end
    end

    # NODE PARSERS
    # Each of the methods below (beginning with '_node') are specialized parsers for handling
    # a particular type of XML element.

    # Parse one or more runs and return them as an array of Swordfish nodes
    def _node_parse_runs(node)
      # The 'run' is the basic unit of text in Office OpenXML. A paragraph, table cell, or other
      # block element may contain one or more runs, and each run has an associated set of styles.
      texts = []
      node.children.each do |run_xml|
        case run_xml.name
        when 'r'
          # A true run node. Runs that hold no text element at all (breaks,
          # tabs, drawings, ...) are skipped instead of being dereferenced.
          text_elements = run_xml.xpath('./w:t')
          next if text_elements.empty?

          text = Swordfish::Node::Text.new
          text.content = text_elements.map(&:content).join
          get_styles_for_node(text, run_xml.xpath('./w:rPr')[0])
          texts << text
        when 'hyperlink'
          # Hyperlink nodes are placed amongst other run nodes, but
          # they themselves also contain runs. Hyperlinks include
          # a relationship ID attribute defining their reference.
          link = Swordfish::Node::Hyperlink.new
          link.href = @relationships[run_xml['r:id']]
          _node_parse_runs(run_xml).each { |r| link.append(r) }
          texts << link
        end
      end
      texts
    end

    # Parse a paragraph
    def _node_parse_paragraph(node)
      paragraph = Swordfish::Node::Paragraph.new
      _node_parse_runs(node).each { |r| paragraph.append(r) }
      paragraph
    end

    # Parse a list
    def _node_parse_list(node)
      # In Office OpenXML, a list is not a distinct element type, but rather a
      # specialized paragraph that references an abstract numbering scheme
      # and includes an indentation level. As a result, the build buffer
      # must be used to assemble the Swordfish::Node representation of the list,
      # since the only way to tell the list has been fully parsed is to encounter
      # a non-list element.

      # Get the list item's abstract numbering and level
      list_item = Swordfish::Node::ListItem.new
      _node_parse_runs(node).each { |r| list_item.append(r) }
      level_node = node.xpath(".//w:numPr/w:ilvl")[0]
      scheme_node = node.xpath(".//w:numPr/w:numId")[0]
      level = level_node ? level_node['w:val'].to_i : 0
      numbering_scheme = scheme_node ? scheme_node['w:val'].to_i : nil
      list_style = list_style_for(numbering_scheme, level)

      # If the build buffer is empty, this is a new list
      unless @buffer
        @buffer = Swordfish::Node::List.new
        @buffer.stylize list_style if list_style
      end

      # Compare the level of this list item to the bottommost node in
      # the build buffer to determine where in the hierarchy to add
      # this node (i.e., are we dealing with list nesting or not?)
      current_depth = @buffer.depth_of_final_node
      if @buffer.children.empty?
        # First item of the list: there is nothing to nest under yet, so it
        # goes at the top level whatever indentation it declares
        @buffer.append list_item
      elsif level <= current_depth
        # Add sibling to existing list
        descend_list(@buffer, level).append list_item
      else
        # Add new nested list. The nesting point is the deepest existing
        # list, so a jump of more than one level is treated as a single step.
        list = Swordfish::Node::List.new
        list.append list_item
        list.stylize list_style if list_style
        descend_list(@buffer, current_depth).last_list_item.append list
      end
    end

    # Look up the number format for a numbering ID and indentation level and
    # return it as a symbol, or nil if the numbering part does not define it.
    def list_style_for(numbering_scheme, level)
      format = (@numbering[numbering_scheme] || {})[level]
      format && format.to_sym
    end

    # Starting from a list, follow the nested list of the final item the
    # given number of times and return the list reached.
    def descend_list(list, steps)
      steps.times { list = list.last_list_item.nested_list }
      list
    end

    # Parse a table
    def _node_parse_table(node)
      table = Swordfish::Node::Table.new
      node.xpath("./w:tr").each do |row|
        table.append _node_parse_table_row(row)
      end
      table
    end

    # Parse a table row
    def _node_parse_table_row(node)
      row = Swordfish::Node::TableRow.new
      node.xpath('./w:tc').each do |cell|
        # May be a single cell or an array of cells; append flattens arrays
        row.append _node_parse_table_cell(cell)
      end
      row
    end

    # Parse a table cell. Returns a single Swordfish::Node::TableCell, or an
    # array of cells when the docx cell spans several columns.
    def _node_parse_table_cell(node)
      # In a Swordfish::Node::Table object, the number of table cells must equal the
      # total number of rows times the total number of columns; that is, even if
      # two cells are merged together, there must be a Swordfish::Node::TableCell for
      # each one. Merges are defined using the "merge_up" and "merge_left" properties.

      cell = Swordfish::Node::TableCell.new

      # Get the inner content of the cell
      node.xpath("./w:p").each do |paragraph|
        cell.append _node_parse_paragraph(paragraph)
      end

      # Determine whether this cell spans multiple rows. In Office OpenXML,
      # a table cell is defined in every row, even if the cell is vertically-merged. The
      # representation of the merged cell within each row is given a vMerge property. The topmost
      # one has a vMerge value of "restart"; the others have either no value or "continue".
      v_merge = node.xpath("./w:tcPr/w:vMerge")[0]
      cell.merge_up = true if v_merge && v_merge['w:val'] != 'restart'

      # Determine whether this cell spans multiple columns. Unlike with vertical merges,
      # a horizontally-merged Office OpenXML cell is only defined once, but is given a gridSpan
      # property defining the number of columns it spans. Since Swordfish requires a cell for each
      # column, generate the additional cells, and set their merge_left values appropriately.
      grid_span = node.xpath("./w:tcPr/w:gridSpan")[0]
      span = grid_span ? grid_span['w:val'].to_i : 1
      extra_cells = Array.new([span - 1, 0].max) do
        Swordfish::Node::TableCell.new.tap { |c| c.merge_left = true }
      end

      # Return the generated cell or cells
      extra_cells.empty? ? cell : [cell] + extra_cells
    end

  end
end
@@ -0,0 +1,47 @@
1
+ # Superclass for all Swordfish::Node objects
2
+
3
module Swordfish
  module Node
    # Superclass for all Swordfish::Node objects. Holds the state shared by
    # every node: optional raw content, child nodes, and a stylesheet.
    class Base

      attr_accessor :content # Content carried directly by this node, if any
      attr_reader :children  # Array of child nodes, in order
      attr_reader :style     # The Swordfish::Stylesheet applied to this node

      # Initialize with a blank stylesheet and no children
      def initialize
        @style = Swordfish::Stylesheet.new []
        @children = []
      end

      # Append a node or nodes to this node as a child. An array (possibly
      # nested) is flattened so that each element becomes its own child.
      # Always returns the children array.
      def append(node)
        # Guard for subclasses that define initialize without calling super
        @children ||= []
        @children.concat([node].flatten)
      end

      # Take a style or styles and add them to this node's stylesheet
      def stylize(styles)
        @style.merge styles
      end

      # Every subclass must implement to_html in order to be converted to HTML
      def to_html
        raise NotImplementedError
      end

      # Given a hash, create instance variables for each key in that hash.
      # This is used for communication between nodes in the hierarchy
      # (a table uses it to tell each cell its rowspan and colspan).
      def inform!(hash)
        hash.each do |k, v|
          instance_variable_set "@#{k}", v
        end
      end

    end

    # Raised when a node is given content it cannot hold. Derives from
    # StandardError so that an ordinary `rescue` clause can handle it.
    class BadContentError < StandardError
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # A hyperlink node.
2
+
3
require 'cgi'

module Swordfish
  module Node
    # A hyperlink node. Its children are the nodes that make up the link text.
    class Hyperlink < Base

      attr_accessor :href # The link target, as found in the source document

      # Render the link as an <a> element wrapping the rendered children.
      #
      # The target comes from the parsed document and is therefore untrusted,
      # so it is HTML-escaped before being placed in the attribute; otherwise
      # a quote character in the target could close the attribute and inject
      # markup. A missing target renders as an empty href.
      # NOTE(review): the URI scheme is not checked here, so targets such as
      # "javascript:..." pass through unchanged.
      def to_html
        inner = @children.map(&:to_html).join
        "<a href='#{CGI.escapeHTML(@href.to_s)}'>#{inner}</a>"
      end

    end
  end
end
@@ -0,0 +1,33 @@
1
+ # A list node
2
+
3
module Swordfish
  module Node
    # A list node. Its children are the list items.
    class List < Base

      # Render as <ul> when the list carries the bullet style and as <ol>
      # otherwise.
      def to_html
        tag = @style.bullet? ? 'ul' : 'ol'
        "<#{tag}>#{@children.map(&:to_html).join}</#{tag}>"
      end

      # Get the zero-indexed depth of the bottommost child list
      # (This is not the deepest node, just the last child)
      #
      # Walks from this list through the nested list of each final item.
      # The walk stops when the current list has no items or its final item
      # has no nested list, so an empty nested list ends the walk cleanly.
      def depth_of_final_node
        depth = 0
        list = self
        while (item = list.last_list_item) && (list = item.nested_list)
          depth += 1
        end
        depth
      end

      # Return the final child list item (no nesting), or nil if the list
      # has no items
      def last_list_item
        @children.last
      end

    end
  end
end
@@ -0,0 +1,18 @@
1
+ # A list item node
2
+
3
module Swordfish
  module Node
    # A single entry of a list. Its children are the item's content,
    # optionally followed by a nested list.
    class ListItem < Base

      # Render the item as an <li> element wrapping its rendered children
      def to_html
        inner = @children.map { |child| child.to_html }.join
        "<li>#{inner}</li>"
      end

      # Return the nested list, or nil if this list item has no nested lists.
      # Only the final child is considered.
      def nested_list
        tail = @children.last
        tail if tail.is_a?(Swordfish::Node::List)
      end

    end
  end
end
@@ -0,0 +1,17 @@
1
+ # A paragraph node
2
+
3
module Swordfish
  module Node
    # A paragraph node
    class Paragraph < Base

      # Render as a <p> element. When content has been set directly on the
      # paragraph it is used as the body; otherwise the body is the
      # concatenated HTML of the children.
      def to_html
        body = @content || @children.map { |child| child.to_html }.join
        "<p>#{body}</p>"
      end

    end
  end
end
@@ -0,0 +1,85 @@
1
+ # A table node
2
+
3
module Swordfish
  module Node
    # A table node. Its children are the table rows.
    class Table < Base

      # Get the number of rows in the table
      def rows
        @children.length
      end

      # Get the number of columns in the table, taken from the first row
      # (0 for a table with no rows)
      def cols
        first_row = @children.first
        first_row ? first_row.children.length : 0
      end

      # Return the TableCell object at a given position, or nil if the table
      # has no cell there
      def cell_at(row, col)
        row_node = @children[row]
        row_node && row_node.children[col]
      end

      # Work out the cell merges, then render the table and its rows
      def to_html
        collapse_cells!
        "<table><tbody>#{@children.map(&:to_html).join}</tbody></table>"
      end

      private

      # A Swordfish::Node::Table always contains rows*cols cells, even
      # if some of them are to be merged. This method determines how
      # cells ought to be merged together and then informs each cell
      # of its configuration, so that each cell will then properly know
      # how to render itself (if at all).
      def collapse_cells!
        row_count = rows
        col_count = cols

        # Create a 2D array representing each cell, and give each one
        # an initial colspan and rowspan of 1
        structure = Array.new(row_count) do
          Array.new(col_count) { {:colspan => 1, :rowspan => 1} }
        end

        # Iterate over each table cell and see if it has the merge_up
        # or merge_left properties set. If so, find the corresponding
        # "parent" cell and increment its colspan or rowspan appropriately.
        # If the cell is to be merged up or left, set its value to nil
        # within the "structure" variable.
        row_count.times do |r|
          col_count.times do |c|
            cell = cell_at(r, c)
            unless cell
              # This row is shorter than the first one; there is no cell
              # to merge here, and the slot must not act as a merge parent
              structure[r][c] = nil
              next
            end

            if cell.merge_up?
              # Nearest cell above, in the same column, that is still drawn
              parent = (r - 1).downto(0).map { |i| structure[i][c] }.compact.first
              parent[:rowspan] += 1 if parent
              structure[r][c] = nil
            end
            if cell.merge_left?
              # Nearest cell to the left, in the same row, that is still drawn
              parent = (c - 1).downto(0).map { |i| structure[r][i] }.compact.first
              parent[:colspan] += 1 if parent
              structure[r][c] = nil
            end
          end
        end

        # Inform every table cell of its calculated colspan and rowspan.
        # If the cell is not to be drawn, set its rowspan and colspan to 0.
        row_count.times do |r|
          col_count.times do |c|
            cell = cell_at(r, c)
            cell.inform!(structure[r][c] || {:colspan => 0, :rowspan => 0}) if cell
          end
        end
      end

    end
  end
end
@@ -0,0 +1,37 @@
1
+ # A table cell node
2
+
3
module Swordfish
  module Node
    # A table cell node
    class TableCell < Base

      attr_accessor :merge_left # Set when the cell merges into its left neighbour
      attr_accessor :merge_up   # Set when the cell merges into the cell above
      attr_reader :rowspan      # Rows covered by the cell, once it has been informed
      attr_reader :colspan      # Columns covered by the cell, once it has been informed

      # True if this cell is merged with the one to the left
      def merge_left?
        @merge_left ? true : false
      end

      # True if this cell is merged with the one above
      def merge_up?
        @merge_up ? true : false
      end

      # Render the cell as a <td> element. A cell whose colspan and rowspan
      # are both 0 has been merged into another cell and renders as nil.
      # The span attributes are only written when they are greater than 1.
      def to_html
        return nil if @colspan == 0 && @rowspan == 0

        spans = []
        spans << " rowspan=#{@rowspan}" if @rowspan && @rowspan > 1
        spans << " colspan=#{@colspan}" if @colspan && @colspan > 1

        inner = @children.map { |child| child.to_html }.join
        "<td#{spans.join}>#{inner}</td>"
      end

    end
  end
end
@@ -0,0 +1,13 @@
1
+ # A table row node
2
+
3
module Swordfish
  module Node
    # A table row node. Its children are the cells of the row.
    class TableRow < Base

      # Render the row as a <tr> element wrapping its rendered cells
      def to_html
        cells = @children.map { |cell| cell.to_html }
        "<tr>#{cells.join}</tr>"
      end

    end
  end
end
@@ -0,0 +1,25 @@
1
+ # A generic text node
2
+
3
require 'cgi'

module Swordfish
  module Node
    # A generic text node
    class Text < Base

      # Override Base append because a text node should never have children.
      # Always raises BadContentError.
      def append(node)
        raise BadContentError, "Text nodes cannot have children"
      end

      # Render the text wrapped in one inline element per applied style.
      #
      # The content is text taken from the source document, not markup, so
      # it is HTML-escaped first: characters such as "<" and "&" would
      # otherwise corrupt the output or inject markup. Missing content
      # renders as an empty string.
      def to_html
        html = CGI.escapeHTML(@content.to_s)
        # Each style wraps the result of the previous ones, so italic ends up
        # innermost and subscript outermost
        html = "<i>#{html}</i>" if @style.italic?
        html = "<b>#{html}</b>" if @style.bold?
        html = "<u>#{html}</u>" if @style.underline?
        html = "<strike>#{html}</strike>" if @style.strikethrough?
        html = "<sup>#{html}</sup>" if @style.superscript?
        html = "<sub>#{html}</sub>" if @style.subscript?
        html
      end

    end
  end
end
@@ -0,0 +1,42 @@
1
+ # Swordfish::Stylesheet represents formatting applied to a node
2
+
3
module Swordfish
  # Swordfish::Stylesheet represents formatting applied to a node, as a set
  # of style symbols.
  class Stylesheet

    # Define all supported values here. Frozen so that the list cannot be
    # altered after the predicate methods below have been generated from it.
    SUPPORTED_STYLES = [
      # Inline styles
      :bold, :italic, :underline, :superscript, :subscript, :strikethrough,
      # List enumeration styles
      :bullet, :decimal, :lowerLetter, :lowerRoman
    ].freeze

    # Initialize a stylesheet with an optional style or list of styles
    def initialize(styles = [])
      @styles = []
      merge styles
    end

    # Take a style or list of styles and add them to an existing stylesheet.
    # Values that are not in SUPPORTED_STYLES are silently ignored, and a
    # style that is already present is not added twice.
    # Returns the resulting list of styles.
    def merge(styles)
      styles = [styles] unless styles.is_a?(Array)
      @styles |= styles.select { |s| SUPPORTED_STYLES.include?(s) }
    end

    # For each supported style, define a boolean method to check its presence
    # (i.e., :bold?, :italic?, etc.)
    SUPPORTED_STYLES.each do |style|
      define_method("#{style}?") { has_style?(style) }
    end

    private

    # Check if a style is included in a stylesheet
    def has_style?(style)
      @styles.include? style
    end

  end
end
data/lib/swordfish.rb ADDED
@@ -0,0 +1,19 @@
1
+ require 'swordfish/document'
2
+ require 'swordfish/formats/docx'
3
+
4
module Swordfish

  # Main entry point into the parser. Pass in a filepath and return a parsed document.
  #
  # The format is chosen from the file extension, compared case-insensitively.
  # The extension is taken with File.extname so that dots in directory names
  # are not mistaken for an extension, and the path is converted with to_s so
  # that path-like objects such as Pathname are accepted as well as strings.
  #
  # Raises UnsupportedFormatError if the extension is not a supported format.
  def self.open(filepath)
    path = filepath.to_s
    extension = File.extname(path).sub(/\A\./, '').downcase
    case extension
    when 'docx'
      Swordfish::DOCX.open(path)
    else
      raise UnsupportedFormatError, "'#{extension}' is not a recognized file format"
    end
  end

  # Raised when a file's extension does not match any supported format.
  # Note that this derives from LoadError, which is not a StandardError, so
  # a bare `rescue` will not catch it; rescue it by name. The superclass is
  # kept as-is for callers that already rescue LoadError.
  class UnsupportedFormatError < LoadError
  end
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: swordfish
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Martin Posthumus
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-05-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ! '>='
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ! '>='
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: nokogiri
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ! '>='
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ! '>='
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rubyzip
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ! '>='
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ! '>='
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ description: A simple library for various word processor formats focusing primarily
56
+ around conversion to HTML
57
+ email: martin.posthumus@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - README.md
63
+ - lib/swordfish.rb
64
+ - lib/swordfish/document.rb
65
+ - lib/swordfish/formats/docx.rb
66
+ - lib/swordfish/nodes/base.rb
67
+ - lib/swordfish/nodes/hyperlink.rb
68
+ - lib/swordfish/nodes/list.rb
69
+ - lib/swordfish/nodes/list_item.rb
70
+ - lib/swordfish/nodes/paragraph.rb
71
+ - lib/swordfish/nodes/table.rb
72
+ - lib/swordfish/nodes/table_cell.rb
73
+ - lib/swordfish/nodes/table_row.rb
74
+ - lib/swordfish/nodes/text.rb
75
+ - lib/swordfish/stylesheet.rb
76
+ homepage: https://github.com/voikya/swordfish
77
+ licenses:
78
+ - MIT
79
+ metadata: {}
80
+ post_install_message:
81
+ rdoc_options: []
82
+ require_paths:
83
+ - lib
84
+ required_ruby_version: !ruby/object:Gem::Requirement
85
+ requirements:
86
+ - - ! '>='
87
+ - !ruby/object:Gem::Version
88
+ version: 1.9.3
89
+ required_rubygems_version: !ruby/object:Gem::Requirement
90
+ requirements:
91
+ - - ! '>='
92
+ - !ruby/object:Gem::Version
93
+ version: '0'
94
+ requirements: []
95
+ rubyforge_project:
96
+ rubygems_version: 2.2.2
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: A simple library for various word processor formats
100
+ test_files: []
101
+ has_rdoc: