swordfish 0.0.0 → 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/lib/swordfish/formats/docx.rb +32 -8
- data/lib/swordfish/nodes/base.rb +22 -1
- data/lib/swordfish/nodes/list.rb +21 -3
- data/lib/swordfish/nodes/paragraph.rb +2 -1
- data/lib/swordfish/nodes/text.rb +2 -0
- data/lib/swordfish/stylesheet.rb +1 -1
- data/lib/swordfish.rb +3 -3
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
MGMzODc2Y2FlNzUzMzdiMGNmMjlmODA1MjUwYjg3MWJhMzYwZWViOQ==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
ODgzMmMyNDBkNTEzZDg1M2NkNWFiY2RlN2VlZDBjYmE2M2I3NTNlYw==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ZjY4M2JiYmZlZGM1MTMxY2FlYWI2MjVlOTg1YmU0OTMxZDZmOTY1MWRmZjc3
|
10
|
+
ZTFiYTJiZTAyODJiZjA0OWM1MjAzNDE0YWQyODVjYTY4ZjYwYzlmNmFmM2Y1
|
11
|
+
MzE5NThjZDQwMGRmNDJjNjc4YzI3NmZkNWM3Y2I3Nzc4ZjQ2MWQ=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
OTRhZmZkYjQzOWQ5NGIxOGQ1NmNiNGE1MWUwNTBjYTIyMWFmOWUxNDdkN2U1
|
14
|
+
MzBmYWZhM2UwOGYxNmFkN2M0ZjY1ZWI1Y2MzZjczMzgxMGU5MmY2M2MwZWVh
|
15
|
+
N2QxYjIzMTkzYTdkODkzMDg2Mjg3OWMwYTcwZGQ0ZmI4Yzk1MjM=
|
@@ -50,12 +50,16 @@ module Swordfish
|
|
50
50
|
@xml.xpath('//w:body').children.each do |node|
|
51
51
|
case node.name
|
52
52
|
when 'p'
|
53
|
-
if node.xpath('.//w:numPr').length == 0
|
53
|
+
if node.xpath('.//w:numPr').length == 0 && (@buffer.is_a?(Swordfish::Node::List) ? node.xpath('.//w:ind').length.zero? : true)
|
54
54
|
# Regular paragraph
|
55
|
+
# (The buffer check makes sure that this isn't an indented paragraph immediately after a list item,
|
56
|
+
# which means we're most likely dealing with a multi-paragraph list item)
|
55
57
|
flush
|
56
58
|
@swordfish_doc.append _node_parse_paragraph(node)
|
57
|
-
|
59
|
+
elsif node.xpath('.//w:numPr/ancestor::w:pPrChange').length.zero?
|
58
60
|
# List paragraph
|
61
|
+
# (must have a numPr node, but cannot have a pPrChange ancestor, since that means
|
62
|
+
# we are just looking at historical changes)
|
59
63
|
# (Don't flush because we need to first ensure the list is fully parsed)
|
60
64
|
_node_parse_list(node)
|
61
65
|
end
|
@@ -86,6 +90,12 @@ module Swordfish
|
|
86
90
|
elsif style_node['w:val'] == 'subscript'
|
87
91
|
swordfish_node.stylize :subscript
|
88
92
|
end
|
93
|
+
when 'rStyle'
|
94
|
+
if style_node['w:val'] == 'Strong'
|
95
|
+
swordfish_node.stylize :strong
|
96
|
+
elsif style_node['w:val'] == 'Emphasis'
|
97
|
+
swordfish_node.stylize :emphasis
|
98
|
+
end
|
89
99
|
end
|
90
100
|
end
|
91
101
|
end
|
@@ -142,9 +152,13 @@ module Swordfish
|
|
142
152
|
when 'r'
|
143
153
|
# A true run node
|
144
154
|
text = Swordfish::Node::Text.new
|
145
|
-
|
146
|
-
|
147
|
-
|
155
|
+
if run_xml.xpath('./w:t').length > 0
|
156
|
+
# Only examine the run if it includes text codes. The run may also include
|
157
|
+
# things like comment nodes, which should be ignored.
|
158
|
+
text.content = run_xml.xpath('./w:t')[0].content
|
159
|
+
get_styles_for_node(text, run_xml.xpath('./w:rPr')[0])
|
160
|
+
texts << text
|
161
|
+
end
|
148
162
|
when 'hyperlink'
|
149
163
|
# Hyperlink nodes are placed amongst other run nodes, but
|
150
164
|
# they themselves also contain runs. Hyperlinks include
|
@@ -174,6 +188,15 @@ module Swordfish
|
|
174
188
|
# since the only way to tell the list has been fully parsed is to encounter
|
175
189
|
# a non-list element.
|
176
190
|
|
191
|
+
# Handle paragraphs with no level, which represent multi-paragraph list items
|
192
|
+
if node.xpath(".//w:numPr/w:ilvl").length.zero?
|
193
|
+
para = Swordfish::Node::Paragraph.new
|
194
|
+
_node_parse_runs(node).each {|r| para.append(r)}
|
195
|
+
@buffer.last_list_item(:recurse => true).wrap_children(Swordfish::Node::Text, Swordfish::Node::Paragraph)
|
196
|
+
@buffer.last_list_item(:recurse => true).append para
|
197
|
+
return
|
198
|
+
end
|
199
|
+
|
177
200
|
# Get the list item's abstract numbering and level
|
178
201
|
list_item = Swordfish::Node::ListItem.new
|
179
202
|
_node_parse_runs(node).each {|r| list_item.append(r)}
|
@@ -184,22 +207,23 @@ module Swordfish
|
|
184
207
|
unless @buffer
|
185
208
|
@buffer = Swordfish::Node::List.new
|
186
209
|
@buffer.stylize @numbering[numbering_scheme][level].to_sym
|
210
|
+
@buffer_initial_value = level # Lists may have an arbitrary initial level
|
187
211
|
end
|
188
212
|
|
189
213
|
# Compare the level of this list item to the bottommost node in
|
190
214
|
# the build buffer to determine where in the hierarchy to add
|
191
215
|
# this node (i.e., are we dealing with list nesting or not?)
|
192
|
-
if @buffer.depth_of_final_node >= level
|
216
|
+
if @buffer.depth_of_final_node >= level || @buffer.children.empty?
|
193
217
|
# Add sibling to existing list
|
194
218
|
target = @buffer
|
195
|
-
level.times do
|
219
|
+
(level - @buffer_initial_value).times do
|
196
220
|
target = target.last_list_item.nested_list
|
197
221
|
end
|
198
222
|
target.append list_item
|
199
223
|
elsif @buffer.depth_of_final_node < level
|
200
224
|
# Add new nested list
|
201
225
|
target = @buffer
|
202
|
-
(level - 1).times do
|
226
|
+
(level - @buffer_initial_value- 1).times do
|
203
227
|
target = target.last_list_item.nested_list
|
204
228
|
end
|
205
229
|
list = Swordfish::Node::List.new
|
data/lib/swordfish/nodes/base.rb
CHANGED
@@ -5,7 +5,7 @@ module Swordfish
|
|
5
5
|
class Base
|
6
6
|
|
7
7
|
attr_accessor :content
|
8
|
-
|
8
|
+
attr_accessor :children
|
9
9
|
attr_reader :style
|
10
10
|
|
11
11
|
# Initialize with a blank stylesheet and no children
|
@@ -21,6 +21,11 @@ module Swordfish
|
|
21
21
|
@children.flatten!
|
22
22
|
end
|
23
23
|
|
24
|
+
# Replace a child node at a given index
|
25
|
+
def replace(node, idx)
|
26
|
+
@children[idx] = node
|
27
|
+
end
|
28
|
+
|
24
29
|
# Take a style or styles and add them to this node's stylesheet
|
25
30
|
def stylize(styles)
|
26
31
|
@style.merge styles
|
@@ -39,6 +44,22 @@ module Swordfish
|
|
39
44
|
end
|
40
45
|
end
|
41
46
|
|
47
|
+
# Delete all child nodes
|
48
|
+
def clear_children
|
49
|
+
@children = []
|
50
|
+
end
|
51
|
+
|
52
|
+
# Wrap all children of type child_class with a new node of type wrapper_class
|
53
|
+
def wrap_children(child_class, wrapper_class)
|
54
|
+
new_node = wrapper_class.new
|
55
|
+
new_node.append @children.select{|n| n.is_a? child_class}
|
56
|
+
unless new_node.children.empty?
|
57
|
+
idx = @children.find_index(new_node.children[0])
|
58
|
+
@children = @children - new_node.children
|
59
|
+
@children.insert idx, new_node
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
42
63
|
end
|
43
64
|
|
44
65
|
class BadContentError < Exception
|
data/lib/swordfish/nodes/list.rb
CHANGED
@@ -23,9 +23,27 @@ module Swordfish
|
|
23
23
|
depth
|
24
24
|
end
|
25
25
|
|
26
|
-
# Return the final child list
|
27
|
-
def
|
28
|
-
|
26
|
+
# Return the final child list
|
27
|
+
def last_list
|
28
|
+
node = self
|
29
|
+
while node.children && node.last_list_item.nested_list
|
30
|
+
node = node.last_list_item.nested_list
|
31
|
+
end
|
32
|
+
node
|
33
|
+
end
|
34
|
+
|
35
|
+
# Return the final child list item
|
36
|
+
def last_list_item(opts = {})
|
37
|
+
if opts[:recurse]
|
38
|
+
node = self
|
39
|
+
li = @children.last
|
40
|
+
while node.children && node = node.last_list_item.nested_list
|
41
|
+
li = node.children.last
|
42
|
+
end
|
43
|
+
li
|
44
|
+
else
|
45
|
+
@children.last
|
46
|
+
end
|
29
47
|
end
|
30
48
|
|
31
49
|
end
|
data/lib/swordfish/nodes/text.rb
CHANGED
@@ -17,6 +17,8 @@ module Swordfish
|
|
17
17
|
html = "<strike>#{html}</strike>" if @style.strikethrough?
|
18
18
|
html = "<sup>#{html}</sup>" if @style.superscript?
|
19
19
|
html = "<sub>#{html}</sub>" if @style.subscript?
|
20
|
+
html = "<strong>#{html}</strong>" if @style.strong?
|
21
|
+
html = "<em>#{html}</em>" if @style.emphasis?
|
20
22
|
html
|
21
23
|
end
|
22
24
|
|
data/lib/swordfish/stylesheet.rb
CHANGED
@@ -6,7 +6,7 @@ module Swordfish
|
|
6
6
|
# Define all supported values here
|
7
7
|
SUPPORTED_STYLES = [
|
8
8
|
# Inline styles
|
9
|
-
:bold, :italic, :underline, :superscript, :subscript, :strikethrough,
|
9
|
+
:bold, :italic, :underline, :superscript, :subscript, :strikethrough, :strong, :emphasis,
|
10
10
|
# List enumeration styles
|
11
11
|
:bullet, :decimal, :lowerLetter, :lowerRoman
|
12
12
|
]
|
data/lib/swordfish.rb
CHANGED
@@ -4,10 +4,10 @@ require 'swordfish/formats/docx'
|
|
4
4
|
module Swordfish
|
5
5
|
|
6
6
|
# Main entry point into the parser. Pass in a filepath and return a parsed document.
|
7
|
-
def self.open(filepath)
|
8
|
-
extension = filepath.split('.').last.downcase
|
7
|
+
def self.open(filepath, opts={})
|
8
|
+
extension = (opts[:extension] || filepath.split('.').last).downcase.to_sym
|
9
9
|
case extension
|
10
|
-
when
|
10
|
+
when :docx
|
11
11
|
Swordfish::DOCX.open(filepath)
|
12
12
|
else
|
13
13
|
raise UnsupportedFormatError, "'#{extension}' is not a recognized file format"
|