word-to-markdown 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/word-to-markdown.rb +29 -15
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b8d7208877325f1e568f7f91255a7cfbc2df815
|
4
|
+
data.tar.gz: 4864e21eb71649017c61f677b24e611650a5aed0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63bea7811559b150d55388090f1fd79df3a4f2a562cfe767bb3675a6c2af5e398cad2c00383ffb00c4536e66aebbbdc470a378eea4e3deec9de686cfd314082f
|
7
|
+
data.tar.gz: cd2c2593502aedef4fa2baddd245d671312b4521877401d1a07e60693d66d1fd15c3e7b14d5746bb7661a479daee7e00cc1c27e2a385ed73ee2846e01151a7f3
|
data/lib/word-to-markdown.rb
CHANGED
@@ -11,10 +11,11 @@ class WordToMarkdown
|
|
11
11
|
MIN_HEADING_SIZE = 20
|
12
12
|
|
13
13
|
LI_SELECTORS = %w[
|
14
|
-
MsoListParagraphCxSpFirst
|
15
|
-
MsoListParagraphCxSpMiddle
|
16
|
-
MsoListParagraphCxSpLast
|
17
|
-
MsoListParagraph
|
14
|
+
.MsoListParagraphCxSpFirst
|
15
|
+
.MsoListParagraphCxSpMiddle
|
16
|
+
.MsoListParagraphCxSpLast
|
17
|
+
.MsoListParagraph
|
18
|
+
li
|
18
19
|
]
|
19
20
|
|
20
21
|
attr_reader :path, :doc
|
@@ -143,7 +144,21 @@ class WordToMarkdown
|
|
143
144
|
|
144
145
|
# CSS selector to select non-semantic lists
|
145
146
|
def li_selectors
|
146
|
-
|
147
|
+
LI_SELECTORS.join(",")
|
148
|
+
end
|
149
|
+
|
150
|
+
# Returns an array of all indented values
|
151
|
+
def indents
|
152
|
+
@indents ||= doc.css(li_selectors).map{ |el| el.indent }.uniq.sort
|
153
|
+
end
|
154
|
+
|
155
|
+
# Determine the indent level given an indent value
|
156
|
+
#
|
157
|
+
# level - the true indent, e.g., 2.5 (from 2.5em)
|
158
|
+
#
|
159
|
+
# Returns an integer representing the indent level
|
160
|
+
def indent(level)
|
161
|
+
indents.find_index level
|
147
162
|
end
|
148
163
|
|
149
164
|
# Try to make semantic markup explicit where implied by the export
|
@@ -160,25 +175,24 @@ class WordToMarkdown
|
|
160
175
|
list_type = "ul"
|
161
176
|
end
|
162
177
|
|
178
|
+
# calculate indent level
|
179
|
+
current_indent = indent(node.indent)
|
180
|
+
|
163
181
|
# Determine parent node for this li, creating it if necessary
|
164
|
-
if
|
182
|
+
if current_indent > indent_level || indent_level == 0 && node.parent.css(".indent#{current_indent}").empty?
|
165
183
|
list = Nokogiri::XML::Node.new list_type, @doc
|
166
|
-
list.classes = ["list", "indent#{
|
167
|
-
|
168
|
-
list.parent = node.parent
|
169
|
-
else
|
170
|
-
list.parent = node.parent.css(".indent#{node.indent-1} li").last
|
171
|
-
end
|
184
|
+
list.classes = ["list", "indent#{current_indent}"]
|
185
|
+
list.parent = node.parent.css(".indent#{current_indent-1} li").last || node.parent
|
172
186
|
else
|
173
|
-
list = node.parent.css(".indent#{
|
187
|
+
list = node.parent.css(".indent#{current_indent}").last
|
174
188
|
end
|
175
189
|
|
176
190
|
# Note our current nesting depth
|
177
|
-
indent_level =
|
191
|
+
indent_level = current_indent
|
178
192
|
|
179
193
|
# Convert list paragraphs to actual numbered and unnumbered lists
|
180
194
|
node.node_name = "li"
|
181
|
-
node.parent = list
|
195
|
+
node.parent = list if list
|
182
196
|
|
183
197
|
# Scrub unicode bullets
|
184
198
|
span = node.css("span:first")[1]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word-to-markdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: reverse_markdown
|