word-to-markdown 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/word-to-markdown.rb +29 -15
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0b8d7208877325f1e568f7f91255a7cfbc2df815
|
4
|
+
data.tar.gz: 4864e21eb71649017c61f677b24e611650a5aed0
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63bea7811559b150d55388090f1fd79df3a4f2a562cfe767bb3675a6c2af5e398cad2c00383ffb00c4536e66aebbbdc470a378eea4e3deec9de686cfd314082f
|
7
|
+
data.tar.gz: cd2c2593502aedef4fa2baddd245d671312b4521877401d1a07e60693d66d1fd15c3e7b14d5746bb7661a479daee7e00cc1c27e2a385ed73ee2846e01151a7f3
|
data/lib/word-to-markdown.rb
CHANGED
@@ -11,10 +11,11 @@ class WordToMarkdown
|
|
11
11
|
MIN_HEADING_SIZE = 20
|
12
12
|
|
13
13
|
LI_SELECTORS = %w[
|
14
|
-
MsoListParagraphCxSpFirst
|
15
|
-
MsoListParagraphCxSpMiddle
|
16
|
-
MsoListParagraphCxSpLast
|
17
|
-
MsoListParagraph
|
14
|
+
.MsoListParagraphCxSpFirst
|
15
|
+
.MsoListParagraphCxSpMiddle
|
16
|
+
.MsoListParagraphCxSpLast
|
17
|
+
.MsoListParagraph
|
18
|
+
li
|
18
19
|
]
|
19
20
|
|
20
21
|
attr_reader :path, :doc
|
@@ -143,7 +144,21 @@ class WordToMarkdown
|
|
143
144
|
|
144
145
|
# CSS selector to select non-semantic lists
|
145
146
|
def li_selectors
|
146
|
-
|
147
|
+
LI_SELECTORS.join(",")
|
148
|
+
end
|
149
|
+
|
150
|
+
# Returns a sorted array of the unique indent values found on list items
|
151
|
+
def indents
|
152
|
+
@indents ||= doc.css(li_selectors).map{ |el| el.indent }.uniq.sort
|
153
|
+
end
|
154
|
+
|
155
|
+
# Determine the indent level given an indent value
|
156
|
+
#
|
157
|
+
# level - the true indent, e.g., 2.5 (from 2.5em)
|
158
|
+
#
|
159
|
+
# Returns an integer representing the indent level
|
160
|
+
def indent(level)
|
161
|
+
indents.find_index level
|
147
162
|
end
|
148
163
|
|
149
164
|
# Try to make semantic markup explicit where implied by the export
|
@@ -160,25 +175,24 @@ class WordToMarkdown
|
|
160
175
|
list_type = "ul"
|
161
176
|
end
|
162
177
|
|
178
|
+
# calculate indent level
|
179
|
+
current_indent = indent(node.indent)
|
180
|
+
|
163
181
|
# Determine parent node for this li, creating it if necessary
|
164
|
-
if
|
182
|
+
if current_indent > indent_level || indent_level == 0 && node.parent.css(".indent#{current_indent}").empty?
|
165
183
|
list = Nokogiri::XML::Node.new list_type, @doc
|
166
|
-
list.classes = ["list", "indent#{
|
167
|
-
|
168
|
-
list.parent = node.parent
|
169
|
-
else
|
170
|
-
list.parent = node.parent.css(".indent#{node.indent-1} li").last
|
171
|
-
end
|
184
|
+
list.classes = ["list", "indent#{current_indent}"]
|
185
|
+
list.parent = node.parent.css(".indent#{current_indent-1} li").last || node.parent
|
172
186
|
else
|
173
|
-
list = node.parent.css(".indent#{
|
187
|
+
list = node.parent.css(".indent#{current_indent}").last
|
174
188
|
end
|
175
189
|
|
176
190
|
# Note our current nesting depth
|
177
|
-
indent_level =
|
191
|
+
indent_level = current_indent
|
178
192
|
|
179
193
|
# Convert list paragraphs to actual numbered and unnumbered lists
|
180
194
|
node.node_name = "li"
|
181
|
-
node.parent = list
|
195
|
+
node.parent = list if list
|
182
196
|
|
183
197
|
# Scrub unicode bullets
|
184
198
|
span = node.css("span:first")[1]
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word-to-markdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-03-
|
11
|
+
date: 2014-03-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: reverse_markdown
|