word-to-markdown 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/word-to-markdown.rb +29 -15
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 81c1f8bce02e417b5338908ad224b5eede8170f1
4
- data.tar.gz: 48ddedc28faa4f8de21b0038e8f758585fa376e5
3
+ metadata.gz: 0b8d7208877325f1e568f7f91255a7cfbc2df815
4
+ data.tar.gz: 4864e21eb71649017c61f677b24e611650a5aed0
5
5
  SHA512:
6
- metadata.gz: 28a0c7229327a22874ee65dd7bf748c853deb9aafe83ad05ca7d228d701847f9707e7655dd354ddda4aa5b5400ca151ec0a67590aaf7bcc1fe83ade50bc4befc
7
- data.tar.gz: 6f1e0ae2c3cca7ee0c9bce22439b6f21c79ad572851fa6056593f1fb0c1d26bee289bb586cdfab11102314cb7261e3ae0c7e515c04185fcb6d52bbc5e036c3b6
6
+ metadata.gz: 63bea7811559b150d55388090f1fd79df3a4f2a562cfe767bb3675a6c2af5e398cad2c00383ffb00c4536e66aebbbdc470a378eea4e3deec9de686cfd314082f
7
+ data.tar.gz: cd2c2593502aedef4fa2baddd245d671312b4521877401d1a07e60693d66d1fd15c3e7b14d5746bb7661a479daee7e00cc1c27e2a385ed73ee2846e01151a7f3
@@ -11,10 +11,11 @@ class WordToMarkdown
11
11
  MIN_HEADING_SIZE = 20
12
12
 
13
13
  LI_SELECTORS = %w[
14
- MsoListParagraphCxSpFirst
15
- MsoListParagraphCxSpMiddle
16
- MsoListParagraphCxSpLast
17
- MsoListParagraph
14
+ .MsoListParagraphCxSpFirst
15
+ .MsoListParagraphCxSpMiddle
16
+ .MsoListParagraphCxSpLast
17
+ .MsoListParagraph
18
+ li
18
19
  ]
19
20
 
20
21
  attr_reader :path, :doc
@@ -143,7 +144,21 @@ class WordToMarkdown
143
144
 
144
145
  # CSS selector to select non-semantic lists
145
146
  def li_selectors
146
- ".#{LI_SELECTORS.join(",.")}"
147
+ LI_SELECTORS.join(",")
148
+ end
149
+
150
+ # Returns an array of all indented values
151
+ def indents
152
+ @indents ||= doc.css(li_selectors).map{ |el| el.indent }.uniq.sort
153
+ end
154
+
155
+ # Determine the indent level given an indent value
156
+ #
157
+ # level - the true indent, e.g., 2.5 (from 2.5em)
158
+ #
159
+ # Returns an integer representing the indent level
160
+ def indent(level)
161
+ indents.find_index level
147
162
  end
148
163
 
149
164
  # Try to make semantic markup explicit where implied by the export
@@ -160,25 +175,24 @@ class WordToMarkdown
160
175
  list_type = "ul"
161
176
  end
162
177
 
178
+ # calculate indent level
179
+ current_indent = indent(node.indent)
180
+
163
181
  # Determine parent node for this li, creating it if necessary
164
- if node.indent > indent_level
182
+ if current_indent > indent_level || indent_level == 0 && node.parent.css(".indent#{current_indent}").empty?
165
183
  list = Nokogiri::XML::Node.new list_type, @doc
166
- list.classes = ["list", "indent#{node.indent}"]
167
- if node.indent == 1
168
- list.parent = node.parent
169
- else
170
- list.parent = node.parent.css(".indent#{node.indent-1} li").last
171
- end
184
+ list.classes = ["list", "indent#{current_indent}"]
185
+ list.parent = node.parent.css(".indent#{current_indent-1} li").last || node.parent
172
186
  else
173
- list = node.parent.css(".indent#{node.indent}").last
187
+ list = node.parent.css(".indent#{current_indent}").last
174
188
  end
175
189
 
176
190
  # Note our current nesting depth
177
- indent_level = node.indent
191
+ indent_level = current_indent
178
192
 
179
193
  # Convert list paragraphs to actual numbered and unnumbered lists
180
194
  node.node_name = "li"
181
- node.parent = list
195
+ node.parent = list if list
182
196
 
183
197
  # Scrub unicode bullets
184
198
  span = node.css("span:first")[1]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-27 00:00:00.000000000 Z
11
+ date: 2014-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: reverse_markdown