word-to-markdown 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/word-to-markdown.rb +29 -15
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 81c1f8bce02e417b5338908ad224b5eede8170f1
4
- data.tar.gz: 48ddedc28faa4f8de21b0038e8f758585fa376e5
3
+ metadata.gz: 0b8d7208877325f1e568f7f91255a7cfbc2df815
4
+ data.tar.gz: 4864e21eb71649017c61f677b24e611650a5aed0
5
5
  SHA512:
6
- metadata.gz: 28a0c7229327a22874ee65dd7bf748c853deb9aafe83ad05ca7d228d701847f9707e7655dd354ddda4aa5b5400ca151ec0a67590aaf7bcc1fe83ade50bc4befc
7
- data.tar.gz: 6f1e0ae2c3cca7ee0c9bce22439b6f21c79ad572851fa6056593f1fb0c1d26bee289bb586cdfab11102314cb7261e3ae0c7e515c04185fcb6d52bbc5e036c3b6
6
+ metadata.gz: 63bea7811559b150d55388090f1fd79df3a4f2a562cfe767bb3675a6c2af5e398cad2c00383ffb00c4536e66aebbbdc470a378eea4e3deec9de686cfd314082f
7
+ data.tar.gz: cd2c2593502aedef4fa2baddd245d671312b4521877401d1a07e60693d66d1fd15c3e7b14d5746bb7661a479daee7e00cc1c27e2a385ed73ee2846e01151a7f3
@@ -11,10 +11,11 @@ class WordToMarkdown
11
11
  MIN_HEADING_SIZE = 20
12
12
 
13
13
  LI_SELECTORS = %w[
14
- MsoListParagraphCxSpFirst
15
- MsoListParagraphCxSpMiddle
16
- MsoListParagraphCxSpLast
17
- MsoListParagraph
14
+ .MsoListParagraphCxSpFirst
15
+ .MsoListParagraphCxSpMiddle
16
+ .MsoListParagraphCxSpLast
17
+ .MsoListParagraph
18
+ li
18
19
  ]
19
20
 
20
21
  attr_reader :path, :doc
@@ -143,7 +144,21 @@ class WordToMarkdown
143
144
 
144
145
  # CSS selector to select non-semantic lists
145
146
  def li_selectors
146
- ".#{LI_SELECTORS.join(",.")}"
147
+ LI_SELECTORS.join(",")
148
+ end
149
+
150
+ # Returns a sorted array of the unique indent values
151
+ def indents
152
+ @indents ||= doc.css(li_selectors).map{ |el| el.indent }.uniq.sort
153
+ end
154
+
155
+ # Determine the indent level given an indent value
156
+ #
157
+ # level - the true indent, e.g., 2.5 (from 2.5em)
158
+ #
159
+ # Returns an integer representing the indent level
160
+ def indent(level)
161
+ indents.find_index level
147
162
  end
148
163
 
149
164
  # Try to make semantic markup explicit where implied by the export
@@ -160,25 +175,24 @@ class WordToMarkdown
160
175
  list_type = "ul"
161
176
  end
162
177
 
178
+ # calculate indent level
179
+ current_indent = indent(node.indent)
180
+
163
181
  # Determine parent node for this li, creating it if necessary
164
- if node.indent > indent_level
182
+ if current_indent > indent_level || indent_level == 0 && node.parent.css(".indent#{current_indent}").empty?
165
183
  list = Nokogiri::XML::Node.new list_type, @doc
166
- list.classes = ["list", "indent#{node.indent}"]
167
- if node.indent == 1
168
- list.parent = node.parent
169
- else
170
- list.parent = node.parent.css(".indent#{node.indent-1} li").last
171
- end
184
+ list.classes = ["list", "indent#{current_indent}"]
185
+ list.parent = node.parent.css(".indent#{current_indent-1} li").last || node.parent
172
186
  else
173
- list = node.parent.css(".indent#{node.indent}").last
187
+ list = node.parent.css(".indent#{current_indent}").last
174
188
  end
175
189
 
176
190
  # Note our current nesting depth
177
- indent_level = node.indent
191
+ indent_level = current_indent
178
192
 
179
193
  # Convert list paragraphs to actual numbered and unnumbered lists
180
194
  node.node_name = "li"
181
- node.parent = list
195
+ node.parent = list if list
182
196
 
183
197
  # Scrub unicode bullets
184
198
  span = node.css("span:first")[1]
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-27 00:00:00.000000000 Z
11
+ date: 2014-03-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: reverse_markdown