RubyGems - word-to-markdown - Version diff - 1.1.1 → 1.1.2 - Mend

word-to-markdown 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6)

checksums.yaml +4 -4
data/bin/w2m +8 -3
data/lib/word-to-markdown/converter.rb +3 -4
data/lib/word-to-markdown/document.rb +11 -9
data/lib/word-to-markdown/version.rb +1 -1
metadata +2 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 8826b727f290781e7713325662d056482018d730
-  data.tar.gz: 8692363359366aac70359bf251ccba409f80bba7
+  metadata.gz: 08754da1501d6e4918d753a0727a6fc28fe17962
+  data.tar.gz: 4947e109b790a2c61575ce8349b0f11f51598320
 SHA512:
-  metadata.gz: 8db1e766ad6bfc341de71a4fc82d14f8a3d109005f2bda9d779af2bd7b0f722d160cf873bff98cf56f9fa7929ab9282e945d249fc220fa56beafe46bd962cf9c
-  data.tar.gz: fd84315c249e983a6bdf073a8573d2fbca82886ed05da15b8325ceca4f655b3181cee6ee20cf888c933a4b10794a5c5145ad86a8f9ebf8f9550163de9ce284da
+  metadata.gz: b70ac3cf257afc4eea9e591923bb43835d6f1a31b45bf0d6a39728acbbd55bc759e2d88f9f3d9456740d683552f4379aa046b321121cf5721f7114ab634d788c
+  data.tar.gz: f1e16e97e23ff20229f187474872496e1201409a5f606d25f5f62dba039ea2e8c66c20ead82278037861fe8d9a9256113f4064c894cf37015261d9b8635a83ca

data/bin/w2m CHANGED Viewed

@@ -2,10 +2,15 @@
 require 'word-to-markdown'
-if ARGV.size != 1
+if ARGV.size != 1 || ARGV[0] == "--help"
   puts "Usage: bundle exec w2m path/to/document.docx"
   exit 1
 end
-doc = WordToMarkdown.new ARGV[0]
-puts doc.to_s
+if ARGV[0] == "--version"
+  puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
+  puts "LibreOffice #{WordToMarkdown.soffice_version}"
+else
+  doc = WordToMarkdown.new ARGV[0]
+  puts doc.to_s
+end

data/lib/word-to-markdown/converter.rb CHANGED Viewed

@@ -14,7 +14,6 @@ class WordToMarkdown
     end
     def convert!
       # Fonts and headings
       semanticize_font_styles!
       semanticize_headings!
@@ -95,18 +94,18 @@ class WordToMarkdown
     def remove_unicode_bullets_from_list_items!
       @document.tree.search("li span").each do |span|
-        span.content = span.content.gsub /^([#{UNICODE_BULLETS.join("")}]+)/, ""
+        span.inner_html = span.inner_html.gsub /^([#{UNICODE_BULLETS.join("")}]+)/, ""
       end
     end
     def remove_numbering_from_list_items!
       @document.tree.search("li span").each do |span|
-        span.content = span.content.gsub /^[a-zA-Z0-9]+\./m, ""
+        span.inner_html = span.inner_html.gsub /^[a-zA-Z0-9]+\./m, ""
       end
     end
     def remove_whitespace_from_list_items!
-      @document.tree.search("li span").each { |span| span.content.strip! }
+      @document.tree.search("li span").each { |span| span.inner_html.strip! }
     end
     def semanticize_table_headers!

data/lib/word-to-markdown/document.rb CHANGED Viewed

@@ -17,7 +17,7 @@ class WordToMarkdown
     def tree
       @tree ||= begin
-        tree = Nokogiri::HTML(normalize(raw_html))
+        tree = Nokogiri::HTML(normalized_html)
         tree.css("title").remove
         tree
       end
@@ -38,8 +38,8 @@ class WordToMarkdown
     # html - the raw html export
     #
     # Returns the encoding, defaulting to "UTF-8"
-    def encoding(html)
-      match = html.encode("UTF-8", :invalid => :replace, :replace => "").match(/charset=([^\"]+)/)
+    def encoding
+      match = raw_html.encode("UTF-8", :invalid => :replace, :replace => "").match(/charset=([^\"]+)/)
       if match
         match[1].sub("macintosh", "MacRoman")
       else
@@ -54,9 +54,9 @@ class WordToMarkdown
     # html - the raw html input from the export
     #
     # Returns the normalized html
-    def normalize(html)
-      encoding = encoding(html)
-      html = html.force_encoding(encoding).encode("UTF-8", :invalid => :replace, :replace => "")
+    def normalized_html
+      html = raw_html.force_encoding(encoding)
+      html = html.encode("UTF-8", :invalid => :replace, :replace => "")
       html = Premailer.new(html, :with_html_string => true, :input_encoding => "UTF-8").to_inline_css
       html.gsub! /\n|\r/," "         # Remove linebreaks
       html.gsub! /“|”/, '"'          # Straighten curly double quotes
@@ -71,9 +71,11 @@ class WordToMarkdown
     #
     # Returns the normalized markdown
     def scrub_whitespace(string)
-      string.sub!(/\A[[:space:]]+/,'')                # leading whitespace
-      string.sub!(/[[:space:]]+\z/,'')                # trailing whitespace
-      string.gsub!(/\n\n \n\n/,"\n\n")                # Quadruple line breaks
+      string.gsub!("&nbsp;", " ")                     # HTML encoded spaces
+      string.sub!(/\A[[:space:]]+/,'')                # document leading whitespace
+      string.sub!(/[[:space:]]+\z/,'')                # document trailing whitespace
+      string.gsub!(/([ ]+)$/, '')                     # line trailing whitespace
+      string.gsub!(/\n\n\n\n/,"\n\n")                 # Quadruple line breaks
       string.gsub!(/\u00A0/, "")                      # Unicode non-breaking spaces, injected as tabs
       string
     end

data/lib/word-to-markdown/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class WordToMarkdown
-  VERSION = "1.1.1"
+  VERSION = "1.1.2"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: word-to-markdown
 version: !ruby/object:Gem::Version
-  version: 1.1.1
+  version: 1.1.2
 platform: ruby
 authors:
 - Ben Balter
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-01-10 00:00:00.000000000 Z
+date: 2015-03-21 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: reverse_markdown