word-to-markdown 1.1.1 → 1.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8826b727f290781e7713325662d056482018d730
4
- data.tar.gz: 8692363359366aac70359bf251ccba409f80bba7
3
+ metadata.gz: 08754da1501d6e4918d753a0727a6fc28fe17962
4
+ data.tar.gz: 4947e109b790a2c61575ce8349b0f11f51598320
5
5
  SHA512:
6
- metadata.gz: 8db1e766ad6bfc341de71a4fc82d14f8a3d109005f2bda9d779af2bd7b0f722d160cf873bff98cf56f9fa7929ab9282e945d249fc220fa56beafe46bd962cf9c
7
- data.tar.gz: fd84315c249e983a6bdf073a8573d2fbca82886ed05da15b8325ceca4f655b3181cee6ee20cf888c933a4b10794a5c5145ad86a8f9ebf8f9550163de9ce284da
6
+ metadata.gz: b70ac3cf257afc4eea9e591923bb43835d6f1a31b45bf0d6a39728acbbd55bc759e2d88f9f3d9456740d683552f4379aa046b321121cf5721f7114ab634d788c
7
+ data.tar.gz: f1e16e97e23ff20229f187474872496e1201409a5f606d25f5f62dba039ea2e8c66c20ead82278037861fe8d9a9256113f4064c894cf37015261d9b8635a83ca
data/bin/w2m CHANGED
@@ -2,10 +2,15 @@
2
2
 
3
3
  require 'word-to-markdown'
4
4
 
5
- if ARGV.size != 1
5
+ if ARGV.size != 1 || ARGV[0] == "--help"
6
6
  puts "Usage: bundle exec w2m path/to/document.docx"
7
7
  exit 1
8
8
  end
9
9
 
10
- doc = WordToMarkdown.new ARGV[0]
11
- puts doc.to_s
10
+ if ARGV[0] == "--version"
11
+ puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
12
+ puts "LibreOffice #{WordToMarkdown.soffice_version}"
13
+ else
14
+ doc = WordToMarkdown.new ARGV[0]
15
+ puts doc.to_s
16
+ end
@@ -14,7 +14,6 @@ class WordToMarkdown
14
14
  end
15
15
 
16
16
  def convert!
17
-
18
17
  # Fonts and headings
19
18
  semanticize_font_styles!
20
19
  semanticize_headings!
@@ -95,18 +94,18 @@ class WordToMarkdown
95
94
 
96
95
  def remove_unicode_bullets_from_list_items!
97
96
  @document.tree.search("li span").each do |span|
98
- span.content = span.content.gsub /^([#{UNICODE_BULLETS.join("")}]+)/, ""
97
+ span.inner_html = span.inner_html.gsub /^([#{UNICODE_BULLETS.join("")}]+)/, ""
99
98
  end
100
99
  end
101
100
 
102
101
  def remove_numbering_from_list_items!
103
102
  @document.tree.search("li span").each do |span|
104
- span.content = span.content.gsub /^[a-zA-Z0-9]+\./m, ""
103
+ span.inner_html = span.inner_html.gsub /^[a-zA-Z0-9]+\./m, ""
105
104
  end
106
105
  end
107
106
 
108
107
  def remove_whitespace_from_list_items!
109
- @document.tree.search("li span").each { |span| span.content.strip! }
108
+ @document.tree.search("li span").each { |span| span.inner_html.strip! }
110
109
  end
111
110
 
112
111
  def semanticize_table_headers!
@@ -17,7 +17,7 @@ class WordToMarkdown
17
17
 
18
18
  def tree
19
19
  @tree ||= begin
20
- tree = Nokogiri::HTML(normalize(raw_html))
20
+ tree = Nokogiri::HTML(normalized_html)
21
21
  tree.css("title").remove
22
22
  tree
23
23
  end
@@ -38,8 +38,8 @@ class WordToMarkdown
38
38
  # html - the raw html export
39
39
  #
40
40
  # Returns the encoding, defaulting to "UTF-8"
41
- def encoding(html)
42
- match = html.encode("UTF-8", :invalid => :replace, :replace => "").match(/charset=([^\"]+)/)
41
+ def encoding
42
+ match = raw_html.encode("UTF-8", :invalid => :replace, :replace => "").match(/charset=([^\"]+)/)
43
43
  if match
44
44
  match[1].sub("macintosh", "MacRoman")
45
45
  else
@@ -54,9 +54,9 @@ class WordToMarkdown
54
54
  # html - the raw html input from the export
55
55
  #
56
56
  # Returns the normalized html
57
- def normalize(html)
58
- encoding = encoding(html)
59
- html = html.force_encoding(encoding).encode("UTF-8", :invalid => :replace, :replace => "")
57
+ def normalized_html
58
+ html = raw_html.force_encoding(encoding)
59
+ html = html.encode("UTF-8", :invalid => :replace, :replace => "")
60
60
  html = Premailer.new(html, :with_html_string => true, :input_encoding => "UTF-8").to_inline_css
61
61
  html.gsub! /\n|\r/," " # Remove linebreaks
62
62
  html.gsub! /“|”/, '"' # Straighten curly double quotes
@@ -71,9 +71,11 @@ class WordToMarkdown
71
71
  #
72
72
  # Returns the normalized markdown
73
73
  def scrub_whitespace(string)
74
- string.sub!(/\A[[:space:]]+/,'') # leading whitespace
75
- string.sub!(/[[:space:]]+\z/,'') # trailing whitespace
76
- string.gsub!(/\n\n \n\n/,"\n\n") # Quadruple line breaks
74
+ string.gsub!("&nbsp;", " ") # HTML encoded spaces
75
+ string.sub!(/\A[[:space:]]+/,'') # document leading whitespace
76
+ string.sub!(/[[:space:]]+\z/,'') # document trailing whitespace
77
+ string.gsub!(/([ ]+)$/, '') # line trailing whitespace
78
+ string.gsub!(/\n\n\n\n/,"\n\n") # Quadruple line breaks
77
79
  string.gsub!(/\u00A0/, "") # Unicode non-breaking spaces, injected as tabs
78
80
  string
79
81
  end
@@ -1,3 +1,3 @@
1
1
  class WordToMarkdown
2
- VERSION = "1.1.1"
2
+ VERSION = "1.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-10 00:00:00.000000000 Z
11
+ date: 2015-03-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: reverse_markdown