word-to-markdown 1.1.4 → 1.1.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ebb72caa54bd03cd393646e9458f228a47d4bb6
4
- data.tar.gz: 9a11db1413481a4abd7a04f38b4fe9b31f765f7e
3
+ metadata.gz: 9de91d876eb8b4dd4b895c3a36240cb270891592
4
+ data.tar.gz: 7a671dd33ed9faa1cab5e14cef87da8be3a34f61
5
5
  SHA512:
6
- metadata.gz: 330dc12f0a9b438bc5485f702594690131a0a0e6306a12e75996017d4dddbbf5591d8d35ea2450fbf54b9e4bf67420d18940901025d1c730dc58d3e8563e46ce
7
- data.tar.gz: 929e165957b4dbdce0becfb614539df57b0b7748ee204ee258bda450e68c4f0d55a112e09f308415f100d0d335275ab7a8ba8932865b49ce8ac81eb1eb626bc0
6
+ metadata.gz: 220ba6079d8383d369601a84ab32f278deb63ec498317b1b77c60208e8fa16ae9317ed57ce6bb9db9164ecc85ac759eeabe0dc95239e4b993dd777ba32c79c12
7
+ data.tar.gz: 34a838bfb0f659c3cc6a70b3488d01fba8f4b5f69097ba760c06ead8df31e50c05095f0d2e1a178ea9b5eedcff8551a32a1ff8ccb6e421016db21367df5b690a
data/bin/w2m CHANGED
@@ -9,7 +9,7 @@ end
9
9
 
10
10
  if ARGV[0] == "--version"
11
11
  puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
12
- puts "LibreOffice #{WordToMarkdown.soffice_version}"
12
+ puts "LibreOffice v#{WordToMarkdown.soffice.version}"
13
13
  else
14
14
  doc = WordToMarkdown.new ARGV[0]
15
15
  puts doc.to_s
@@ -0,0 +1,31 @@
1
+ require 'sys/proctable'
2
+
3
+ module Cliver
4
+ class Dependency
5
+
6
+ include Sys
7
+
8
+ # Memoized shortcut for detect!
9
+ # Returns the path to the detected dependency
10
+ # Raises an error if the dependency was not satisfied
11
+ def path
12
+ @detected_path ||= detect!
13
+ end
14
+
15
+ # Is the detected dependency currently open?
16
+ def open?
17
+ ProcTable.ps.any? { |p| p.comm == path }
18
+ end
19
+
20
+ # Returns the version of the resolved dependency
21
+ def version
22
+ return @detected_version if defined? @detected_version
23
+ version = installed_versions.find { |p, v| p == path }
24
+ @detected_version = version.nil? ? nil : version[1]
25
+ end
26
+
27
+ def major_version
28
+ version.split(".").first
29
+ end
30
+ end
31
+ end
@@ -1,22 +1,22 @@
1
1
  require 'descriptive_statistics'
2
2
  require 'reverse_markdown'
3
3
  require 'nokogiri-styles'
4
- require 'sys/proctable'
5
4
  require 'premailer'
6
5
  require 'rbconfig'
7
6
  require 'nokogiri'
7
+ require 'logger'
8
8
  require 'tmpdir'
9
+ require 'cliver'
9
10
  require 'open3'
10
11
 
11
12
  require_relative 'word-to-markdown/version'
12
13
  require_relative 'word-to-markdown/document'
13
14
  require_relative 'word-to-markdown/converter'
14
15
  require_relative 'nokogiri/xml/element'
16
+ require_relative 'cliver/dependency_ext'
15
17
 
16
18
  class WordToMarkdown
17
19
 
18
- include Sys
19
-
20
20
  attr_reader :document, :converter
21
21
 
22
22
  REVERSE_MARKDOWN_OPTIONS = {
@@ -24,6 +24,15 @@ class WordToMarkdown
24
24
  github_flavored: true
25
25
  }
26
26
 
27
+ SOFFICE_VERSION_REQUIREMENT = '> 4.0'
28
+
29
+ PATHS = [
30
+ "~/Applications/LibreOffice.app/Contents/MacOS",
31
+ "/Applications/LibreOffice.app/Contents/MacOS",
32
+ "/C/Program Files (x86)/LibreOffice 5/program",
33
+ "/C/Program Files (x86)/LibreOffice 4/program"
34
+ ]
35
+
27
36
  # Create a new WordToMarkdown object
28
37
  #
29
38
  # input - an HTML string or path to an HTML file
@@ -35,59 +44,36 @@ class WordToMarkdown
35
44
  converter.convert!
36
45
  end
37
46
 
38
- # source: https://stackoverflow.com/questions/11784109/detecting-operating-systems-in-ruby
39
- def self.os
40
- @os ||= (
41
- host_os = RbConfig::CONFIG['host_os']
42
- case host_os
43
- when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
44
- :windows
45
- when /darwin|mac os/
46
- :macosx
47
- when /linux/
48
- :linux
49
- when /solaris|bsd/
50
- :unix
51
- else
52
- raise Error::WebDriverError, "unknown os: #{host_os.inspect}"
53
- end
54
- )
55
- end
56
-
57
- def self.soffice_path
58
- case os
59
- when :macosx
60
- %w[~/Applications /Applications]
61
- .map { |f| File.expand_path(File.join(f, "/LibreOffice.app/Contents/MacOS/soffice")) }
62
- .find { |f| File.file?(f) }
63
- when :windows
64
- 'C:\Program Files (x86)\LibreOffice 4\program\soffice.exe'
65
- else
66
- "soffice"
67
- end
68
- end
69
-
70
- def self.soffice?
71
- @soffice ||= !(soffice_path.nil? || soffice_version.nil?)
72
- end
73
-
74
- def self.soffice_open?
75
- ProcTable.ps.any? { |p| p.exe == soffice_path }
76
- end
77
-
78
47
  def self.run_command(*args)
79
- raise "LibreOffice executable not found" unless soffice?
80
- raise "LibreOffice already running" if soffice_open?
48
+ raise "LibreOffice already running" if soffice.open?
81
49
 
82
- output, status = Open3.capture2e(soffice_path, *args)
83
- raise "Command `#{soffice_path} #{args.join(" ")}` failed: #{output}" if status != 0
50
+ output, status = Open3.capture2e(soffice.path, *args)
51
+ logger.debug output
52
+ raise "Command `#{soffice_path} #{args.join(" ")}` failed: #{output}" if status.exitstatus != 0
84
53
  output
85
54
  end
86
55
 
87
- def self.soffice_version
88
- return if soffice_path.nil?
89
- output, status = Open3.capture2e(soffice_path, "--version")
90
- output.strip.sub "LibreOffice ", "" if status == 0
56
+ # Returns a Cliver::Dependency object representing our soffice dependency
57
+ #
58
+ # Attempts to resolve by looking at PATH followed by paths in the PATHS constant
59
+ #
60
+ # Methods used internally:
61
+ # path - returns the resolved path. Raises an error if not satisfied
62
+ # version - returns the resolved version
63
+ # open? - is the dependency currently open/running?
64
+ def self.soffice
65
+ @@soffice_dependency ||= Cliver::Dependency.new(
66
+ "soffice", SOFFICE_VERSION_REQUIREMENT,
67
+ :path => "*:" + PATHS.join(":")
68
+ )
69
+ end
70
+
71
+ def self.logger
72
+ @@logger ||= begin
73
+ logger = Logger.new(STDOUT)
74
+ logger.level = Logger::ERROR unless ENV["DEBUG"]
75
+ logger
76
+ end
91
77
  end
92
78
 
93
79
  # Pretty print the class in console
@@ -7,7 +7,7 @@ class WordToMarkdown
7
7
  HEADING_DEPTH = 6 # Number of headings to guess, e.g., h6
8
8
  HEADING_STEP = 100/HEADING_DEPTH
9
9
  MIN_HEADING_SIZE = 20
10
- UNICODE_BULLETS = ["○", "o", "●", "\uF0B7", "\u2022", "\uF0A7"]
10
+ UNICODE_BULLETS = ["○", "o", "●", "\u2022", "\\p{C}"]
11
11
 
12
12
  def initialize(document)
13
13
  @document = document
@@ -93,13 +93,15 @@ class WordToMarkdown
93
93
  end
94
94
 
95
95
  def remove_unicode_bullets_from_list_items!
96
- @document.tree.search("li span").each do |span|
96
+ path = WordToMarkdown.soffice.major_version == "5" ? "li span span" : "li span"
97
+ @document.tree.search(path).each do |span|
97
98
  span.inner_html = span.inner_html.gsub /^([#{UNICODE_BULLETS.join("")}]+)/, ""
98
99
  end
99
100
  end
100
101
 
101
102
  def remove_numbering_from_list_items!
102
- @document.tree.search("li span").each do |span|
103
+ path = WordToMarkdown.soffice.major_version == "5" ? "li span span" : "li span"
104
+ @document.tree.search(path).each do |span|
103
105
  span.inner_html = span.inner_html.gsub /^[a-zA-Z0-9]+\./m, ""
104
106
  end
105
107
  end
@@ -2,6 +2,7 @@
2
2
  class WordToMarkdown
3
3
  class Document
4
4
  class NotFoundError < StandardError; end
5
+ class ConverstionError < StandardError; end
5
6
 
6
7
  attr_reader :path, :raw_html, :tmpdir
7
8
 
@@ -87,11 +88,20 @@ class WordToMarkdown
87
88
 
88
89
  def raw_html
89
90
  @raw_html ||= begin
90
- WordToMarkdown::run_command '--headless', '--convert-to', 'html', path, '--outdir', tmpdir
91
+ WordToMarkdown::run_command '--headless', '--convert-to', filter, path, '--outdir', tmpdir
92
+ raise ConverstionError, "Failed to convert #{path}" unless File.exists?(dest_path)
91
93
  html = File.read dest_path
92
94
  File.delete dest_path
93
95
  html
94
96
  end
95
97
  end
98
+
99
+ def filter
100
+ if WordToMarkdown.soffice.major_version == "5"
101
+ "html:XHTML Writer File:UTF8"
102
+ else
103
+ "html"
104
+ end
105
+ end
96
106
  end
97
107
  end
@@ -1,3 +1,3 @@
1
1
  class WordToMarkdown
2
- VERSION = "1.1.4"
2
+ VERSION = "1.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.4
4
+ version: 1.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-05 00:00:00.000000000 Z
11
+ date: 2015-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: reverse_markdown
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: cliver
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.3'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.3'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rake
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -156,14 +170,14 @@ dependencies:
156
170
  requirements:
157
171
  - - "~>"
158
172
  - !ruby/object:Gem::Version
159
- version: '4.7'
173
+ version: '5.0'
160
174
  type: :development
161
175
  prerelease: false
162
176
  version_requirements: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
- version: '4.7'
180
+ version: '5.0'
167
181
  description: Ruby Gem to convert Word documents to markdown.
168
182
  email: ben.balter@github.com
169
183
  executables:
@@ -172,6 +186,7 @@ extensions: []
172
186
  extra_rdoc_files: []
173
187
  files:
174
188
  - bin/w2m
189
+ - lib/cliver/dependency_ext.rb
175
190
  - lib/nokogiri/xml/element.rb
176
191
  - lib/word-to-markdown.rb
177
192
  - lib/word-to-markdown/converter.rb
@@ -197,7 +212,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
197
212
  version: '0'
198
213
  requirements: []
199
214
  rubyforge_project:
200
- rubygems_version: 2.2.0
215
+ rubygems_version: 2.4.8
201
216
  signing_key:
202
217
  specification_version: 4
203
218
  summary: Ruby Gem to convert Word documents to markdown