word-to-markdown 1.1.4 → 1.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 2ebb72caa54bd03cd393646e9458f228a47d4bb6
4
- data.tar.gz: 9a11db1413481a4abd7a04f38b4fe9b31f765f7e
3
+ metadata.gz: 9de91d876eb8b4dd4b895c3a36240cb270891592
4
+ data.tar.gz: 7a671dd33ed9faa1cab5e14cef87da8be3a34f61
5
5
  SHA512:
6
- metadata.gz: 330dc12f0a9b438bc5485f702594690131a0a0e6306a12e75996017d4dddbbf5591d8d35ea2450fbf54b9e4bf67420d18940901025d1c730dc58d3e8563e46ce
7
- data.tar.gz: 929e165957b4dbdce0becfb614539df57b0b7748ee204ee258bda450e68c4f0d55a112e09f308415f100d0d335275ab7a8ba8932865b49ce8ac81eb1eb626bc0
6
+ metadata.gz: 220ba6079d8383d369601a84ab32f278deb63ec498317b1b77c60208e8fa16ae9317ed57ce6bb9db9164ecc85ac759eeabe0dc95239e4b993dd777ba32c79c12
7
+ data.tar.gz: 34a838bfb0f659c3cc6a70b3488d01fba8f4b5f69097ba760c06ead8df31e50c05095f0d2e1a178ea9b5eedcff8551a32a1ff8ccb6e421016db21367df5b690a
data/bin/w2m CHANGED
@@ -9,7 +9,7 @@ end
9
9
 
10
10
  if ARGV[0] == "--version"
11
11
  puts "WordToMarkdown v#{WordToMarkdown::VERSION}"
12
- puts "LibreOffice #{WordToMarkdown.soffice_version}"
12
+ puts "LibreOffice v#{WordToMarkdown.soffice.version}"
13
13
  else
14
14
  doc = WordToMarkdown.new ARGV[0]
15
15
  puts doc.to_s
@@ -0,0 +1,31 @@
1
+ require 'sys/proctable'
2
+
3
# Extensions to Cliver::Dependency used to inspect the resolved executable:
# its path, whether it is currently running, and its version.
module Cliver
  class Dependency
    # Memoized shortcut for detect!
    #
    # Returns the path to the detected dependency
    # Raises an error if the dependency was not satisfied
    def path
      @detected_path ||= detect!
    end

    # Is the detected dependency currently open?
    #
    # Returns true when any entry of the process table reports a `comm`
    # equal to the resolved path.
    #
    # Sys::ProcTable is referenced by its full name so that the Sys module
    # does not have to be mixed into Cliver::Dependency.
    #
    # NOTE(review): this compares `comm` against the full resolved path;
    # confirm `comm` carries a path (not just a base name) on the target
    # platforms.
    def open?
      Sys::ProcTable.ps.any? { |process| process.comm == path }
    end

    # Returns the version of the resolved dependency, or nil when none of
    # the installed versions belongs to the resolved path.
    #
    # The result is memoized with `defined?` so that a nil result is cached
    # as well.
    def version
      return @detected_version if defined? @detected_version
      _, detected = installed_versions.find { |candidate, _| candidate == path }
      @detected_version = detected
    end

    # Returns the first dot-separated component of the version as a String
    # (e.g. "5" for "5.0.1.2"), or nil when no version could be resolved.
    def major_version
      # to_s keeps this nil-safe: version may legitimately be nil.
      version.to_s.split(".").first
    end
  end
end
@@ -1,22 +1,22 @@
1
1
  require 'descriptive_statistics'
2
2
  require 'reverse_markdown'
3
3
  require 'nokogiri-styles'
4
- require 'sys/proctable'
5
4
  require 'premailer'
6
5
  require 'rbconfig'
7
6
  require 'nokogiri'
7
+ require 'logger'
8
8
  require 'tmpdir'
9
+ require 'cliver'
9
10
  require 'open3'
10
11
 
11
12
  require_relative 'word-to-markdown/version'
12
13
  require_relative 'word-to-markdown/document'
13
14
  require_relative 'word-to-markdown/converter'
14
15
  require_relative 'nokogiri/xml/element'
16
+ require_relative 'cliver/dependency_ext'
15
17
 
16
18
  class WordToMarkdown
17
19
 
18
- include Sys
19
-
20
20
  attr_reader :document, :converter
21
21
 
22
22
  REVERSE_MARKDOWN_OPTIONS = {
@@ -24,6 +24,15 @@ class WordToMarkdown
24
24
  github_flavored: true
25
25
  }
26
26
 
27
# Version requirement handed to Cliver when resolving the soffice executable.
SOFFICE_VERSION_REQUIREMENT = '> 4.0'.freeze

# Extra directories searched for soffice in addition to the PATH environment
# variable. Frozen so the shared list cannot be modified at runtime.
PATHS = [
  "~/Applications/LibreOffice.app/Contents/MacOS",
  "/Applications/LibreOffice.app/Contents/MacOS",
  "/C/Program Files (x86)/LibreOffice 5/program",
  "/C/Program Files (x86)/LibreOffice 4/program"
].freeze
35
+
27
36
  # Create a new WordToMarkdown object
28
37
  #
29
38
  # input - a HTML string or path to an HTML file
@@ -35,59 +44,36 @@ class WordToMarkdown
35
44
  converter.convert!
36
45
  end
37
46
 
38
- # source: https://stackoverflow.com/questions/11784109/detecting-operating-systems-in-ruby
39
- def self.os
40
- @os ||= (
41
- host_os = RbConfig::CONFIG['host_os']
42
- case host_os
43
- when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
44
- :windows
45
- when /darwin|mac os/
46
- :macosx
47
- when /linux/
48
- :linux
49
- when /solaris|bsd/
50
- :unix
51
- else
52
- raise Error::WebDriverError, "unknown os: #{host_os.inspect}"
53
- end
54
- )
55
- end
56
-
57
- def self.soffice_path
58
- case os
59
- when :macosx
60
- %w[~/Applications /Applications]
61
- .map { |f| File.expand_path(File.join(f, "/LibreOffice.app/Contents/MacOS/soffice")) }
62
- .find { |f| File.file?(f) }
63
- when :windows
64
- 'C:\Program Files (x86)\LibreOffice 4\program\soffice.exe'
65
- else
66
- "soffice"
67
- end
68
- end
69
-
70
- def self.soffice?
71
- @soffice ||= !(soffice_path.nil? || soffice_version.nil?)
72
- end
73
-
74
- def self.soffice_open?
75
- ProcTable.ps.any? { |p| p.exe == soffice_path }
76
- end
77
-
78
47
  def self.run_command(*args)
79
- raise "LibreOffice executable not found" unless soffice?
80
- raise "LibreOffice already running" if soffice_open?
48
+ raise "LibreOffice already running" if soffice.open?
81
49
 
82
- output, status = Open3.capture2e(soffice_path, *args)
83
- raise "Command `#{soffice_path} #{args.join(" ")}` failed: #{output}" if status != 0
50
+ output, status = Open3.capture2e(soffice.path, *args)
51
+ logger.debug output
52
+ raise "Command `#{soffice_path} #{args.join(" ")}` failed: #{output}" if status.exitstatus != 0
84
53
  output
85
54
  end
86
55
 
87
- def self.soffice_version
88
- return if soffice_path.nil?
89
- output, status = Open3.capture2e(soffice_path, "--version")
90
- output.strip.sub "LibreOffice ", "" if status == 0
56
# Returns a Cliver::Dependency object representing our soffice dependency
#
# Attempts to resolve by looking at PATH followed by paths in the PATHS constant
#
# The dependency is memoized in a class-level instance variable.
#
# Methods used internally:
#   path    - returns the resolved path. Raises an error if not satisfied
#   version - returns the resolved version
#   open?   - is the dependency currently open/running?
def self.soffice
  # "*" stands for the PATH environment variable; entries are joined with
  # the platform's own path separator rather than a hard-coded ":".
  @soffice_dependency ||= Cliver::Dependency.new(
    "soffice", SOFFICE_VERSION_REQUIREMENT,
    :path => ["*", *PATHS].join(File::PATH_SEPARATOR)
  )
end
70
+
71
# Returns the shared Logger, created on first use and memoized in a
# class-level instance variable.
#
# The logger writes to STDOUT. Its level is set to ERROR unless the DEBUG
# environment variable is set, in which case Logger's default level is kept.
def self.logger
  @logger ||= Logger.new(STDOUT).tap do |log|
    log.level = Logger::ERROR unless ENV["DEBUG"]
  end
end
92
78
 
93
79
  # Pretty print the class in console
@@ -7,7 +7,7 @@ class WordToMarkdown
7
7
  HEADING_DEPTH = 6 # Number of headings to guess, e.g., h6
8
8
  HEADING_STEP = 100/HEADING_DEPTH
9
9
  MIN_HEADING_SIZE = 20
10
- UNICODE_BULLETS = ["○", "o", "●", "\uF0B7", "\u2022", "\uF0A7"]
10
+ UNICODE_BULLETS = ["○", "o", "●", "\u2022", "\\p{C}"]
11
11
 
12
12
  def initialize(document)
13
13
  @document = document
@@ -93,13 +93,15 @@ class WordToMarkdown
93
93
  end
94
94
 
95
95
  def remove_unicode_bullets_from_list_items!
96
- @document.tree.search("li span").each do |span|
96
+ path = WordToMarkdown.soffice.major_version == "5" ? "li span span" : "li span"
97
+ @document.tree.search(path).each do |span|
97
98
  span.inner_html = span.inner_html.gsub /^([#{UNICODE_BULLETS.join("")}]+)/, ""
98
99
  end
99
100
  end
100
101
 
101
102
  def remove_numbering_from_list_items!
102
- @document.tree.search("li span").each do |span|
103
+ path = WordToMarkdown.soffice.major_version == "5" ? "li span span" : "li span"
104
+ @document.tree.search(path).each do |span|
103
105
  span.inner_html = span.inner_html.gsub /^[a-zA-Z0-9]+\./m, ""
104
106
  end
105
107
  end
@@ -2,6 +2,7 @@
2
2
  class WordToMarkdown
3
3
  class Document
4
4
  class NotFoundError < StandardError; end
5
# Raised when no converted file exists after running LibreOffice.
class ConversionError < StandardError; end
# Misspelled name kept as an alias so existing `rescue ConverstionError`
# and `raise ConverstionError` code keeps working.
ConverstionError = ConversionError
5
6
 
6
7
  attr_reader :path, :raw_html, :tmpdir
7
8
 
@@ -87,11 +88,20 @@ class WordToMarkdown
87
88
 
88
89
  def raw_html
89
90
  @raw_html ||= begin
90
- WordToMarkdown::run_command '--headless', '--convert-to', 'html', path, '--outdir', tmpdir
91
+ WordToMarkdown::run_command '--headless', '--convert-to', filter, path, '--outdir', tmpdir
92
+ raise ConverstionError, "Failed to convert #{path}" unless File.exists?(dest_path)
91
93
  html = File.read dest_path
92
94
  File.delete dest_path
93
95
  html
94
96
  end
95
97
  end
98
+
99
# Returns the output format String handed to soffice's --convert-to option:
# the XHTML Writer export when the resolved soffice major version is "5",
# plain "html" for any other version.
def filter
  return "html" unless WordToMarkdown.soffice.major_version == "5"

  "html:XHTML Writer File:UTF8"
end
96
106
  end
97
107
  end
@@ -1,3 +1,3 @@
1
1
  class WordToMarkdown
2
- VERSION = "1.1.4"
2
+ VERSION = "1.1.5"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: word-to-markdown
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.4
4
+ version: 1.1.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ben Balter
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-05 00:00:00.000000000 Z
11
+ date: 2015-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: reverse_markdown
@@ -80,6 +80,20 @@ dependencies:
80
80
  - - "~>"
81
81
  - !ruby/object:Gem::Version
82
82
  version: '0.9'
83
+ - !ruby/object:Gem::Dependency
84
+ name: cliver
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '0.3'
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.3'
83
97
  - !ruby/object:Gem::Dependency
84
98
  name: rake
85
99
  requirement: !ruby/object:Gem::Requirement
@@ -156,14 +170,14 @@ dependencies:
156
170
  requirements:
157
171
  - - "~>"
158
172
  - !ruby/object:Gem::Version
159
- version: '4.7'
173
+ version: '5.0'
160
174
  type: :development
161
175
  prerelease: false
162
176
  version_requirements: !ruby/object:Gem::Requirement
163
177
  requirements:
164
178
  - - "~>"
165
179
  - !ruby/object:Gem::Version
166
- version: '4.7'
180
+ version: '5.0'
167
181
  description: Ruby Gem to convert Word documents to markdown.
168
182
  email: ben.balter@github.com
169
183
  executables:
@@ -172,6 +186,7 @@ extensions: []
172
186
  extra_rdoc_files: []
173
187
  files:
174
188
  - bin/w2m
189
+ - lib/cliver/dependency_ext.rb
175
190
  - lib/nokogiri/xml/element.rb
176
191
  - lib/word-to-markdown.rb
177
192
  - lib/word-to-markdown/converter.rb
@@ -197,7 +212,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
197
212
  version: '0'
198
213
  requirements: []
199
214
  rubyforge_project:
200
- rubygems_version: 2.2.0
215
+ rubygems_version: 2.4.8
201
216
  signing_key:
202
217
  specification_version: 4
203
218
  summary: Ruby Gem to convert Word documents to markdown