word-to-markdown 1.1.4 → 1.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/w2m +1 -1
- data/lib/cliver/dependency_ext.rb +31 -0
- data/lib/word-to-markdown.rb +37 -51
- data/lib/word-to-markdown/converter.rb +5 -3
- data/lib/word-to-markdown/document.rb +11 -1
- data/lib/word-to-markdown/version.rb +1 -1
- metadata +20 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9de91d876eb8b4dd4b895c3a36240cb270891592
|
4
|
+
data.tar.gz: 7a671dd33ed9faa1cab5e14cef87da8be3a34f61
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 220ba6079d8383d369601a84ab32f278deb63ec498317b1b77c60208e8fa16ae9317ed57ce6bb9db9164ecc85ac759eeabe0dc95239e4b993dd777ba32c79c12
|
7
|
+
data.tar.gz: 34a838bfb0f659c3cc6a70b3488d01fba8f4b5f69097ba760c06ead8df31e50c05095f0d2e1a178ea9b5eedcff8551a32a1ff8ccb6e421016db21367df5b690a
|
data/bin/w2m
CHANGED
data/lib/cliver/dependency_ext.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'sys/proctable'
|
2
|
+
|
3
|
+
module Cliver
|
4
|
+
class Dependency
|
5
|
+
|
6
|
+
include Sys
|
7
|
+
|
8
|
+
# Memoized shortcut for detect
|
9
|
+
# Returns the path to the detected dependency
|
10
|
+
# Raises an error if the dependency was not satisfied
|
11
|
+
def path
|
12
|
+
@detected_path ||= detect!
|
13
|
+
end
|
14
|
+
|
15
|
+
# Is the detected dependency currently open?
|
16
|
+
def open?
|
17
|
+
ProcTable.ps.any? { |p| p.comm == path }
|
18
|
+
end
|
19
|
+
|
20
|
+
# Returns the version of the resolved dependency
|
21
|
+
def version
|
22
|
+
return @detected_version if defined? @detected_version
|
23
|
+
version = installed_versions.find { |p, v| p == path }
|
24
|
+
@detected_version = version.nil? ? nil : version[1]
|
25
|
+
end
|
26
|
+
|
27
|
+
def major_version
|
28
|
+
version.split(".").first
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/word-to-markdown.rb
CHANGED
@@ -1,22 +1,22 @@
|
|
1
1
|
require 'descriptive_statistics'
|
2
2
|
require 'reverse_markdown'
|
3
3
|
require 'nokogiri-styles'
|
4
|
-
require 'sys/proctable'
|
5
4
|
require 'premailer'
|
6
5
|
require 'rbconfig'
|
7
6
|
require 'nokogiri'
|
7
|
+
require 'logger'
|
8
8
|
require 'tmpdir'
|
9
|
+
require 'cliver'
|
9
10
|
require 'open3'
|
10
11
|
|
11
12
|
require_relative 'word-to-markdown/version'
|
12
13
|
require_relative 'word-to-markdown/document'
|
13
14
|
require_relative 'word-to-markdown/converter'
|
14
15
|
require_relative 'nokogiri/xml/element'
|
16
|
+
require_relative 'cliver/dependency_ext'
|
15
17
|
|
16
18
|
class WordToMarkdown
|
17
19
|
|
18
|
-
include Sys
|
19
|
-
|
20
20
|
attr_reader :document, :converter
|
21
21
|
|
22
22
|
REVERSE_MARKDOWN_OPTIONS = {
|
@@ -24,6 +24,15 @@ class WordToMarkdown
|
|
24
24
|
github_flavored: true
|
25
25
|
}
|
26
26
|
|
27
|
+
SOFFICE_VERSION_REQUIREMENT = '> 4.0'
|
28
|
+
|
29
|
+
PATHS = [
|
30
|
+
"~/Applications/LibreOffice.app/Contents/MacOS",
|
31
|
+
"/Applications/LibreOffice.app/Contents/MacOS",
|
32
|
+
"/C/Program Files (x86)/LibreOffice 5/program",
|
33
|
+
"/C/Program Files (x86)/LibreOffice 4/program"
|
34
|
+
]
|
35
|
+
|
27
36
|
# Create a new WordToMarkdown object
|
28
37
|
#
|
29
38
|
# input - a HTML string or path to an HTML file
|
@@ -35,59 +44,36 @@ class WordToMarkdown
|
|
35
44
|
converter.convert!
|
36
45
|
end
|
37
46
|
|
38
|
-
# source: https://stackoverflow.com/questions/11784109/detecting-operating-systems-in-ruby
|
39
|
-
def self.os
|
40
|
-
@os ||= (
|
41
|
-
host_os = RbConfig::CONFIG['host_os']
|
42
|
-
case host_os
|
43
|
-
when /mswin|msys|mingw|cygwin|bccwin|wince|emc/
|
44
|
-
:windows
|
45
|
-
when /darwin|mac os/
|
46
|
-
:macosx
|
47
|
-
when /linux/
|
48
|
-
:linux
|
49
|
-
when /solaris|bsd/
|
50
|
-
:unix
|
51
|
-
else
|
52
|
-
raise Error::WebDriverError, "unknown os: #{host_os.inspect}"
|
53
|
-
end
|
54
|
-
)
|
55
|
-
end
|
56
|
-
|
57
|
-
def self.soffice_path
|
58
|
-
case os
|
59
|
-
when :macosx
|
60
|
-
%w[~/Applications /Applications]
|
61
|
-
.map { |f| File.expand_path(File.join(f, "/LibreOffice.app/Contents/MacOS/soffice")) }
|
62
|
-
.find { |f| File.file?(f) }
|
63
|
-
when :windows
|
64
|
-
'C:\Program Files (x86)\LibreOffice 4\program\soffice.exe'
|
65
|
-
else
|
66
|
-
"soffice"
|
67
|
-
end
|
68
|
-
end
|
69
|
-
|
70
|
-
def self.soffice?
|
71
|
-
@soffice ||= !(soffice_path.nil? || soffice_version.nil?)
|
72
|
-
end
|
73
|
-
|
74
|
-
def self.soffice_open?
|
75
|
-
ProcTable.ps.any? { |p| p.exe == soffice_path }
|
76
|
-
end
|
77
|
-
|
78
47
|
def self.run_command(*args)
|
79
|
-
raise "LibreOffice
|
80
|
-
raise "LibreOffice already running" if soffice_open?
|
48
|
+
raise "LibreOffice already running" if soffice.open?
|
81
49
|
|
82
|
-
output, status = Open3.capture2e(
|
83
|
-
|
50
|
+
output, status = Open3.capture2e(soffice.path, *args)
|
51
|
+
logger.debug output
|
52
|
+
raise "Command `#{soffice_path} #{args.join(" ")}` failed: #{output}" if status.exitstatus != 0
|
84
53
|
output
|
85
54
|
end
|
86
55
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
56
|
+
# Returns a Cliver::Dependency object representing our soffice dependency
|
57
|
+
#
|
58
|
+
# Attempts to resolve by looking at PATH followed by paths in the PATHS constant
|
59
|
+
#
|
60
|
+
# Methods used internally:
|
61
|
+
# path - returns the resolved path. Raises an error if not satisfied
|
62
|
+
# version - returns the resolved version
|
63
|
+
# open - is the dependency currently open/running?
|
64
|
+
def self.soffice
|
65
|
+
@@soffice_dependency ||= Cliver::Dependency.new(
|
66
|
+
"soffice", SOFFICE_VERSION_REQUIREMENT,
|
67
|
+
:path => "*:" + PATHS.join(":")
|
68
|
+
)
|
69
|
+
end
|
70
|
+
|
71
|
+
def self.logger
|
72
|
+
@@logger ||= begin
|
73
|
+
logger = Logger.new(STDOUT)
|
74
|
+
logger.level = Logger::ERROR unless ENV["DEBUG"]
|
75
|
+
logger
|
76
|
+
end
|
91
77
|
end
|
92
78
|
|
93
79
|
# Pretty print the class in console
|
data/lib/word-to-markdown/converter.rb
CHANGED
@@ -7,7 +7,7 @@ class WordToMarkdown
|
|
7
7
|
HEADING_DEPTH = 6 # Number of headings to guess, e.g., h6
|
8
8
|
HEADING_STEP = 100/HEADING_DEPTH
|
9
9
|
MIN_HEADING_SIZE = 20
|
10
|
-
UNICODE_BULLETS = ["○", "o", "●", "\
|
10
|
+
UNICODE_BULLETS = ["○", "o", "●", "\u2022", "\\p{C}"]
|
11
11
|
|
12
12
|
def initialize(document)
|
13
13
|
@document = document
|
@@ -93,13 +93,15 @@ class WordToMarkdown
|
|
93
93
|
end
|
94
94
|
|
95
95
|
def remove_unicode_bullets_from_list_items!
|
96
|
-
|
96
|
+
path = WordToMarkdown.soffice.major_version == "5" ? "li span span" : "li span"
|
97
|
+
@document.tree.search(path).each do |span|
|
97
98
|
span.inner_html = span.inner_html.gsub /^([#{UNICODE_BULLETS.join("")}]+)/, ""
|
98
99
|
end
|
99
100
|
end
|
100
101
|
|
101
102
|
def remove_numbering_from_list_items!
|
102
|
-
|
103
|
+
path = WordToMarkdown.soffice.major_version == "5" ? "li span span" : "li span"
|
104
|
+
@document.tree.search(path).each do |span|
|
103
105
|
span.inner_html = span.inner_html.gsub /^[a-zA-Z0-9]+\./m, ""
|
104
106
|
end
|
105
107
|
end
|
data/lib/word-to-markdown/document.rb
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
class WordToMarkdown
|
3
3
|
class Document
|
4
4
|
class NotFoundError < StandardError; end
|
5
|
+
class ConverstionError < StandardError; end
|
5
6
|
|
6
7
|
attr_reader :path, :raw_html, :tmpdir
|
7
8
|
|
@@ -87,11 +88,20 @@ class WordToMarkdown
|
|
87
88
|
|
88
89
|
def raw_html
|
89
90
|
@raw_html ||= begin
|
90
|
-
WordToMarkdown::run_command '--headless', '--convert-to',
|
91
|
+
WordToMarkdown::run_command '--headless', '--convert-to', filter, path, '--outdir', tmpdir
|
92
|
+
raise ConverstionError, "Failed to convert #{path}" unless File.exists?(dest_path)
|
91
93
|
html = File.read dest_path
|
92
94
|
File.delete dest_path
|
93
95
|
html
|
94
96
|
end
|
95
97
|
end
|
98
|
+
|
99
|
+
def filter
|
100
|
+
if WordToMarkdown.soffice.major_version == "5"
|
101
|
+
"html:XHTML Writer File:UTF8"
|
102
|
+
else
|
103
|
+
"html"
|
104
|
+
end
|
105
|
+
end
|
96
106
|
end
|
97
107
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word-to-markdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.4
|
4
|
+
version: 1.1.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: reverse_markdown
|
@@ -80,6 +80,20 @@ dependencies:
|
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0.9'
|
83
|
+
- !ruby/object:Gem::Dependency
|
84
|
+
name: cliver
|
85
|
+
requirement: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: '0.3'
|
90
|
+
type: :runtime
|
91
|
+
prerelease: false
|
92
|
+
version_requirements: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.3'
|
83
97
|
- !ruby/object:Gem::Dependency
|
84
98
|
name: rake
|
85
99
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,14 +170,14 @@ dependencies:
|
|
156
170
|
requirements:
|
157
171
|
- - "~>"
|
158
172
|
- !ruby/object:Gem::Version
|
159
|
-
version: '
|
173
|
+
version: '5.0'
|
160
174
|
type: :development
|
161
175
|
prerelease: false
|
162
176
|
version_requirements: !ruby/object:Gem::Requirement
|
163
177
|
requirements:
|
164
178
|
- - "~>"
|
165
179
|
- !ruby/object:Gem::Version
|
166
|
-
version: '
|
180
|
+
version: '5.0'
|
167
181
|
description: Ruby Gem to convert Word documents to markdown.
|
168
182
|
email: ben.balter@github.com
|
169
183
|
executables:
|
@@ -172,6 +186,7 @@ extensions: []
|
|
172
186
|
extra_rdoc_files: []
|
173
187
|
files:
|
174
188
|
- bin/w2m
|
189
|
+
- lib/cliver/dependency_ext.rb
|
175
190
|
- lib/nokogiri/xml/element.rb
|
176
191
|
- lib/word-to-markdown.rb
|
177
192
|
- lib/word-to-markdown/converter.rb
|
@@ -197,7 +212,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
197
212
|
version: '0'
|
198
213
|
requirements: []
|
199
214
|
rubyforge_project:
|
200
|
-
rubygems_version: 2.
|
215
|
+
rubygems_version: 2.4.8
|
201
216
|
signing_key:
|
202
217
|
specification_version: 4
|
203
218
|
summary: Ruby Gem to convert Word documents to markdown
|