word-to-markdown 1.1.8 → 1.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +21 -5
- data/lib/cliver/dependency_ext.rb +4 -3
- data/lib/word-to-markdown/converter.rb +3 -2
- data/lib/word-to-markdown/document.rb +4 -1
- data/lib/word-to-markdown/version.rb +1 -1
- data/lib/word-to-markdown.rb +5 -4
- metadata +52 -32
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: f2b816a2ad9402eb1c45f74f806482502608337ac9127d072647bd4f1f97cd39
|
|
4
|
+
data.tar.gz: ff35f5c9f2e89c0e781ea864552f6f20bafd23083301c4d72c5c95c7aae4f38c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: e74c055913709cd0fa871ba95cf22b22a86089bb18fd9e88cd27d1dec3ec3b4927708fc9a591ba278b3e5cdc98150178cd17547ba76579702893645554370c83
|
|
7
|
+
data.tar.gz: 9b01d816e8f95d43fb19dc828213e3db4f7b0897957c6e3eba42b4f4fe888d3564db3b3730b416f9eb10837ca4911fd7083c73b3556c3313dd12cefe2f8ae597
|
data/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
A Ruby gem to liberate content from [the jail that is Word documents](http://ben.balter.com/2012/10/19/we-ve-been-trained-to-make-paper/#jailbreaking-content)
|
|
4
4
|
|
|
5
|
-
[](https://github.com/benbalter/word-to-markdown/actions/workflows/ci.yml) [](http://badge.fury.io/rb/word-to-markdown) [](http://inch-ci.org/github/benbalter/word-to-markdown) [](https://ci.appveyor.com/project/benbalter/word-to-markdown/branch/master) [](https://codeclimate.com/github/benbalter/word-to-markdown/maintainability) [](https://codeclimate.com/github/benbalter/word-to-markdown/test_coverage)
|
|
6
6
|
|
|
7
7
|
## The problem
|
|
8
8
|
|
|
@@ -14,7 +14,9 @@ A Ruby gem to liberate content from [the jail that is Word documents](http://ben
|
|
|
14
14
|
|
|
15
15
|
**[Read more](http://ben.balter.com/2014/03/31/word-versus-markdown-more-than-mere-semantics/)**
|
|
16
16
|
|
|
17
|
-
|
|
17
|
+
## Just want to convert a Microsoft Word (or Google) document to Markdown?
|
|
18
|
+
|
|
19
|
+
You can use this **[hosted service](https://word2md.com/)** (or check out [its source](https://github.com/benbalter/word-to-markdown-server)).
|
|
18
20
|
|
|
19
21
|
## Install
|
|
20
22
|
|
|
@@ -71,8 +73,22 @@ Word-to-markdown requires `soffice` a command line interface to LibreOffice that
|
|
|
71
73
|
script/cibuild
|
|
72
74
|
```
|
|
73
75
|
|
|
74
|
-
##
|
|
76
|
+
## Docker
|
|
77
|
+
|
|
78
|
+
First, create the `Gemfile.lock` by installing the dependencies:
|
|
79
|
+
|
|
80
|
+
```
|
|
81
|
+
bundle install
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
Everything you need to run the executable locally:
|
|
85
|
+
|
|
86
|
+
```
|
|
87
|
+
docker-compose build
|
|
88
|
+
docker-compose run --rm app bundle exec w2m --help
|
|
89
|
+
docker-compose run --rm app bundle exec w2m test/fixtures/em.docx
|
|
90
|
+
```
|
|
75
91
|
|
|
76
|
-
|
|
92
|
+
## Hosted service
|
|
77
93
|
|
|
78
|
-
A live version runs at [
|
|
94
|
+
[Word-to-markdown-server](https://github.com/benbalter/word-to-markdown-server) contains a lightweight server for converting Word Documents as a service. A live version runs at [word2md.com](https://word2md.com).
|
|
@@ -24,14 +24,15 @@ module Cliver
|
|
|
24
24
|
|
|
25
25
|
# Returns the version of the resolved dependency
|
|
26
26
|
def version
|
|
27
|
-
return @
|
|
27
|
+
return @version if defined? @version
|
|
28
28
|
return if Gem.win_platform?
|
|
29
|
+
|
|
29
30
|
version = installed_versions.find { |p, _v| p == path }
|
|
30
|
-
@
|
|
31
|
+
@version = version.nil? ? nil : version[1]
|
|
31
32
|
end
|
|
32
33
|
|
|
33
34
|
def major_version
|
|
34
|
-
version
|
|
35
|
+
version&.split('.')&.first
|
|
35
36
|
end
|
|
36
37
|
end
|
|
37
38
|
end
|
|
@@ -58,7 +58,7 @@ class WordToMarkdown
|
|
|
58
58
|
@document.tree.css('[style]').each do |element|
|
|
59
59
|
sizes.push element.font_size.round(-1) unless element.font_size.nil?
|
|
60
60
|
end
|
|
61
|
-
sizes.uniq.sort
|
|
61
|
+
sizes.uniq.sort.extend(DescriptiveStatistics)
|
|
62
62
|
end
|
|
63
63
|
end
|
|
64
64
|
|
|
@@ -68,6 +68,7 @@ class WordToMarkdown
|
|
|
68
68
|
# @return [String, nil] the heading tag (e.g., H1), or nil
|
|
69
69
|
def guess_heading(node)
|
|
70
70
|
return nil if node.font_size.nil?
|
|
71
|
+
|
|
71
72
|
[*1...HEADING_DEPTH].each do |heading|
|
|
72
73
|
return "h#{heading}" if node.font_size >= h(heading)
|
|
73
74
|
end
|
|
@@ -109,7 +110,7 @@ class WordToMarkdown
|
|
|
109
110
|
def remove_unicode_bullets_from_list_items!
|
|
110
111
|
path = WordToMarkdown.soffice.major_version == '5' ? 'li span span' : 'li span'
|
|
111
112
|
@document.tree.search(path).each do |span|
|
|
112
|
-
span.inner_html = span.inner_html.gsub(/^([#{UNICODE_BULLETS.join
|
|
113
|
+
span.inner_html = span.inner_html.gsub(/^([#{UNICODE_BULLETS.join}]+)/, '')
|
|
113
114
|
end
|
|
114
115
|
end
|
|
115
116
|
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
class WordToMarkdown
|
|
4
4
|
class Document
|
|
5
5
|
class NotFoundError < StandardError; end
|
|
6
|
+
|
|
6
7
|
class ConversionError < StandardError; end
|
|
7
8
|
|
|
8
9
|
attr_reader :path, :tmpdir
|
|
@@ -44,7 +45,7 @@ class WordToMarkdown
|
|
|
44
45
|
#
|
|
45
46
|
# @return [String] the encoding, defaulting to "UTF-8"
|
|
46
47
|
def encoding
|
|
47
|
-
match = raw_html.encode('UTF-8', invalid: :replace, replace: '').match(/charset=([
|
|
48
|
+
match = raw_html.encode('UTF-8', invalid: :replace, replace: '').match(/charset=([^"]+)/)
|
|
48
49
|
if match
|
|
49
50
|
match[1].sub('macintosh', 'MacRoman')
|
|
50
51
|
else
|
|
@@ -81,6 +82,7 @@ class WordToMarkdown
|
|
|
81
82
|
string.gsub!(/([ ]+)$/, '') # line trailing whitespace
|
|
82
83
|
string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
|
|
83
84
|
string.delete!(' ') # Unicode non-breaking spaces, injected as tabs
|
|
85
|
+
string.gsub!(/\*\*\ +(?!\*|_)([[:punct:]])/, '**\1') # Remove extra space after bold
|
|
84
86
|
string
|
|
85
87
|
end
|
|
86
88
|
|
|
@@ -95,6 +97,7 @@ class WordToMarkdown
|
|
|
95
97
|
@raw_html ||= begin
|
|
96
98
|
WordToMarkdown.run_command '--headless', '--convert-to', filter, path, '--outdir', tmpdir
|
|
97
99
|
raise ConversionError, "Failed to convert #{path}" unless File.exist?(dest_path)
|
|
100
|
+
|
|
98
101
|
html = File.read dest_path
|
|
99
102
|
File.delete dest_path
|
|
100
103
|
html
|
data/lib/word-to-markdown.rb
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
require 'descriptive_statistics'
|
|
3
|
+
require 'descriptive_statistics/safe'
|
|
4
4
|
require 'reverse_markdown'
|
|
5
5
|
require 'nokogiri-styles'
|
|
6
6
|
require 'premailer'
|
|
@@ -22,12 +22,12 @@ class WordToMarkdown
|
|
|
22
22
|
|
|
23
23
|
# Options to be passed to Reverse Markdown
|
|
24
24
|
REVERSE_MARKDOWN_OPTIONS = {
|
|
25
|
-
unknown_tags:
|
|
25
|
+
unknown_tags: :bypass,
|
|
26
26
|
github_flavored: true
|
|
27
27
|
}.freeze
|
|
28
28
|
|
|
29
29
|
# Minimum version of LibreOffice Required
|
|
30
|
-
SOFFICE_VERSION_REQUIREMENT = '> 4.0'
|
|
30
|
+
SOFFICE_VERSION_REQUIREMENT = '> 4.0'
|
|
31
31
|
|
|
32
32
|
# Paths to look for LibreOffice, in order of preference
|
|
33
33
|
PATHS = [
|
|
@@ -66,6 +66,7 @@ class WordToMarkdown
|
|
|
66
66
|
output, status = Open3.capture2e(soffice.path, *args)
|
|
67
67
|
logger.debug output
|
|
68
68
|
raise "Command `#{soffice.path} #{args.join(' ')}` failed: #{output}" if status.exitstatus != 0
|
|
69
|
+
|
|
69
70
|
output
|
|
70
71
|
end
|
|
71
72
|
|
|
@@ -85,7 +86,7 @@ class WordToMarkdown
|
|
|
85
86
|
# @return Logger instance
|
|
86
87
|
def logger
|
|
87
88
|
@logger ||= begin
|
|
88
|
-
logger = Logger.new(
|
|
89
|
+
logger = Logger.new($stdout)
|
|
89
90
|
logger.level = Logger::ERROR unless ENV['DEBUG']
|
|
90
91
|
logger
|
|
91
92
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: word-to-markdown
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.1.
|
|
4
|
+
version: 1.1.9
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ben Balter
|
|
8
|
-
autorequire:
|
|
8
|
+
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2025-01-08 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: cliver
|
|
@@ -70,16 +70,22 @@ dependencies:
|
|
|
70
70
|
name: reverse_markdown
|
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
|
72
72
|
requirements:
|
|
73
|
-
- - "
|
|
73
|
+
- - ">="
|
|
74
74
|
- !ruby/object:Gem::Version
|
|
75
|
-
version: '1
|
|
75
|
+
version: '1'
|
|
76
|
+
- - "<"
|
|
77
|
+
- !ruby/object:Gem::Version
|
|
78
|
+
version: '3'
|
|
76
79
|
type: :runtime
|
|
77
80
|
prerelease: false
|
|
78
81
|
version_requirements: !ruby/object:Gem::Requirement
|
|
79
82
|
requirements:
|
|
80
|
-
- - "
|
|
83
|
+
- - ">="
|
|
81
84
|
- !ruby/object:Gem::Version
|
|
82
|
-
version: '1
|
|
85
|
+
version: '1'
|
|
86
|
+
- - "<"
|
|
87
|
+
- !ruby/object:Gem::Version
|
|
88
|
+
version: '3'
|
|
83
89
|
- !ruby/object:Gem::Dependency
|
|
84
90
|
name: sys-proctable
|
|
85
91
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -94,20 +100,6 @@ dependencies:
|
|
|
94
100
|
- - "~>"
|
|
95
101
|
- !ruby/object:Gem::Version
|
|
96
102
|
version: '1.0'
|
|
97
|
-
- !ruby/object:Gem::Dependency
|
|
98
|
-
name: bundler
|
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
|
100
|
-
requirements:
|
|
101
|
-
- - "~>"
|
|
102
|
-
- !ruby/object:Gem::Version
|
|
103
|
-
version: '1.6'
|
|
104
|
-
type: :development
|
|
105
|
-
prerelease: false
|
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
|
107
|
-
requirements:
|
|
108
|
-
- - "~>"
|
|
109
|
-
- !ruby/object:Gem::Version
|
|
110
|
-
version: '1.6'
|
|
111
103
|
- !ruby/object:Gem::Dependency
|
|
112
104
|
name: minitest
|
|
113
105
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -156,42 +148,70 @@ dependencies:
|
|
|
156
148
|
requirements:
|
|
157
149
|
- - "~>"
|
|
158
150
|
- !ruby/object:Gem::Version
|
|
159
|
-
version: '
|
|
151
|
+
version: '13.0'
|
|
160
152
|
type: :development
|
|
161
153
|
prerelease: false
|
|
162
154
|
version_requirements: !ruby/object:Gem::Requirement
|
|
163
155
|
requirements:
|
|
164
156
|
- - "~>"
|
|
165
157
|
- !ruby/object:Gem::Version
|
|
166
|
-
version: '
|
|
158
|
+
version: '13.0'
|
|
167
159
|
- !ruby/object:Gem::Dependency
|
|
168
160
|
name: rubocop
|
|
169
161
|
requirement: !ruby/object:Gem::Requirement
|
|
170
162
|
requirements:
|
|
171
163
|
- - "~>"
|
|
172
164
|
- !ruby/object:Gem::Version
|
|
173
|
-
version: '0
|
|
165
|
+
version: '1.0'
|
|
166
|
+
type: :development
|
|
167
|
+
prerelease: false
|
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
169
|
+
requirements:
|
|
170
|
+
- - "~>"
|
|
171
|
+
- !ruby/object:Gem::Version
|
|
172
|
+
version: '1.0'
|
|
173
|
+
- !ruby/object:Gem::Dependency
|
|
174
|
+
name: rubocop-minitest
|
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
|
176
|
+
requirements:
|
|
177
|
+
- - "~>"
|
|
178
|
+
- !ruby/object:Gem::Version
|
|
179
|
+
version: '0.3'
|
|
180
|
+
type: :development
|
|
181
|
+
prerelease: false
|
|
182
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
183
|
+
requirements:
|
|
184
|
+
- - "~>"
|
|
185
|
+
- !ruby/object:Gem::Version
|
|
186
|
+
version: '0.3'
|
|
187
|
+
- !ruby/object:Gem::Dependency
|
|
188
|
+
name: rubocop-performance
|
|
189
|
+
requirement: !ruby/object:Gem::Requirement
|
|
190
|
+
requirements:
|
|
191
|
+
- - "~>"
|
|
192
|
+
- !ruby/object:Gem::Version
|
|
193
|
+
version: '1.5'
|
|
174
194
|
type: :development
|
|
175
195
|
prerelease: false
|
|
176
196
|
version_requirements: !ruby/object:Gem::Requirement
|
|
177
197
|
requirements:
|
|
178
198
|
- - "~>"
|
|
179
199
|
- !ruby/object:Gem::Version
|
|
180
|
-
version: '
|
|
200
|
+
version: '1.5'
|
|
181
201
|
- !ruby/object:Gem::Dependency
|
|
182
202
|
name: shoulda
|
|
183
203
|
requirement: !ruby/object:Gem::Requirement
|
|
184
204
|
requirements:
|
|
185
205
|
- - "~>"
|
|
186
206
|
- !ruby/object:Gem::Version
|
|
187
|
-
version: '
|
|
207
|
+
version: '4.0'
|
|
188
208
|
type: :development
|
|
189
209
|
prerelease: false
|
|
190
210
|
version_requirements: !ruby/object:Gem::Requirement
|
|
191
211
|
requirements:
|
|
192
212
|
- - "~>"
|
|
193
213
|
- !ruby/object:Gem::Version
|
|
194
|
-
version: '
|
|
214
|
+
version: '4.0'
|
|
195
215
|
description: Ruby Gem to convert Word documents to markdown.
|
|
196
216
|
email: ben.balter@github.com
|
|
197
217
|
executables:
|
|
@@ -211,8 +231,9 @@ files:
|
|
|
211
231
|
homepage: https://github.com/benbalter/word-to-markdown
|
|
212
232
|
licenses:
|
|
213
233
|
- MIT
|
|
214
|
-
metadata:
|
|
215
|
-
|
|
234
|
+
metadata:
|
|
235
|
+
rubygems_mfa_required: 'true'
|
|
236
|
+
post_install_message:
|
|
216
237
|
rdoc_options: []
|
|
217
238
|
require_paths:
|
|
218
239
|
- lib
|
|
@@ -227,9 +248,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
227
248
|
- !ruby/object:Gem::Version
|
|
228
249
|
version: '0'
|
|
229
250
|
requirements: []
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
signing_key:
|
|
251
|
+
rubygems_version: 3.5.16
|
|
252
|
+
signing_key:
|
|
233
253
|
specification_version: 4
|
|
234
254
|
summary: Ruby Gem to convert Word documents to markdown
|
|
235
255
|
test_files: []
|