word-to-markdown 1.1.8 → 1.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +21 -5
- data/lib/cliver/dependency_ext.rb +4 -3
- data/lib/word-to-markdown/converter.rb +3 -2
- data/lib/word-to-markdown/document.rb +4 -1
- data/lib/word-to-markdown/version.rb +1 -1
- data/lib/word-to-markdown.rb +5 -4
- metadata +52 -32
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2b816a2ad9402eb1c45f74f806482502608337ac9127d072647bd4f1f97cd39
|
4
|
+
data.tar.gz: ff35f5c9f2e89c0e781ea864552f6f20bafd23083301c4d72c5c95c7aae4f38c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e74c055913709cd0fa871ba95cf22b22a86089bb18fd9e88cd27d1dec3ec3b4927708fc9a591ba278b3e5cdc98150178cd17547ba76579702893645554370c83
|
7
|
+
data.tar.gz: 9b01d816e8f95d43fb19dc828213e3db4f7b0897957c6e3eba42b4f4fe888d3564db3b3730b416f9eb10837ca4911fd7083c73b3556c3313dd12cefe2f8ae597
|
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
A Ruby gem to liberate content from [the jail that is Word documents](http://ben.balter.com/2012/10/19/we-ve-been-trained-to-make-paper/#jailbreaking-content)
|
4
4
|
|
5
|
-
[![
|
5
|
+
[![CI](https://github.com/benbalter/word-to-markdown/actions/workflows/ci.yml/badge.svg)](https://github.com/benbalter/word-to-markdown/actions/workflows/ci.yml) [![Gem Version](https://badge.fury.io/rb/word-to-markdown.png)](http://badge.fury.io/rb/word-to-markdown) [![Inline docs](http://inch-ci.org/github/benbalter/word-to-markdown.png)](http://inch-ci.org/github/benbalter/word-to-markdown) [![Build status](https://ci.appveyor.com/api/projects/status/x2gnsfvli3q47a2e/branch/master?svg=true)](https://ci.appveyor.com/project/benbalter/word-to-markdown/branch/master) [![Maintainability](https://api.codeclimate.com/v1/badges/aae0d67ea7db185f1595/maintainability)](https://codeclimate.com/github/benbalter/word-to-markdown/maintainability) [![Test Coverage](https://api.codeclimate.com/v1/badges/aae0d67ea7db185f1595/test_coverage)](https://codeclimate.com/github/benbalter/word-to-markdown/test_coverage)
|
6
6
|
|
7
7
|
## The problem
|
8
8
|
|
@@ -14,7 +14,9 @@ A Ruby gem to liberate content from [the jail that is Word documents](http://ben
|
|
14
14
|
|
15
15
|
**[Read more](http://ben.balter.com/2014/03/31/word-versus-markdown-more-than-mere-semantics/)**
|
16
16
|
|
17
|
-
|
17
|
+
## Just want to convert a Microsoft Word (or Google) document to Markdown?
|
18
|
+
|
19
|
+
You can use this **[hosted service](https://word2md.com/)** (or check out [its source](https://github.com/benbalter/word-to-markdown-server)).
|
18
20
|
|
19
21
|
## Install
|
20
22
|
|
@@ -71,8 +73,22 @@ Word-to-markdown requires `soffice` a command line interface to LibreOffice that
|
|
71
73
|
script/cibuild
|
72
74
|
```
|
73
75
|
|
74
|
-
##
|
76
|
+
## Docker
|
77
|
+
|
78
|
+
First, create the `Gemfile.lock` by installing the dependencies:
|
79
|
+
|
80
|
+
```
|
81
|
+
bundle install
|
82
|
+
```
|
83
|
+
|
84
|
+
Everything you need to run the executable locally:
|
85
|
+
|
86
|
+
```
|
87
|
+
docker-compose build
|
88
|
+
docker-compose run --rm app bundle exec w2m --help
|
89
|
+
docker-compose run --rm app bundle exec w2m test/fixtures/em.docx
|
90
|
+
```
|
75
91
|
|
76
|
-
|
92
|
+
## Hosted service
|
77
93
|
|
78
|
-
A live version runs at [
|
94
|
+
[Word-to-markdown-server](https://github.com/benbalter/word-to-markdown-server) contains a lightweight server for converting Word Documents as a service. A live version runs at [word2md.com](https://word2md.com).
|
@@ -24,14 +24,15 @@ module Cliver
|
|
24
24
|
|
25
25
|
# Returns the version of the resolved dependency
|
26
26
|
def version
|
27
|
-
return @
|
27
|
+
return @version if defined? @version
|
28
28
|
return if Gem.win_platform?
|
29
|
+
|
29
30
|
version = installed_versions.find { |p, _v| p == path }
|
30
|
-
@
|
31
|
+
@version = version.nil? ? nil : version[1]
|
31
32
|
end
|
32
33
|
|
33
34
|
def major_version
|
34
|
-
version
|
35
|
+
version&.split('.')&.first
|
35
36
|
end
|
36
37
|
end
|
37
38
|
end
|
@@ -58,7 +58,7 @@ class WordToMarkdown
|
|
58
58
|
@document.tree.css('[style]').each do |element|
|
59
59
|
sizes.push element.font_size.round(-1) unless element.font_size.nil?
|
60
60
|
end
|
61
|
-
sizes.uniq.sort
|
61
|
+
sizes.uniq.sort.extend(DescriptiveStatistics)
|
62
62
|
end
|
63
63
|
end
|
64
64
|
|
@@ -68,6 +68,7 @@ class WordToMarkdown
|
|
68
68
|
# @return [String, nil] the heading tag (e.g., H1), or nil
|
69
69
|
def guess_heading(node)
|
70
70
|
return nil if node.font_size.nil?
|
71
|
+
|
71
72
|
[*1...HEADING_DEPTH].each do |heading|
|
72
73
|
return "h#{heading}" if node.font_size >= h(heading)
|
73
74
|
end
|
@@ -109,7 +110,7 @@ class WordToMarkdown
|
|
109
110
|
def remove_unicode_bullets_from_list_items!
|
110
111
|
path = WordToMarkdown.soffice.major_version == '5' ? 'li span span' : 'li span'
|
111
112
|
@document.tree.search(path).each do |span|
|
112
|
-
span.inner_html = span.inner_html.gsub(/^([#{UNICODE_BULLETS.join
|
113
|
+
span.inner_html = span.inner_html.gsub(/^([#{UNICODE_BULLETS.join}]+)/, '')
|
113
114
|
end
|
114
115
|
end
|
115
116
|
|
@@ -3,6 +3,7 @@
|
|
3
3
|
class WordToMarkdown
|
4
4
|
class Document
|
5
5
|
class NotFoundError < StandardError; end
|
6
|
+
|
6
7
|
class ConversionError < StandardError; end
|
7
8
|
|
8
9
|
attr_reader :path, :tmpdir
|
@@ -44,7 +45,7 @@ class WordToMarkdown
|
|
44
45
|
#
|
45
46
|
# @return [String] the encoding, defaulting to "UTF-8"
|
46
47
|
def encoding
|
47
|
-
match = raw_html.encode('UTF-8', invalid: :replace, replace: '').match(/charset=([
|
48
|
+
match = raw_html.encode('UTF-8', invalid: :replace, replace: '').match(/charset=([^"]+)/)
|
48
49
|
if match
|
49
50
|
match[1].sub('macintosh', 'MacRoman')
|
50
51
|
else
|
@@ -81,6 +82,7 @@ class WordToMarkdown
|
|
81
82
|
string.gsub!(/([ ]+)$/, '') # line trailing whitespace
|
82
83
|
string.gsub!(/\n\n\n\n/, "\n\n") # Quadruple line breaks
|
83
84
|
string.delete!(' ') # Unicode non-breaking spaces, injected as tabs
|
85
|
+
string.gsub!(/\*\*\ +(?!\*|_)([[:punct:]])/, '**\1') # Remove extra space after bold
|
84
86
|
string
|
85
87
|
end
|
86
88
|
|
@@ -95,6 +97,7 @@ class WordToMarkdown
|
|
95
97
|
@raw_html ||= begin
|
96
98
|
WordToMarkdown.run_command '--headless', '--convert-to', filter, path, '--outdir', tmpdir
|
97
99
|
raise ConversionError, "Failed to convert #{path}" unless File.exist?(dest_path)
|
100
|
+
|
98
101
|
html = File.read dest_path
|
99
102
|
File.delete dest_path
|
100
103
|
html
|
data/lib/word-to-markdown.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
require 'descriptive_statistics'
|
3
|
+
require 'descriptive_statistics/safe'
|
4
4
|
require 'reverse_markdown'
|
5
5
|
require 'nokogiri-styles'
|
6
6
|
require 'premailer'
|
@@ -22,12 +22,12 @@ class WordToMarkdown
|
|
22
22
|
|
23
23
|
# Options to be passed to Reverse Markdown
|
24
24
|
REVERSE_MARKDOWN_OPTIONS = {
|
25
|
-
unknown_tags:
|
25
|
+
unknown_tags: :bypass,
|
26
26
|
github_flavored: true
|
27
27
|
}.freeze
|
28
28
|
|
29
29
|
# Minimum version of LibreOffice Required
|
30
|
-
SOFFICE_VERSION_REQUIREMENT = '> 4.0'
|
30
|
+
SOFFICE_VERSION_REQUIREMENT = '> 4.0'
|
31
31
|
|
32
32
|
# Paths to look for LibreOffice, in order of preference
|
33
33
|
PATHS = [
|
@@ -66,6 +66,7 @@ class WordToMarkdown
|
|
66
66
|
output, status = Open3.capture2e(soffice.path, *args)
|
67
67
|
logger.debug output
|
68
68
|
raise "Command `#{soffice.path} #{args.join(' ')}` failed: #{output}" if status.exitstatus != 0
|
69
|
+
|
69
70
|
output
|
70
71
|
end
|
71
72
|
|
@@ -85,7 +86,7 @@ class WordToMarkdown
|
|
85
86
|
# @return Logger instance
|
86
87
|
def logger
|
87
88
|
@logger ||= begin
|
88
|
-
logger = Logger.new(
|
89
|
+
logger = Logger.new($stdout)
|
89
90
|
logger.level = Logger::ERROR unless ENV['DEBUG']
|
90
91
|
logger
|
91
92
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: word-to-markdown
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.8
|
4
|
+
version: 1.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ben Balter
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: cliver
|
@@ -70,16 +70,22 @@ dependencies:
|
|
70
70
|
name: reverse_markdown
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
|
-
- - "
|
73
|
+
- - ">="
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1
|
75
|
+
version: '1'
|
76
|
+
- - "<"
|
77
|
+
- !ruby/object:Gem::Version
|
78
|
+
version: '3'
|
76
79
|
type: :runtime
|
77
80
|
prerelease: false
|
78
81
|
version_requirements: !ruby/object:Gem::Requirement
|
79
82
|
requirements:
|
80
|
-
- - "
|
83
|
+
- - ">="
|
81
84
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1
|
85
|
+
version: '1'
|
86
|
+
- - "<"
|
87
|
+
- !ruby/object:Gem::Version
|
88
|
+
version: '3'
|
83
89
|
- !ruby/object:Gem::Dependency
|
84
90
|
name: sys-proctable
|
85
91
|
requirement: !ruby/object:Gem::Requirement
|
@@ -94,20 +100,6 @@ dependencies:
|
|
94
100
|
- - "~>"
|
95
101
|
- !ruby/object:Gem::Version
|
96
102
|
version: '1.0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: bundler
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - "~>"
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '1.6'
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - "~>"
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '1.6'
|
111
103
|
- !ruby/object:Gem::Dependency
|
112
104
|
name: minitest
|
113
105
|
requirement: !ruby/object:Gem::Requirement
|
@@ -156,42 +148,70 @@ dependencies:
|
|
156
148
|
requirements:
|
157
149
|
- - "~>"
|
158
150
|
- !ruby/object:Gem::Version
|
159
|
-
version: '
|
151
|
+
version: '13.0'
|
160
152
|
type: :development
|
161
153
|
prerelease: false
|
162
154
|
version_requirements: !ruby/object:Gem::Requirement
|
163
155
|
requirements:
|
164
156
|
- - "~>"
|
165
157
|
- !ruby/object:Gem::Version
|
166
|
-
version: '
|
158
|
+
version: '13.0'
|
167
159
|
- !ruby/object:Gem::Dependency
|
168
160
|
name: rubocop
|
169
161
|
requirement: !ruby/object:Gem::Requirement
|
170
162
|
requirements:
|
171
163
|
- - "~>"
|
172
164
|
- !ruby/object:Gem::Version
|
173
|
-
version: '0
|
165
|
+
version: '1.0'
|
166
|
+
type: :development
|
167
|
+
prerelease: false
|
168
|
+
version_requirements: !ruby/object:Gem::Requirement
|
169
|
+
requirements:
|
170
|
+
- - "~>"
|
171
|
+
- !ruby/object:Gem::Version
|
172
|
+
version: '1.0'
|
173
|
+
- !ruby/object:Gem::Dependency
|
174
|
+
name: rubocop-minitest
|
175
|
+
requirement: !ruby/object:Gem::Requirement
|
176
|
+
requirements:
|
177
|
+
- - "~>"
|
178
|
+
- !ruby/object:Gem::Version
|
179
|
+
version: '0.3'
|
180
|
+
type: :development
|
181
|
+
prerelease: false
|
182
|
+
version_requirements: !ruby/object:Gem::Requirement
|
183
|
+
requirements:
|
184
|
+
- - "~>"
|
185
|
+
- !ruby/object:Gem::Version
|
186
|
+
version: '0.3'
|
187
|
+
- !ruby/object:Gem::Dependency
|
188
|
+
name: rubocop-performance
|
189
|
+
requirement: !ruby/object:Gem::Requirement
|
190
|
+
requirements:
|
191
|
+
- - "~>"
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '1.5'
|
174
194
|
type: :development
|
175
195
|
prerelease: false
|
176
196
|
version_requirements: !ruby/object:Gem::Requirement
|
177
197
|
requirements:
|
178
198
|
- - "~>"
|
179
199
|
- !ruby/object:Gem::Version
|
180
|
-
version: '
|
200
|
+
version: '1.5'
|
181
201
|
- !ruby/object:Gem::Dependency
|
182
202
|
name: shoulda
|
183
203
|
requirement: !ruby/object:Gem::Requirement
|
184
204
|
requirements:
|
185
205
|
- - "~>"
|
186
206
|
- !ruby/object:Gem::Version
|
187
|
-
version: '
|
207
|
+
version: '4.0'
|
188
208
|
type: :development
|
189
209
|
prerelease: false
|
190
210
|
version_requirements: !ruby/object:Gem::Requirement
|
191
211
|
requirements:
|
192
212
|
- - "~>"
|
193
213
|
- !ruby/object:Gem::Version
|
194
|
-
version: '
|
214
|
+
version: '4.0'
|
195
215
|
description: Ruby Gem to convert Word documents to markdown.
|
196
216
|
email: ben.balter@github.com
|
197
217
|
executables:
|
@@ -211,8 +231,9 @@ files:
|
|
211
231
|
homepage: https://github.com/benbalter/word-to-markdown
|
212
232
|
licenses:
|
213
233
|
- MIT
|
214
|
-
metadata:
|
215
|
-
|
234
|
+
metadata:
|
235
|
+
rubygems_mfa_required: 'true'
|
236
|
+
post_install_message:
|
216
237
|
rdoc_options: []
|
217
238
|
require_paths:
|
218
239
|
- lib
|
@@ -227,9 +248,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
227
248
|
- !ruby/object:Gem::Version
|
228
249
|
version: '0'
|
229
250
|
requirements: []
|
230
|
-
|
231
|
-
|
232
|
-
signing_key:
|
251
|
+
rubygems_version: 3.5.16
|
252
|
+
signing_key:
|
233
253
|
specification_version: 4
|
234
254
|
summary: Ruby Gem to convert Word documents to markdown
|
235
255
|
test_files: []
|