ghostwriter 1.2.1 → 1.4.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/.ruby-version +1 -1
- data/.simplecov +10 -0
- data/Gemfile +13 -1
- data/RELEASE_NOTES.md +29 -0
- data/Rakefile +4 -0
- data/{dirt-textify.gemspec → ghostwriter.gemspec} +5 -7
- data/lib/ghostwriter/version.rb +1 -1
- data/lib/ghostwriter/writer.rb +76 -45
- metadata +8 -76
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c2bacc9e2d98409f7220a3b21d0cc57f18ff9c6768fb3175c0f06fdbef767e95
|
4
|
+
data.tar.gz: 5dba28996b1589fb94c0edbf7b3897a7610267438389ed57dbb009377544be93
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 101966376b10eec407d0a5b98bdb11ddfa3830c4532bc77b5e4ad4aeba41e05925c72dd78d25b6fc34f507f7ea9a32809757f735f8d6b6aa15d079a436c0db9b
|
7
|
+
data.tar.gz: dc870788a2971023a8276369d2953aa68f801edf31eeefd7bd6513400d453cf6e02fa728856f67a892e91fd8535c5607cca1cb244302d18e76bdc1c089b208e3
|
data/.rubocop.yml
CHANGED
@@ -1 +1 @@
|
|
1
|
-
inherit_from:
|
1
|
+
inherit_from: ~/.config/rubocop/config.yml
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-
|
1
|
+
ruby-3.3.0
|
data/.simplecov
ADDED
data/Gemfile
CHANGED
@@ -2,5 +2,17 @@
|
|
2
2
|
|
3
3
|
source 'https://rubygems.org'
|
4
4
|
|
5
|
-
#
|
5
|
+
# Gem dependencies in ghostwriter.gemspec
|
6
6
|
gemspec
|
7
|
+
|
8
|
+
group :development do
|
9
|
+
gem 'bundler', '~> 2.4'
|
10
|
+
gem 'rake', '~> 13.2'
|
11
|
+
gem 'rubocop', '~> 1.69'
|
12
|
+
gem 'rubocop-performance', '~> 1.23'
|
13
|
+
end
|
14
|
+
|
15
|
+
group :test do
|
16
|
+
gem 'rspec', '~> 3.13'
|
17
|
+
gem 'simplecov', '~> 0.22'
|
18
|
+
end
|
data/RELEASE_NOTES.md
CHANGED
@@ -1,5 +1,34 @@
|
|
1
1
|
# Release Notes
|
2
2
|
|
3
|
+
## 1.4.0 (2023-09-13)
|
4
|
+
|
5
|
+
### Major
|
6
|
+
|
7
|
+
* none
|
8
|
+
|
9
|
+
### Minor
|
10
|
+
|
11
|
+
* Support for graceful handling of nested tables
|
12
|
+
* Increased minimum ruby to 3.3
|
13
|
+
|
14
|
+
### Bugfixes
|
15
|
+
|
16
|
+
* none
|
17
|
+
|
18
|
+
## 1.3.0 (2023-09-13)
|
19
|
+
|
20
|
+
### Major
|
21
|
+
|
22
|
+
* none
|
23
|
+
|
24
|
+
### Minor
|
25
|
+
|
26
|
+
* Increased minimum Ruby to 3.1
|
27
|
+
|
28
|
+
### Bugfixes
|
29
|
+
|
30
|
+
* none
|
31
|
+
|
3
32
|
## 1.2.1 (2021-10-29)
|
4
33
|
|
5
34
|
### Major
|
data/Rakefile
CHANGED
data/{dirt-textify.gemspec → ghostwriter.gemspec}
CHANGED
@@ -19,6 +19,10 @@ Gem::Specification.new do |spec|
|
|
19
19
|
spec.homepage = 'https://github.com/TenjinInc/ghostwriter'
|
20
20
|
spec.license = 'MIT'
|
21
21
|
|
22
|
+
spec.metadata = {
|
23
|
+
'rubygems_mfa_required' => 'true'
|
24
|
+
}
|
25
|
+
|
22
26
|
spec.files = `git ls-files -z`.split("\x0").reject do |f|
|
23
27
|
f.match(%r{^(test|spec|features)/})
|
24
28
|
end
|
@@ -27,13 +31,7 @@ Gem::Specification.new do |spec|
|
|
27
31
|
spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
|
28
32
|
spec.require_paths = ['lib']
|
29
33
|
|
30
|
-
spec.required_ruby_version = '>=
|
34
|
+
spec.required_ruby_version = '>= 3.3'
|
31
35
|
|
32
36
|
spec.add_dependency 'nokogiri', '>= 1.12'
|
33
|
-
|
34
|
-
spec.add_development_dependency 'bundler', '~> 2.2'
|
35
|
-
spec.add_development_dependency 'rake', '~> 13.0'
|
36
|
-
spec.add_development_dependency 'rspec', '~> 3.3'
|
37
|
-
spec.add_development_dependency 'rubocop', '~> 1.22'
|
38
|
-
spec.add_development_dependency 'rubocop-performance', '~> 1.11'
|
39
37
|
end
|
data/lib/ghostwriter/version.rb
CHANGED
data/lib/ghostwriter/writer.rb
CHANGED
@@ -49,6 +49,12 @@ module Ghostwriter
|
|
49
49
|
|
50
50
|
private
|
51
51
|
|
52
|
+
def simple_replace(doc, tag, replacement)
|
53
|
+
doc.search(tag).each do |node|
|
54
|
+
node.replace(node.inner_html + replacement)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
52
58
|
def normalize_lines(doc)
|
53
59
|
doc.text.strip.split("\n").collect(&:strip).join("\n").concat("\n")
|
54
60
|
end
|
@@ -114,78 +120,103 @@ module Ghostwriter
|
|
114
120
|
end
|
115
121
|
end
|
116
122
|
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
123
|
+
# Methods for processing lists
|
124
|
+
module ListWriter
|
125
|
+
def replace_lists(doc)
|
126
|
+
doc.search('ol').each do |list_node|
|
127
|
+
replace_list_items(list_node, @ol_marker, after_marker: '.', increment: true)
|
128
|
+
end
|
121
129
|
|
122
|
-
|
123
|
-
|
124
|
-
|
130
|
+
doc.search('ul').each do |list_node|
|
131
|
+
replace_list_items(list_node, @ul_marker)
|
132
|
+
end
|
125
133
|
|
126
|
-
|
127
|
-
|
134
|
+
doc.search('ul, ol').each do |list_node|
|
135
|
+
list_node.replace("#{ list_node.inner_html }\n")
|
136
|
+
end
|
128
137
|
end
|
129
|
-
end
|
130
138
|
|
131
|
-
|
132
|
-
|
133
|
-
|
139
|
+
def replace_list_items(list_node, marker, after_marker: '', increment: false)
|
140
|
+
list_node.search('./li').each do |list_item|
|
141
|
+
list_item.replace("#{ marker }#{ after_marker } #{ list_item.inner_html }\n")
|
134
142
|
|
135
|
-
|
143
|
+
marker = marker.next if increment
|
144
|
+
end
|
136
145
|
end
|
137
146
|
end
|
138
147
|
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
148
|
+
# Methods for processing tables
|
149
|
+
module TableWriter
|
150
|
+
def replace_tables(doc)
|
151
|
+
doc.css('table').each do |table|
|
152
|
+
# TODO: nokogiri chokes on table:not(table table), but support may come later https://github.com/sparklemotion/nokogiri/issues/3207
|
153
|
+
next if complex? table
|
143
154
|
|
144
|
-
|
155
|
+
# remove whitespace between nodes
|
156
|
+
table.search('//text()[normalize-space()=""]').remove
|
145
157
|
|
146
|
-
|
147
|
-
replace_table_nodes(row, column_sizes)
|
158
|
+
column_sizes = calculate_column_sizes(table)
|
148
159
|
|
149
|
-
|
160
|
+
fancy_rows! table, column_sizes
|
161
|
+
|
162
|
+
add_table_header_underline(table, column_sizes)
|
163
|
+
|
164
|
+
table.replace("\n#{ table.inner_html }\n")
|
150
165
|
end
|
151
166
|
|
152
|
-
|
167
|
+
doc.css('table table').each do |table|
|
168
|
+
simple_rows! table
|
169
|
+
end
|
170
|
+
end
|
153
171
|
|
154
|
-
|
172
|
+
# complex just means it contains a table or is itself nested
|
173
|
+
def complex?(table)
|
174
|
+
!table.css('table').empty? || !table.ancestors('table').empty?
|
155
175
|
end
|
156
|
-
end
|
157
176
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
177
|
+
def fancy_rows!(table, column_sizes)
|
178
|
+
table.search('./thead/tr', './tbody/tr', './tr').each do |row|
|
179
|
+
replace_table_nodes(row, column_sizes)
|
180
|
+
|
181
|
+
row.replace("#{ row.inner_html }#{ @table_column }\n")
|
162
182
|
end
|
163
183
|
end
|
164
184
|
|
165
|
-
|
166
|
-
|
185
|
+
def simple_rows!(table)
|
186
|
+
table.search('./thead/tr', './tbody/tr', './tr').each do |row|
|
187
|
+
row.replace("\n#{ row.inner_html }\n")
|
188
|
+
end
|
189
|
+
end
|
167
190
|
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
191
|
+
def calculate_column_sizes(table)
|
192
|
+
column_sizes = table.search('tr').collect do |row|
|
193
|
+
row.search('th', 'td').collect do |node|
|
194
|
+
node.text.length
|
195
|
+
end
|
196
|
+
end
|
172
197
|
|
173
|
-
|
198
|
+
column_sizes.transpose.collect(&:max)
|
174
199
|
end
|
175
|
-
end
|
176
200
|
|
177
|
-
|
178
|
-
|
179
|
-
|
201
|
+
def add_table_header_underline(table, column_sizes)
|
202
|
+
table.search('./thead').each do |thead|
|
203
|
+
lines = column_sizes.collect { |len| @table_row * (len + 2) }
|
204
|
+
underline_row = "#{ table_corner }#{ lines.join(@table_corner) }#{ @table_corner }"
|
180
205
|
|
181
|
-
|
206
|
+
thead.replace("#{ thead.inner_html }#{ underline_row }\n")
|
207
|
+
end
|
182
208
|
end
|
183
|
-
end
|
184
209
|
|
185
|
-
|
186
|
-
|
187
|
-
|
210
|
+
def replace_table_nodes(row, column_sizes)
|
211
|
+
row.search('th', 'td').each_with_index do |node, i|
|
212
|
+
new_content = node.text.ljust(column_sizes[i] + 1)
|
213
|
+
|
214
|
+
node.replace("#{ @table_column } #{ new_content }")
|
215
|
+
end
|
188
216
|
end
|
189
217
|
end
|
218
|
+
|
219
|
+
include ListWriter
|
220
|
+
include TableWriter
|
190
221
|
end
|
191
222
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ghostwriter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Robin Miller
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-01-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -24,76 +24,6 @@ dependencies:
|
|
24
24
|
- - ">="
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.12'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: bundler
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - "~>"
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: '2.2'
|
34
|
-
type: :development
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - "~>"
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: '2.2'
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: rake
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - "~>"
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '13.0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - "~>"
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '13.0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: rspec
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - "~>"
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '3.3'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - "~>"
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '3.3'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: rubocop
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - "~>"
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '1.22'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - "~>"
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '1.22'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: rubocop-performance
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - "~>"
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '1.11'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - "~>"
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '1.11'
|
97
27
|
description: |
|
98
28
|
Converts HTML to plain text, preserving as much legibility and functionality as possible.
|
99
29
|
|
@@ -108,6 +38,7 @@ files:
|
|
108
38
|
- ".rspec"
|
109
39
|
- ".rubocop.yml"
|
110
40
|
- ".ruby-version"
|
41
|
+
- ".simplecov"
|
111
42
|
- ".travis.yml"
|
112
43
|
- CODE_OF_CONDUCT.md
|
113
44
|
- Gemfile
|
@@ -117,14 +48,15 @@ files:
|
|
117
48
|
- Rakefile
|
118
49
|
- bin/console
|
119
50
|
- bin/setup
|
120
|
-
- dirt-textify.gemspec
|
51
|
+
- ghostwriter.gemspec
|
121
52
|
- lib/ghostwriter.rb
|
122
53
|
- lib/ghostwriter/version.rb
|
123
54
|
- lib/ghostwriter/writer.rb
|
124
55
|
homepage: https://github.com/TenjinInc/ghostwriter
|
125
56
|
licenses:
|
126
57
|
- MIT
|
127
|
-
metadata: {}
|
58
|
+
metadata:
|
59
|
+
rubygems_mfa_required: 'true'
|
128
60
|
post_install_message:
|
129
61
|
rdoc_options: []
|
130
62
|
require_paths:
|
@@ -133,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
133
65
|
requirements:
|
134
66
|
- - ">="
|
135
67
|
- !ruby/object:Gem::Version
|
136
|
-
version: '
|
68
|
+
version: '3.3'
|
137
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
138
70
|
requirements:
|
139
71
|
- - ">="
|
140
72
|
- !ruby/object:Gem::Version
|
141
73
|
version: '0'
|
142
74
|
requirements: []
|
143
|
-
rubygems_version: 3.
|
75
|
+
rubygems_version: 3.5.3
|
144
76
|
signing_key:
|
145
77
|
specification_version: 4
|
146
78
|
summary: Converts HTML to plain text
|