ghostwriter 1.2.1 → 1.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ccebf53b35ad212e0c7e353a3cb4d79e1f91c63cec6490c5fff7ff56b72eed70
4
- data.tar.gz: 5b3d36839dcef24d80605421970eb39e17efe3ec24d3126d753ac7a5039512a5
3
+ metadata.gz: c2bacc9e2d98409f7220a3b21d0cc57f18ff9c6768fb3175c0f06fdbef767e95
4
+ data.tar.gz: 5dba28996b1589fb94c0edbf7b3897a7610267438389ed57dbb009377544be93
5
5
  SHA512:
6
- metadata.gz: d6039d9d2b3c1d6606da533c108c8087faa8b985ac0bf6adac7bc1ad4310cc601f76840ff32e83255847bedbd77dbc6cc280054f6eb4975dbe815a98b2a07373
7
- data.tar.gz: d005669ca03f3ff465c351df0e47eba0372ca6061057102e14b0d879ddb0c5191a88bfaa389fa82a865787e6ff3b5d3fb351ae8b6351f108aef404a3bd66dd7a
6
+ metadata.gz: 101966376b10eec407d0a5b98bdb11ddfa3830c4532bc77b5e4ad4aeba41e05925c72dd78d25b6fc34f507f7ea9a32809757f735f8d6b6aa15d079a436c0db9b
7
+ data.tar.gz: dc870788a2971023a8276369d2953aa68f801edf31eeefd7bd6513400d453cf6e02fa728856f67a892e91fd8535c5607cca1cb244302d18e76bdc1c089b208e3
data/.rubocop.yml CHANGED
@@ -1 +1 @@
1
- inherit_from: ../.rubocop.yml
1
+ inherit_from: ~/.config/rubocop/config.yml
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- ruby-2.7.1
1
+ ruby-3.3.0
data/.simplecov ADDED
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ SimpleCov.start do
4
+ coverage_dir '.coverage'
5
+ enable_coverage :branch
6
+
7
+ root __dir__
8
+
9
+ # add_filter 'spec/'
10
+ end
data/Gemfile CHANGED
@@ -2,5 +2,17 @@
2
2
 
3
3
  source 'https://rubygems.org'
4
4
 
5
- # Specify gem dependencies in ghostwriter.gemspec
5
+ # Gem dependencies in ghostwriter.gemspec
6
6
  gemspec
7
+
8
+ group :development do
9
+ gem 'bundler', '~> 2.4'
10
+ gem 'rake', '~> 13.2'
11
+ gem 'rubocop', '~> 1.69'
12
+ gem 'rubocop-performance', '~> 1.23'
13
+ end
14
+
15
+ group :test do
16
+ gem 'rspec', '~> 3.13'
17
+ gem 'simplecov', '~> 0.22'
18
+ end
data/RELEASE_NOTES.md CHANGED
@@ -1,5 +1,34 @@
1
1
  # Release Notes
2
2
 
3
+ ## 1.4.0 (2023-09-13)
4
+
5
+ ### Major
6
+
7
+ * none
8
+
9
+ ### Minor
10
+
11
+ * Support for graceful handling of nested tables
12
+ * Increased minimum Ruby to 3.3
13
+
14
+ ### Bugfixes
15
+
16
+ * none
17
+
18
+ ## 1.3.0 (2023-09-13)
19
+
20
+ ### Major
21
+
22
+ * none
23
+
24
+ ### Minor
25
+
26
+ * Increased minimum Ruby to 3.1
27
+
28
+ ### Bugfixes
29
+
30
+ * none
31
+
3
32
  ## 1.2.1 (2021-10-29)
4
33
 
5
34
  ### Major
data/Rakefile CHANGED
@@ -1,5 +1,9 @@
1
+ #!/usr/bin/env rake
1
2
  # frozen_string_literal: true
2
3
 
4
+ # Activate the Gemfile's dependencies through RubyGems so tasks run without `bundle exec`
5
+ Gem.use_gemdeps 'Gemfile'
6
+
3
7
  require 'bundler/gem_tasks'
4
8
  require 'rspec/core/rake_task'
5
9
 
@@ -19,6 +19,10 @@ Gem::Specification.new do |spec|
19
19
  spec.homepage = 'https://github.com/TenjinInc/ghostwriter'
20
20
  spec.license = 'MIT'
21
21
 
22
+ spec.metadata = {
23
+ 'rubygems_mfa_required' => 'true'
24
+ }
25
+
22
26
  spec.files = `git ls-files -z`.split("\x0").reject do |f|
23
27
  f.match(%r{^(test|spec|features)/})
24
28
  end
@@ -27,13 +31,7 @@ Gem::Specification.new do |spec|
27
31
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
28
32
  spec.require_paths = ['lib']
29
33
 
30
- spec.required_ruby_version = '>= 2.7'
34
+ spec.required_ruby_version = '>= 3.3'
31
35
 
32
36
  spec.add_dependency 'nokogiri', '>= 1.12'
33
-
34
- spec.add_development_dependency 'bundler', '~> 2.2'
35
- spec.add_development_dependency 'rake', '~> 13.0'
36
- spec.add_development_dependency 'rspec', '~> 3.3'
37
- spec.add_development_dependency 'rubocop', '~> 1.22'
38
- spec.add_development_dependency 'rubocop-performance', '~> 1.11'
39
37
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Ghostwriter
4
- VERSION = '1.2.1'
4
+ VERSION = '1.4.0'
5
5
  end
@@ -49,6 +49,12 @@ module Ghostwriter
49
49
 
50
50
  private
51
51
 
52
+ def simple_replace(doc, tag, replacement)
53
+ doc.search(tag).each do |node|
54
+ node.replace(node.inner_html + replacement)
55
+ end
56
+ end
57
+
52
58
  def normalize_lines(doc)
53
59
  doc.text.strip.split("\n").collect(&:strip).join("\n").concat("\n")
54
60
  end
@@ -114,78 +120,103 @@ module Ghostwriter
114
120
  end
115
121
  end
116
122
 
117
- def replace_lists(doc)
118
- doc.search('ol').each do |list_node|
119
- replace_list_items(list_node, @ol_marker, after_marker: '.', increment: true)
120
- end
123
+ # Methods for processing lists
124
+ module ListWriter
125
+ def replace_lists(doc)
126
+ doc.search('ol').each do |list_node|
127
+ replace_list_items(list_node, @ol_marker, after_marker: '.', increment: true)
128
+ end
121
129
 
122
- doc.search('ul').each do |list_node|
123
- replace_list_items(list_node, @ul_marker)
124
- end
130
+ doc.search('ul').each do |list_node|
131
+ replace_list_items(list_node, @ul_marker)
132
+ end
125
133
 
126
- doc.search('ul, ol').each do |list_node|
127
- list_node.replace("#{ list_node.inner_html }\n")
134
+ doc.search('ul, ol').each do |list_node|
135
+ list_node.replace("#{ list_node.inner_html }\n")
136
+ end
128
137
  end
129
- end
130
138
 
131
- def replace_list_items(list_node, marker, after_marker: '', increment: false)
132
- list_node.search('./li').each do |list_item|
133
- list_item.replace("#{ marker }#{ after_marker } #{ list_item.inner_html }\n")
139
+ def replace_list_items(list_node, marker, after_marker: '', increment: false)
140
+ list_node.search('./li').each do |list_item|
141
+ list_item.replace("#{ marker }#{ after_marker } #{ list_item.inner_html }\n")
134
142
 
135
- marker = marker.next if increment
143
+ marker = marker.next if increment
144
+ end
136
145
  end
137
146
  end
138
147
 
139
- def replace_tables(doc)
140
- doc.css('table').each do |table|
141
- # remove whitespace between nodes
142
- table.search('//text()[normalize-space()=""]').remove
148
+ # Methods for processing tables
149
+ module TableWriter
150
+ def replace_tables(doc)
151
+ doc.css('table').each do |table|
152
+ # TODO: nokogiri chokes on table:not(table table), but support may come later https://github.com/sparklemotion/nokogiri/issues/3207
153
+ next if complex? table
143
154
 
144
- column_sizes = calculate_column_sizes(table)
155
+ # remove whitespace between nodes
156
+ table.search('//text()[normalize-space()=""]').remove
145
157
 
146
- table.search('./thead/tr', './tbody/tr', './tr').each do |row|
147
- replace_table_nodes(row, column_sizes)
158
+ column_sizes = calculate_column_sizes(table)
148
159
 
149
- row.replace("#{ row.inner_html }#{ @table_column }\n")
160
+ fancy_rows! table, column_sizes
161
+
162
+ add_table_header_underline(table, column_sizes)
163
+
164
+ table.replace("\n#{ table.inner_html }\n")
150
165
  end
151
166
 
152
- add_table_header_underline(table, column_sizes)
167
+ doc.css('table table').each do |table|
168
+ simple_rows! table
169
+ end
170
+ end
153
171
 
154
- table.replace("\n#{ table.inner_html }\n")
172
+ # A table is complex when it contains another table or is itself nested inside one
173
+ def complex?(table)
174
+ !table.css('table').empty? || !table.ancestors('table').empty?
155
175
  end
156
- end
157
176
 
158
- def calculate_column_sizes(table)
159
- column_sizes = table.search('tr').collect do |row|
160
- row.search('th', 'td').collect do |node|
161
- node.text.length
177
+ def fancy_rows!(table, column_sizes)
178
+ table.search('./thead/tr', './tbody/tr', './tr').each do |row|
179
+ replace_table_nodes(row, column_sizes)
180
+
181
+ row.replace("#{ row.inner_html }#{ @table_column }\n")
162
182
  end
163
183
  end
164
184
 
165
- column_sizes.transpose.collect(&:max)
166
- end
185
+ def simple_rows!(table)
186
+ table.search('./thead/tr', './tbody/tr', './tr').each do |row|
187
+ row.replace("\n#{ row.inner_html }\n")
188
+ end
189
+ end
167
190
 
168
- def add_table_header_underline(table, column_sizes)
169
- table.search('./thead').each do |thead|
170
- lines = column_sizes.collect { |len| @table_row * (len + 2) }
171
- underline_row = "#{ table_corner }#{ lines.join(@table_corner) }#{ @table_corner }"
191
+ def calculate_column_sizes(table)
192
+ column_sizes = table.search('tr').collect do |row|
193
+ row.search('th', 'td').collect do |node|
194
+ node.text.length
195
+ end
196
+ end
172
197
 
173
- thead.replace("#{ thead.inner_html }#{ underline_row }\n")
198
+ column_sizes.transpose.collect(&:max)
174
199
  end
175
- end
176
200
 
177
- def replace_table_nodes(row, column_sizes)
178
- row.search('th', 'td').each_with_index do |node, i|
179
- new_content = node.text.ljust(column_sizes[i] + 1)
201
+ def add_table_header_underline(table, column_sizes)
202
+ table.search('./thead').each do |thead|
203
+ lines = column_sizes.collect { |len| @table_row * (len + 2) }
204
+ underline_row = "#{ table_corner }#{ lines.join(@table_corner) }#{ @table_corner }"
180
205
 
181
- node.replace("#{ @table_column } #{ new_content }")
206
+ thead.replace("#{ thead.inner_html }#{ underline_row }\n")
207
+ end
182
208
  end
183
- end
184
209
 
185
- def simple_replace(doc, tag, replacement)
186
- doc.search(tag).each do |node|
187
- node.replace(node.inner_html + replacement)
210
+ def replace_table_nodes(row, column_sizes)
211
+ row.search('th', 'td').each_with_index do |node, i|
212
+ new_content = node.text.ljust(column_sizes[i] + 1)
213
+
214
+ node.replace("#{ @table_column } #{ new_content }")
215
+ end
188
216
  end
189
217
  end
218
+
219
+ include ListWriter
220
+ include TableWriter
190
221
  end
191
222
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ghostwriter
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Robin Miller
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-10-29 00:00:00.000000000 Z
11
+ date: 2025-01-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -24,76 +24,6 @@ dependencies:
24
24
  - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.12'
27
- - !ruby/object:Gem::Dependency
28
- name: bundler
29
- requirement: !ruby/object:Gem::Requirement
30
- requirements:
31
- - - "~>"
32
- - !ruby/object:Gem::Version
33
- version: '2.2'
34
- type: :development
35
- prerelease: false
36
- version_requirements: !ruby/object:Gem::Requirement
37
- requirements:
38
- - - "~>"
39
- - !ruby/object:Gem::Version
40
- version: '2.2'
41
- - !ruby/object:Gem::Dependency
42
- name: rake
43
- requirement: !ruby/object:Gem::Requirement
44
- requirements:
45
- - - "~>"
46
- - !ruby/object:Gem::Version
47
- version: '13.0'
48
- type: :development
49
- prerelease: false
50
- version_requirements: !ruby/object:Gem::Requirement
51
- requirements:
52
- - - "~>"
53
- - !ruby/object:Gem::Version
54
- version: '13.0'
55
- - !ruby/object:Gem::Dependency
56
- name: rspec
57
- requirement: !ruby/object:Gem::Requirement
58
- requirements:
59
- - - "~>"
60
- - !ruby/object:Gem::Version
61
- version: '3.3'
62
- type: :development
63
- prerelease: false
64
- version_requirements: !ruby/object:Gem::Requirement
65
- requirements:
66
- - - "~>"
67
- - !ruby/object:Gem::Version
68
- version: '3.3'
69
- - !ruby/object:Gem::Dependency
70
- name: rubocop
71
- requirement: !ruby/object:Gem::Requirement
72
- requirements:
73
- - - "~>"
74
- - !ruby/object:Gem::Version
75
- version: '1.22'
76
- type: :development
77
- prerelease: false
78
- version_requirements: !ruby/object:Gem::Requirement
79
- requirements:
80
- - - "~>"
81
- - !ruby/object:Gem::Version
82
- version: '1.22'
83
- - !ruby/object:Gem::Dependency
84
- name: rubocop-performance
85
- requirement: !ruby/object:Gem::Requirement
86
- requirements:
87
- - - "~>"
88
- - !ruby/object:Gem::Version
89
- version: '1.11'
90
- type: :development
91
- prerelease: false
92
- version_requirements: !ruby/object:Gem::Requirement
93
- requirements:
94
- - - "~>"
95
- - !ruby/object:Gem::Version
96
- version: '1.11'
97
27
  description: |
98
28
  Converts HTML to plain text, preserving as much legibility and functionality as possible.
99
29
 
@@ -108,6 +38,7 @@ files:
108
38
  - ".rspec"
109
39
  - ".rubocop.yml"
110
40
  - ".ruby-version"
41
+ - ".simplecov"
111
42
  - ".travis.yml"
112
43
  - CODE_OF_CONDUCT.md
113
44
  - Gemfile
@@ -117,14 +48,15 @@ files:
117
48
  - Rakefile
118
49
  - bin/console
119
50
  - bin/setup
120
- - dirt-textify.gemspec
51
+ - ghostwriter.gemspec
121
52
  - lib/ghostwriter.rb
122
53
  - lib/ghostwriter/version.rb
123
54
  - lib/ghostwriter/writer.rb
124
55
  homepage: https://github.com/TenjinInc/ghostwriter
125
56
  licenses:
126
57
  - MIT
127
- metadata: {}
58
+ metadata:
59
+ rubygems_mfa_required: 'true'
128
60
  post_install_message:
129
61
  rdoc_options: []
130
62
  require_paths:
@@ -133,14 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
133
65
  requirements:
134
66
  - - ">="
135
67
  - !ruby/object:Gem::Version
136
- version: '2.7'
68
+ version: '3.3'
137
69
  required_rubygems_version: !ruby/object:Gem::Requirement
138
70
  requirements:
139
71
  - - ">="
140
72
  - !ruby/object:Gem::Version
141
73
  version: '0'
142
74
  requirements: []
143
- rubygems_version: 3.1.2
75
+ rubygems_version: 3.5.3
144
76
  signing_key:
145
77
  specification_version: 4
146
78
  summary: Converts HTML to plain text