abbreviato 0.10.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/README.md +3 -2
- data/Rakefile +5 -27
- data/lib/abbreviato.rb +3 -1
- data/lib/abbreviato/abbreviato.rb +20 -6
- data/lib/abbreviato/truncated_sax_document.rb +20 -16
- data/lib/abbreviato/version.rb +3 -1
- metadata +27 -28
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e2c3faae5fca5550963dcc873dae508f744c098e67be55be28ca20d38821f051
|
4
|
+
data.tar.gz: 1c5a437b8d174d975f9a047a422c29832120ac9aa1544b9df37f56f422b90f9e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7b87ac3291fb6afb74056e60e3442b2963e4782240c4cfc3b65c01095446b26936ca6d88d4255d6f750268ea41773d9833b9642f04dfc6f07deef3840715221
|
7
|
+
data.tar.gz: 7eb14d424857d25c063cae759c121c7ef56674dab33e5e21fc55e27912d581848d9e323843141ee3a669c5ec9151a6dd25cf1951a666ba046977b293f4bfaf9d
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# abbreviato
|
2
2
|
|
3
|
-
[](https://github.com/zendesk/abbreviato/actions/workflows/actions.yml)
|
4
|
+
[](https://github.com/zendesk/abbreviato/actions/workflows/security.yml)
|
4
5
|
[](https://rubygems.org/gems/abbreviato)
|
5
6
|
|
6
7
|
*abbreviato* is a Ruby library for truncating HTML strings keeping the markup valid. It is a fork of [jorgemanrubia/truncato](https://github.com/jorgemanrubia/truncato) but focused on truncating to a bytesize, not on a per-character basis.
|
@@ -47,7 +48,7 @@ rspec spec/abbreviato/abbreviato_spec.rb:357
|
|
47
48
|
## Running all checks
|
48
49
|
|
49
50
|
```ruby
|
50
|
-
bundle exec
|
51
|
+
bundle exec rake spec
|
51
52
|
```
|
52
53
|
|
53
54
|
## Contribute
|
data/Rakefile
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'bundler/setup'
|
4
|
-
require 'wwtd/tasks'
|
5
4
|
require 'bundler/gem_tasks'
|
6
5
|
require 'bump/tasks'
|
7
6
|
|
@@ -10,19 +9,11 @@ Bundler::GemHelper.install_tasks
|
|
10
9
|
require 'rspec/core/rake_task'
|
11
10
|
RSpec::Core::RakeTask.new(:spec)
|
12
11
|
|
13
|
-
if %w[development test].include?(ENV[
|
14
|
-
def run_command(command)
|
15
|
-
result = `#{command}`
|
16
|
-
result.force_encoding('binary')
|
17
|
-
raise "Command #{command} failed: #{result}" unless $?.success?
|
18
|
-
|
19
|
-
result
|
20
|
-
end
|
21
|
-
|
12
|
+
if %w[development test].include?(ENV['RAILS_ENV'] ||= 'development')
|
22
13
|
require 'bundler/audit/task'
|
23
14
|
Bundler::Audit::Task.new
|
24
15
|
|
25
|
-
desc
|
16
|
+
desc 'Analyze for code duplication (large, identical syntax trees) with fuzzy matching.'
|
26
17
|
task :flay do
|
27
18
|
require 'flay'
|
28
19
|
flay = Flay.run(%w[bin config lib script])
|
@@ -35,23 +26,10 @@ if %w[development test].include?(ENV["RAILS_ENV"] ||= 'development')
|
|
35
26
|
require 'rubocop/rake_task'
|
36
27
|
RuboCop::RakeTask.new
|
37
28
|
|
38
|
-
|
39
|
-
puts "Running brakecheck..."
|
40
|
-
%w[brakecheck brakeman bundler-audit flay rubocop].each do |gem_name|
|
41
|
-
result = `brakecheck #{gem_name}`
|
42
|
-
result.force_encoding('binary')
|
43
|
-
if $?.success?
|
44
|
-
puts "✔ #{gem_name}"
|
45
|
-
else
|
46
|
-
raise "✘ #{gem_name}'s brakecheck failed: #{result}"
|
47
|
-
end
|
48
|
-
end
|
49
|
-
true
|
50
|
-
end
|
51
|
-
|
29
|
+
desc 'Analyze security vulnerabilities with brakeman'
|
52
30
|
task :brakeman do
|
53
|
-
|
31
|
+
`brakeman --exit-on-warn --exit-on-err --format plain --ensure-latest --table-width 999 --force-scan lib --ignore-config .brakeman.ignore`
|
54
32
|
end
|
55
33
|
end
|
56
34
|
|
57
|
-
task default: :
|
35
|
+
task default: :spec
|
data/lib/abbreviato.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Abbreviato
|
2
4
|
DEFAULT_OPTIONS = {
|
3
|
-
|
4
|
-
|
5
|
-
|
5
|
+
max_length: 30,
|
6
|
+
tail: '…',
|
7
|
+
fragment: true
|
6
8
|
}.freeze
|
7
9
|
|
8
10
|
# Truncates the source XML string and returns the truncated XML and a boolean flag indicating
|
@@ -22,16 +24,28 @@ module Abbreviato
|
|
22
24
|
# @return [[String] the truncated string, [boolean] whether the string was truncated]
|
23
25
|
def self.truncate(source = '', user_options = {})
|
24
26
|
return [nil, false] if source.nil?
|
27
|
+
|
25
28
|
truncated_sax_document = TruncatedSaxDocument.new(DEFAULT_OPTIONS.merge(user_options))
|
26
29
|
parser = Nokogiri::HTML::SAX::Parser.new(truncated_sax_document)
|
27
30
|
parser.parse(source) { |context| context.replace_entities = false }
|
28
31
|
|
29
32
|
if truncated_sax_document.truncated && user_options[:truncate_incomplete_row]
|
33
|
+
parsed_results = [truncated_sax_document.truncated_string.strip, truncated_sax_document.truncated]
|
34
|
+
|
30
35
|
html_fragment = Nokogiri::HTML.fragment(truncated_sax_document.truncated_string.strip)
|
36
|
+
return parsed_results if html_fragment.nil?
|
37
|
+
|
31
38
|
last_table_in_doc = html_fragment.xpath('.//table').last
|
32
|
-
|
33
|
-
|
34
|
-
|
39
|
+
return parsed_results unless last_table_in_doc
|
40
|
+
|
41
|
+
first_row = last_table_in_doc.xpath('.//tr').first
|
42
|
+
return parsed_results unless first_row
|
43
|
+
|
44
|
+
cols_in_first_row = first_row.xpath('.//td').length
|
45
|
+
return parsed_results unless cols_in_first_row.positive?
|
46
|
+
|
47
|
+
last_table_in_doc.xpath('.//tr').each do |row|
|
48
|
+
row.remove if row.xpath('.//td').length != cols_in_first_row
|
35
49
|
end
|
36
50
|
|
37
51
|
return [html_fragment.to_html, truncated_sax_document.truncated]
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'htmlentities'
|
3
5
|
|
@@ -9,19 +11,20 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
9
11
|
VOID_TAGS = %w[area base br col command hr img input keygen link meta param source wbr].freeze
|
10
12
|
|
11
13
|
attr_reader :truncated_string,
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
:max_length,
|
15
|
+
:tail,
|
16
|
+
:ignored_levels,
|
17
|
+
:truncated
|
16
18
|
|
17
|
-
|
19
|
+
# FIXME: Call super to initialize state of the parent class.
|
20
|
+
def initialize(options) # rubocop:disable Lint/MissingSuper
|
18
21
|
@html_coder = HTMLEntities.new
|
19
22
|
|
20
23
|
@max_length = options[:max_length]
|
21
24
|
@tail = options[:tail] || ''
|
22
25
|
@fragment_mode = options[:fragment]
|
23
26
|
|
24
|
-
@truncated_string =
|
27
|
+
@truncated_string = ''
|
25
28
|
@closing_tags = []
|
26
29
|
@estimated_length = 0
|
27
30
|
@ignored_levels = 0
|
@@ -67,13 +70,13 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
67
70
|
# Use encoded length, so > counts as 4 bytes, not 1 (which is what '>' would give)
|
68
71
|
encoded_string = @html_coder.encode(decoded_string, :named)
|
69
72
|
string_to_append = if encoded_string.bytesize > remaining_length
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
# This is the line which prevents HTML entities getting truncated - treat them as a single char
|
74
|
+
str = truncate_string(decoded_string)
|
75
|
+
str << tail if remaining_length - str.bytesize >= tail.bytesize
|
76
|
+
str
|
77
|
+
else
|
78
|
+
encoded_string
|
79
|
+
end
|
77
80
|
append_to_truncated_string(string_to_append)
|
78
81
|
end
|
79
82
|
|
@@ -115,7 +118,8 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
115
118
|
# Note that any remaining end tags get added automatically (in `end_document`) as the document is closed
|
116
119
|
return if max_length_reached? || ignorable_tag?(name)
|
117
120
|
|
118
|
-
|
121
|
+
# FIXME: Style/GuardClause: Use a guard clause (return if single_tag_element?(name)) instead of wrapping the code inside a conditional expression. (https://rubystyle.guide#no-nested-conditionals)
|
122
|
+
unless single_tag_element?(name) # rubocop:disable Style/GuardClause
|
119
123
|
@closing_tags.pop
|
120
124
|
# Don't count the length when closing a tag - it was accommodated when
|
121
125
|
# the tag was opened
|
@@ -174,7 +178,7 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
174
178
|
@truncated = true
|
175
179
|
truncate_length = remaining_length - tail.bytesize
|
176
180
|
truncated_string = ''
|
177
|
-
decoded_string.
|
181
|
+
decoded_string.chars.each do |char|
|
178
182
|
encoded_char = @html_coder.encode(char)
|
179
183
|
break if encoded_char.bytesize > truncate_length
|
180
184
|
|
@@ -206,6 +210,6 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
206
210
|
end
|
207
211
|
|
208
212
|
def ignore_mode?
|
209
|
-
@ignored_levels
|
213
|
+
@ignored_levels.positive?
|
210
214
|
end
|
211
215
|
end
|
data/lib/abbreviato/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: abbreviato
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jorge Manrubia
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-06-03 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -28,16 +28,16 @@ dependencies:
|
|
28
28
|
name: nokogiri
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
30
30
|
requirements:
|
31
|
-
- -
|
31
|
+
- - '='
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version: 1.10.
|
33
|
+
version: 1.10.10
|
34
34
|
type: :runtime
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
|
-
- -
|
38
|
+
- - '='
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version: 1.10.
|
40
|
+
version: 1.10.10
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: awesome_print
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
@@ -67,7 +67,7 @@ dependencies:
|
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
70
|
+
name: brakeman
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - ">="
|
@@ -81,7 +81,7 @@ dependencies:
|
|
81
81
|
- !ruby/object:Gem::Version
|
82
82
|
version: '0'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
84
|
+
name: bump
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
86
86
|
requirements:
|
87
87
|
- - ">="
|
@@ -95,7 +95,7 @@ dependencies:
|
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
97
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
98
|
+
name: bundler-audit
|
99
99
|
requirement: !ruby/object:Gem::Requirement
|
100
100
|
requirements:
|
101
101
|
- - ">="
|
@@ -109,7 +109,7 @@ dependencies:
|
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: '0'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
112
|
+
name: byebug
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
114
114
|
requirements:
|
115
115
|
- - ">="
|
@@ -123,7 +123,7 @@ dependencies:
|
|
123
123
|
- !ruby/object:Gem::Version
|
124
124
|
version: '0'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
126
|
+
name: flay
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
128
128
|
requirements:
|
129
129
|
- - ">="
|
@@ -137,7 +137,7 @@ dependencies:
|
|
137
137
|
- !ruby/object:Gem::Version
|
138
138
|
version: '0'
|
139
139
|
- !ruby/object:Gem::Dependency
|
140
|
-
name:
|
140
|
+
name: rake
|
141
141
|
requirement: !ruby/object:Gem::Requirement
|
142
142
|
requirements:
|
143
143
|
- - ">="
|
@@ -151,35 +151,35 @@ dependencies:
|
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: '0'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
|
-
name:
|
154
|
+
name: rspec
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
156
156
|
requirements:
|
157
|
-
- - "
|
157
|
+
- - ">="
|
158
158
|
- !ruby/object:Gem::Version
|
159
|
-
version:
|
159
|
+
version: '0'
|
160
160
|
type: :development
|
161
161
|
prerelease: false
|
162
162
|
version_requirements: !ruby/object:Gem::Requirement
|
163
163
|
requirements:
|
164
|
-
- - "
|
164
|
+
- - ">="
|
165
165
|
- !ruby/object:Gem::Version
|
166
|
-
version:
|
166
|
+
version: '0'
|
167
167
|
- !ruby/object:Gem::Dependency
|
168
|
-
name: rspec
|
168
|
+
name: rspec-benchmark
|
169
169
|
requirement: !ruby/object:Gem::Requirement
|
170
170
|
requirements:
|
171
|
-
- - "
|
171
|
+
- - ">="
|
172
172
|
- !ruby/object:Gem::Version
|
173
|
-
version:
|
173
|
+
version: '0'
|
174
174
|
type: :development
|
175
175
|
prerelease: false
|
176
176
|
version_requirements: !ruby/object:Gem::Requirement
|
177
177
|
requirements:
|
178
|
-
- - "
|
178
|
+
- - ">="
|
179
179
|
- !ruby/object:Gem::Version
|
180
|
-
version:
|
180
|
+
version: '0'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
|
-
name:
|
182
|
+
name: rubocop
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
184
184
|
requirements:
|
185
185
|
- - ">="
|
@@ -193,7 +193,7 @@ dependencies:
|
|
193
193
|
- !ruby/object:Gem::Version
|
194
194
|
version: '0'
|
195
195
|
- !ruby/object:Gem::Dependency
|
196
|
-
name: rubocop
|
196
|
+
name: rubocop-rake
|
197
197
|
requirement: !ruby/object:Gem::Requirement
|
198
198
|
requirements:
|
199
199
|
- - ">="
|
@@ -207,7 +207,7 @@ dependencies:
|
|
207
207
|
- !ruby/object:Gem::Version
|
208
208
|
version: '0'
|
209
209
|
- !ruby/object:Gem::Dependency
|
210
|
-
name:
|
210
|
+
name: rubocop-rspec
|
211
211
|
requirement: !ruby/object:Gem::Requirement
|
212
212
|
requirements:
|
213
213
|
- - ">="
|
@@ -247,15 +247,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
247
247
|
requirements:
|
248
248
|
- - ">="
|
249
249
|
- !ruby/object:Gem::Version
|
250
|
-
version:
|
250
|
+
version: 2.6.6
|
251
251
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
252
252
|
requirements:
|
253
253
|
- - ">="
|
254
254
|
- !ruby/object:Gem::Version
|
255
255
|
version: '0'
|
256
256
|
requirements: []
|
257
|
-
|
258
|
-
rubygems_version: 2.6.14.1
|
257
|
+
rubygems_version: 3.2.17
|
259
258
|
signing_key:
|
260
259
|
specification_version: 4
|
261
260
|
summary: A tool for efficiently truncating HTML strings to a specific bytesize
|