abbreviato 0.10.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/README.md +3 -2
- data/Rakefile +11 -28
- data/lib/abbreviato/abbreviato.rb +20 -6
- data/lib/abbreviato/truncated_sax_document.rb +20 -16
- data/lib/abbreviato/version.rb +3 -1
- data/lib/abbreviato.rb +3 -1
- metadata +7 -190
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7f7e5ca577a1ab3a8e11f9b84e927b6b6df2ec44c29646c14238e16c9181e8b7
|
4
|
+
data.tar.gz: b1110b4f1b2fa1cb2f937aa5ebd7348b1bfb28ebddd998fa76499cb077e460d5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 778060cedfeb4842872ffd89f1d11d43e8d07002ceef98019d6e9a7138614ae5d2e76f982d10d00c9cd334c56329e97f5395527db34c7be13a844c7a1f6cf8e7
|
7
|
+
data.tar.gz: fc8dc066a6bc6f42f37d8dbf479ef91d44519a4058298fcc9220f71b0c4e3d854ff946ff2baea705dc92fc2ffee13284d6542b25a7a02185ef93bac9885c1cbc
|
data/README.md
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
# abbreviato
|
2
2
|
|
3
|
-
[![
|
3
|
+
[![Continuous Integration](https://github.com/zendesk/abbreviato/actions/workflows/actions.yml/badge.svg)](https://github.com/zendesk/abbreviato/actions/workflows/actions.yml)
|
4
|
+
[![Security](https://github.com/zendesk/abbreviato/actions/workflows/security.yml/badge.svg)](https://github.com/zendesk/abbreviato/actions/workflows/security.yml)
|
4
5
|
[![Gem Version](https://img.shields.io/gem/v/abbreviato.svg)](https://rubygems.org/gems/abbreviato)
|
5
6
|
|
6
7
|
*abbreviato* is a Ruby library for truncating HTML strings keeping the markup valid. It is a fork of [jorgemanrubia/truncato](https://github.com/jorgemanrubia/truncato) but focused on truncating to a bytesize, not on a per-character basis.
|
@@ -47,7 +48,7 @@ rspec spec/abbreviato/abbreviato_spec.rb:357
|
|
47
48
|
## Running all checks
|
48
49
|
|
49
50
|
```ruby
|
50
|
-
bundle exec
|
51
|
+
bundle exec rake spec
|
51
52
|
```
|
52
53
|
|
53
54
|
## Contribute
|
data/Rakefile
CHANGED
@@ -1,28 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'bundler/setup'
|
4
|
-
require 'wwtd/tasks'
|
5
4
|
require 'bundler/gem_tasks'
|
6
|
-
require 'bump/tasks'
|
7
5
|
|
8
6
|
Bundler::GemHelper.install_tasks
|
9
7
|
|
8
|
+
# Pushing to rubygems is handled by a github workflow
|
9
|
+
ENV['gem_push'] = 'false'
|
10
|
+
|
10
11
|
require 'rspec/core/rake_task'
|
11
12
|
RSpec::Core::RakeTask.new(:spec)
|
12
13
|
|
13
|
-
if %w[development test].include?(ENV["RAILS_ENV"] ||= 'development')
|
14
|
-
def run_command(command)
|
15
|
-
result = `#{command}`
|
16
|
-
result.force_encoding('binary')
|
17
|
-
raise "Command #{command} failed: #{result}" unless $?.success?
|
18
|
-
|
19
|
-
result
|
20
|
-
end
|
21
|
-
|
14
|
+
if %w[development test].include?(ENV['RAILS_ENV'] ||= 'development')
|
22
15
|
require 'bundler/audit/task'
|
23
16
|
Bundler::Audit::Task.new
|
24
17
|
|
25
|
-
desc
|
18
|
+
desc 'Analyze for code duplication (large, identical syntax trees) with fuzzy matching.'
|
26
19
|
task :flay do
|
27
20
|
require 'flay'
|
28
21
|
flay = Flay.run(%w[bin config lib script])
|
@@ -35,23 +28,13 @@ if %w[development test].include?(ENV["RAILS_ENV"] ||= 'development')
|
|
35
28
|
require 'rubocop/rake_task'
|
36
29
|
RuboCop::RakeTask.new
|
37
30
|
|
38
|
-
|
39
|
-
puts "Running brakecheck..."
|
40
|
-
%w[brakecheck brakeman bundler-audit flay rubocop].each do |gem_name|
|
41
|
-
result = `brakecheck #{gem_name}`
|
42
|
-
result.force_encoding('binary')
|
43
|
-
if $?.success?
|
44
|
-
puts "✔ #{gem_name}"
|
45
|
-
else
|
46
|
-
raise "✘ #{gem_name}'s brakecheck failed: #{result}"
|
47
|
-
end
|
48
|
-
end
|
49
|
-
true
|
50
|
-
end
|
51
|
-
|
31
|
+
desc 'Analyze security vulnerabilities with brakeman'
|
52
32
|
task :brakeman do
|
53
|
-
|
33
|
+
`brakeman --exit-on-warn --exit-on-err --format plain --ensure-latest --table-width 999 --force-scan lib --ignore-config .brakeman.ignore`
|
54
34
|
end
|
35
|
+
|
36
|
+
desc 'Run all linters'
|
37
|
+
task lint: %w[rubocop flay brakeman]
|
55
38
|
end
|
56
39
|
|
57
|
-
task default: :
|
40
|
+
task default: :spec
|
data/lib/abbreviato/abbreviato.rb
CHANGED
@@ -1,8 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Abbreviato
|
2
4
|
DEFAULT_OPTIONS = {
|
3
|
-
|
4
|
-
|
5
|
-
|
5
|
+
max_length: 30,
|
6
|
+
tail: '…',
|
7
|
+
fragment: true
|
6
8
|
}.freeze
|
7
9
|
|
8
10
|
# Truncates the source XML string and returns the truncated XML and a boolean flag indicating
|
@@ -22,16 +24,28 @@ module Abbreviato
|
|
22
24
|
# @return [[String] the truncated string, [boolean] whether the string was truncated]
|
23
25
|
def self.truncate(source = '', user_options = {})
|
24
26
|
return [nil, false] if source.nil?
|
27
|
+
|
25
28
|
truncated_sax_document = TruncatedSaxDocument.new(DEFAULT_OPTIONS.merge(user_options))
|
26
29
|
parser = Nokogiri::HTML::SAX::Parser.new(truncated_sax_document)
|
27
30
|
parser.parse(source) { |context| context.replace_entities = false }
|
28
31
|
|
29
32
|
if truncated_sax_document.truncated && user_options[:truncate_incomplete_row]
|
33
|
+
parsed_results = [truncated_sax_document.truncated_string.strip, truncated_sax_document.truncated]
|
34
|
+
|
30
35
|
html_fragment = Nokogiri::HTML.fragment(truncated_sax_document.truncated_string.strip)
|
36
|
+
return parsed_results if html_fragment.nil?
|
37
|
+
|
31
38
|
last_table_in_doc = html_fragment.xpath('.//table').last
|
32
|
-
|
33
|
-
|
34
|
-
|
39
|
+
return parsed_results unless last_table_in_doc
|
40
|
+
|
41
|
+
first_row = last_table_in_doc.xpath('.//tr').first
|
42
|
+
return parsed_results unless first_row
|
43
|
+
|
44
|
+
cols_in_first_row = first_row.xpath('.//td').length
|
45
|
+
return parsed_results unless cols_in_first_row.positive?
|
46
|
+
|
47
|
+
last_table_in_doc.xpath('.//tr').each do |row|
|
48
|
+
row.remove if row.xpath('.//td').length != cols_in_first_row
|
35
49
|
end
|
36
50
|
|
37
51
|
return [html_fragment.to_html, truncated_sax_document.truncated]
|
data/lib/abbreviato/truncated_sax_document.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: false
|
2
|
+
|
1
3
|
require 'nokogiri'
|
2
4
|
require 'htmlentities'
|
3
5
|
|
@@ -9,19 +11,20 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
9
11
|
VOID_TAGS = %w[area base br col command hr img input keygen link meta param source wbr].freeze
|
10
12
|
|
11
13
|
attr_reader :truncated_string,
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
14
|
+
:max_length,
|
15
|
+
:tail,
|
16
|
+
:ignored_levels,
|
17
|
+
:truncated
|
16
18
|
|
17
|
-
|
19
|
+
# FIXME: Call super to initialize state of the parent class.
|
20
|
+
def initialize(options) # rubocop:disable Lint/MissingSuper
|
18
21
|
@html_coder = HTMLEntities.new
|
19
22
|
|
20
23
|
@max_length = options[:max_length]
|
21
24
|
@tail = options[:tail] || ''
|
22
25
|
@fragment_mode = options[:fragment]
|
23
26
|
|
24
|
-
@truncated_string =
|
27
|
+
@truncated_string = ''
|
25
28
|
@closing_tags = []
|
26
29
|
@estimated_length = 0
|
27
30
|
@ignored_levels = 0
|
@@ -67,13 +70,13 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
67
70
|
# Use encoded length, so > counts as 4 bytes, not 1 (which is what '>' would give)
|
68
71
|
encoded_string = @html_coder.encode(decoded_string, :named)
|
69
72
|
string_to_append = if encoded_string.bytesize > remaining_length
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
73
|
+
# This is the line which prevents HTML entities getting truncated - treat them as a single char
|
74
|
+
str = truncate_string(decoded_string)
|
75
|
+
str << tail if remaining_length - str.bytesize >= tail.bytesize
|
76
|
+
str
|
77
|
+
else
|
78
|
+
encoded_string
|
79
|
+
end
|
77
80
|
append_to_truncated_string(string_to_append)
|
78
81
|
end
|
79
82
|
|
@@ -115,7 +118,8 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
115
118
|
# Note that any remaining end tags get added automatically (in `end_document`) as the document is closed
|
116
119
|
return if max_length_reached? || ignorable_tag?(name)
|
117
120
|
|
118
|
-
|
121
|
+
# FIXME: Style/GuardClause: Use a guard clause (return if single_tag_element?(name)) instead of wrapping the code inside a conditional expression. (https://rubystyle.guide#no-nested-conditionals)
|
122
|
+
unless single_tag_element?(name) # rubocop:disable Style/GuardClause
|
119
123
|
@closing_tags.pop
|
120
124
|
# Don't count the length when closing a tag - it was accommodated when
|
121
125
|
# the tag was opened
|
@@ -174,7 +178,7 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
174
178
|
@truncated = true
|
175
179
|
truncate_length = remaining_length - tail.bytesize
|
176
180
|
truncated_string = ''
|
177
|
-
decoded_string.
|
181
|
+
decoded_string.chars.each do |char|
|
178
182
|
encoded_char = @html_coder.encode(char)
|
179
183
|
break if encoded_char.bytesize > truncate_length
|
180
184
|
|
@@ -206,6 +210,6 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
206
210
|
end
|
207
211
|
|
208
212
|
def ignore_mode?
|
209
|
-
@ignored_levels
|
213
|
+
@ignored_levels.positive?
|
210
214
|
end
|
211
215
|
end
|
data/lib/abbreviato/version.rb
CHANGED
data/lib/abbreviato.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: abbreviato
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.10.0
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jorge Manrubia
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-07-21 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -26,200 +26,18 @@ dependencies:
|
|
26
26
|
version: 4.3.4
|
27
27
|
- !ruby/object:Gem::Dependency
|
28
28
|
name: nokogiri
|
29
|
-
requirement: !ruby/object:Gem::Requirement
|
30
|
-
requirements:
|
31
|
-
- - ">="
|
32
|
-
- !ruby/object:Gem::Version
|
33
|
-
version: 1.10.8
|
34
|
-
type: :runtime
|
35
|
-
prerelease: false
|
36
|
-
version_requirements: !ruby/object:Gem::Requirement
|
37
|
-
requirements:
|
38
|
-
- - ">="
|
39
|
-
- !ruby/object:Gem::Version
|
40
|
-
version: 1.10.8
|
41
|
-
- !ruby/object:Gem::Dependency
|
42
|
-
name: awesome_print
|
43
|
-
requirement: !ruby/object:Gem::Requirement
|
44
|
-
requirements:
|
45
|
-
- - ">="
|
46
|
-
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
48
|
-
type: :development
|
49
|
-
prerelease: false
|
50
|
-
version_requirements: !ruby/object:Gem::Requirement
|
51
|
-
requirements:
|
52
|
-
- - ">="
|
53
|
-
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
- !ruby/object:Gem::Dependency
|
56
|
-
name: benchmark-memory
|
57
|
-
requirement: !ruby/object:Gem::Requirement
|
58
|
-
requirements:
|
59
|
-
- - ">="
|
60
|
-
- !ruby/object:Gem::Version
|
61
|
-
version: '0'
|
62
|
-
type: :development
|
63
|
-
prerelease: false
|
64
|
-
version_requirements: !ruby/object:Gem::Requirement
|
65
|
-
requirements:
|
66
|
-
- - ">="
|
67
|
-
- !ruby/object:Gem::Version
|
68
|
-
version: '0'
|
69
|
-
- !ruby/object:Gem::Dependency
|
70
|
-
name: brakecheck
|
71
|
-
requirement: !ruby/object:Gem::Requirement
|
72
|
-
requirements:
|
73
|
-
- - ">="
|
74
|
-
- !ruby/object:Gem::Version
|
75
|
-
version: '0'
|
76
|
-
type: :development
|
77
|
-
prerelease: false
|
78
|
-
version_requirements: !ruby/object:Gem::Requirement
|
79
|
-
requirements:
|
80
|
-
- - ">="
|
81
|
-
- !ruby/object:Gem::Version
|
82
|
-
version: '0'
|
83
|
-
- !ruby/object:Gem::Dependency
|
84
|
-
name: brakeman
|
85
|
-
requirement: !ruby/object:Gem::Requirement
|
86
|
-
requirements:
|
87
|
-
- - ">="
|
88
|
-
- !ruby/object:Gem::Version
|
89
|
-
version: '0'
|
90
|
-
type: :development
|
91
|
-
prerelease: false
|
92
|
-
version_requirements: !ruby/object:Gem::Requirement
|
93
|
-
requirements:
|
94
|
-
- - ">="
|
95
|
-
- !ruby/object:Gem::Version
|
96
|
-
version: '0'
|
97
|
-
- !ruby/object:Gem::Dependency
|
98
|
-
name: bump
|
99
|
-
requirement: !ruby/object:Gem::Requirement
|
100
|
-
requirements:
|
101
|
-
- - ">="
|
102
|
-
- !ruby/object:Gem::Version
|
103
|
-
version: '0'
|
104
|
-
type: :development
|
105
|
-
prerelease: false
|
106
|
-
version_requirements: !ruby/object:Gem::Requirement
|
107
|
-
requirements:
|
108
|
-
- - ">="
|
109
|
-
- !ruby/object:Gem::Version
|
110
|
-
version: '0'
|
111
|
-
- !ruby/object:Gem::Dependency
|
112
|
-
name: bundler-audit
|
113
|
-
requirement: !ruby/object:Gem::Requirement
|
114
|
-
requirements:
|
115
|
-
- - ">="
|
116
|
-
- !ruby/object:Gem::Version
|
117
|
-
version: '0'
|
118
|
-
type: :development
|
119
|
-
prerelease: false
|
120
|
-
version_requirements: !ruby/object:Gem::Requirement
|
121
|
-
requirements:
|
122
|
-
- - ">="
|
123
|
-
- !ruby/object:Gem::Version
|
124
|
-
version: '0'
|
125
|
-
- !ruby/object:Gem::Dependency
|
126
|
-
name: byebug
|
127
|
-
requirement: !ruby/object:Gem::Requirement
|
128
|
-
requirements:
|
129
|
-
- - ">="
|
130
|
-
- !ruby/object:Gem::Version
|
131
|
-
version: '0'
|
132
|
-
type: :development
|
133
|
-
prerelease: false
|
134
|
-
version_requirements: !ruby/object:Gem::Requirement
|
135
|
-
requirements:
|
136
|
-
- - ">="
|
137
|
-
- !ruby/object:Gem::Version
|
138
|
-
version: '0'
|
139
|
-
- !ruby/object:Gem::Dependency
|
140
|
-
name: flay
|
141
|
-
requirement: !ruby/object:Gem::Requirement
|
142
|
-
requirements:
|
143
|
-
- - ">="
|
144
|
-
- !ruby/object:Gem::Version
|
145
|
-
version: '0'
|
146
|
-
type: :development
|
147
|
-
prerelease: false
|
148
|
-
version_requirements: !ruby/object:Gem::Requirement
|
149
|
-
requirements:
|
150
|
-
- - ">="
|
151
|
-
- !ruby/object:Gem::Version
|
152
|
-
version: '0'
|
153
|
-
- !ruby/object:Gem::Dependency
|
154
|
-
name: rake
|
155
|
-
requirement: !ruby/object:Gem::Requirement
|
156
|
-
requirements:
|
157
|
-
- - "~>"
|
158
|
-
- !ruby/object:Gem::Version
|
159
|
-
version: 13.0.1
|
160
|
-
type: :development
|
161
|
-
prerelease: false
|
162
|
-
version_requirements: !ruby/object:Gem::Requirement
|
163
|
-
requirements:
|
164
|
-
- - "~>"
|
165
|
-
- !ruby/object:Gem::Version
|
166
|
-
version: 13.0.1
|
167
|
-
- !ruby/object:Gem::Dependency
|
168
|
-
name: rspec
|
169
29
|
requirement: !ruby/object:Gem::Requirement
|
170
30
|
requirements:
|
171
31
|
- - "~>"
|
172
32
|
- !ruby/object:Gem::Version
|
173
|
-
version:
|
174
|
-
type: :
|
33
|
+
version: 1.15.0
|
34
|
+
type: :runtime
|
175
35
|
prerelease: false
|
176
36
|
version_requirements: !ruby/object:Gem::Requirement
|
177
37
|
requirements:
|
178
38
|
- - "~>"
|
179
39
|
- !ruby/object:Gem::Version
|
180
|
-
version:
|
181
|
-
- !ruby/object:Gem::Dependency
|
182
|
-
name: rspec-benchmark
|
183
|
-
requirement: !ruby/object:Gem::Requirement
|
184
|
-
requirements:
|
185
|
-
- - ">="
|
186
|
-
- !ruby/object:Gem::Version
|
187
|
-
version: '0'
|
188
|
-
type: :development
|
189
|
-
prerelease: false
|
190
|
-
version_requirements: !ruby/object:Gem::Requirement
|
191
|
-
requirements:
|
192
|
-
- - ">="
|
193
|
-
- !ruby/object:Gem::Version
|
194
|
-
version: '0'
|
195
|
-
- !ruby/object:Gem::Dependency
|
196
|
-
name: rubocop
|
197
|
-
requirement: !ruby/object:Gem::Requirement
|
198
|
-
requirements:
|
199
|
-
- - ">="
|
200
|
-
- !ruby/object:Gem::Version
|
201
|
-
version: '0'
|
202
|
-
type: :development
|
203
|
-
prerelease: false
|
204
|
-
version_requirements: !ruby/object:Gem::Requirement
|
205
|
-
requirements:
|
206
|
-
- - ">="
|
207
|
-
- !ruby/object:Gem::Version
|
208
|
-
version: '0'
|
209
|
-
- !ruby/object:Gem::Dependency
|
210
|
-
name: wwtd
|
211
|
-
requirement: !ruby/object:Gem::Requirement
|
212
|
-
requirements:
|
213
|
-
- - ">="
|
214
|
-
- !ruby/object:Gem::Version
|
215
|
-
version: '0'
|
216
|
-
type: :development
|
217
|
-
prerelease: false
|
218
|
-
version_requirements: !ruby/object:Gem::Requirement
|
219
|
-
requirements:
|
220
|
-
- - ">="
|
221
|
-
- !ruby/object:Gem::Version
|
222
|
-
version: '0'
|
40
|
+
version: 1.15.0
|
223
41
|
description: Truncate HTML to a specific bytesize, while keeping valid markup
|
224
42
|
email: jorge.manrubia@gmail.com
|
225
43
|
executables: []
|
@@ -247,15 +65,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
247
65
|
requirements:
|
248
66
|
- - ">="
|
249
67
|
- !ruby/object:Gem::Version
|
250
|
-
version: '
|
68
|
+
version: '2.7'
|
251
69
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
252
70
|
requirements:
|
253
71
|
- - ">="
|
254
72
|
- !ruby/object:Gem::Version
|
255
73
|
version: '0'
|
256
74
|
requirements: []
|
257
|
-
|
258
|
-
rubygems_version: 2.6.14.1
|
75
|
+
rubygems_version: 3.0.3.1
|
259
76
|
signing_key:
|
260
77
|
specification_version: 4
|
261
78
|
summary: A tool for efficiently truncating HTML strings to a specific bytesize
|