abbreviato 0.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3c6f7e4cd1a30ddde713870703ee0b78bc26f384
4
+ data.tar.gz: 46253968fab63363c865b20c6b53230d7be607e7
5
+ SHA512:
6
+ metadata.gz: 64f8679f4bfd744e013f4ae0a13f9d712b5e7cf0c55f54e6ec15f84e27f0e054048e5bf5cafdd3ffe21ffaf66c9b539767be8ba9ee8aa1a1672c850999aaaac5
7
+ data.tar.gz: d63228f14b686c61066f2075cf5b8f670e7f87276d49024c0444370a0ca2df3fafa2a198425979b447d272f2277cd12a095be876279e788afc4ffe70eba7b412
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Jorge Manrubia
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # abbreviato
2
+
3
+ *abbreviato* is a Ruby library for truncating HTML strings keeping the markup valid. It is a fork of github.com/jorgemanrubia/truncato but focused on truncating to a bytesize, not on a per-character basis.
4
+
5
+ ## Installing
6
+
7
+ In your `Gemfile`
8
+
9
+ ```ruby
10
+ gem 'abbreviato'
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ truncated_string, was_truncated = Abbreviato.truncate("<p>some text</p>", max_length: 11, tail: '...') #=> ["<p>s...</p>", true]
17
+ ```
18
+
19
+ The configuration options are:
20
+
21
+ * `max_length`: The size, in bytes, to truncate (`30` by default)
22
+ * `tail`: The string to append when the truncation occurs ('&hellip;' by default).
23
+ * `fragment`: Indicates whether the document to be truncated is an HTML fragment
24
+   or an entire document (with `HTML`, `HEAD` & `BODY` tags). Setting to true prevents automatic
25
+   addition of these tags if they are missing. Defaults to `true`.
26
+
27
+
28
+ ## Performance
29
+
30
+ Abbreviato was designed with performance in mind. Its main motivation was that existing libs couldn't truncate a multiple-MB document into a few-KB one in a reasonable time. It uses the [Nokogiri](http://nokogiri.org/) SAX parser.
31
+
32
+ ## Running the tests
33
+
34
+ ```ruby
35
+ bundle exec rake
36
+ ```
37
+
38
+ ## Running all checks
39
+
40
+ ```ruby
41
+ bundle exec wwtd
42
+ ```
43
+
44
+ ## Updating
45
+
46
+ Update the version
47
+ ```ruby
48
+ bundle exec bump patch
49
+ ```
50
+
51
+ Build
52
+ ```ruby
53
+ gem build abbreviato.gemspec
54
+ ```
55
+ Publish
56
+ ```ruby
57
+ gem push abbreviato-x.y.z.gem
58
+ ```
59
+
60
+
61
+
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
+ require 'wwtd/tasks' # the default task at the bottom of this file is :wwtd
2
+ require 'bundler/gem_tasks'
3
+ require 'bump/tasks'
4
+ require 'bundler/setup'
5
+
6
+ Bundler::GemHelper.install_tasks # NOTE(review): 'bundler/gem_tasks' (required above) normally installs these tasks already - confirm this second call is needed
7
+
8
+ require 'rspec/core/rake_task'
9
+ RSpec::Core::RakeTask.new(:spec) # defines the `spec` task
10
+
11
+ require 'rubocop/rake_task'
12
+ RuboCop::RakeTask.new # defines the RuboCop task under its default name
13
+
14
+ require 'bundler/audit/task'
15
+ Bundler::Audit::Task.new # defines the bundler-audit task(s)
16
+
17
+ task default: :wwtd # a bare `rake` runs the wwtd task
data/lib/abbreviato.rb ADDED
@@ -0,0 +1,3 @@
1
+ Dir[File.dirname(__FILE__) + '/abbreviato/**/*.rb'].each do |file| # every .rb file under the sibling abbreviato/ directory, at any depth
2
+ require file
3
+ end
@@ -0,0 +1,27 @@
1
+ module Abbreviato
2
+ DEFAULT_OPTIONS = { # merged underneath the caller's options in .truncate
3
+ max_length: 30, # measured in bytes
4
+ tail: '&hellip;',
5
+ fragment: true
6
+ }.freeze
7
+
8
+ # Truncates the source HTML string and returns the truncated HTML and a boolean flag indicating
9
+ # whether any truncation took place. It will keep a valid markup structure
10
+ # and insert a _tail_ text indicating the position where content was removed (`&hellip;` by default).
11
+ #
12
+ # @param [String, nil] source the HTML source to truncate
13
+ # @param [Hash] user_options truncation options; missing keys fall back to DEFAULT_OPTIONS
14
+ # @option user_options [Integer] :max_length Maximum length in bytes (defaults to 30)
15
+ # @option user_options [String] :tail Text to append when the truncation happens (defaults to '&hellip;')
16
+ # @option user_options [Boolean] :fragment Indicates whether the document to be truncated is an HTML fragment
17
+ # or an entire document (with `HTML`, `HEAD` & `BODY` tags). Setting to true prevents automatic addition of
18
+ # these tags if they are missing. Defaults to `true`.
19
+ # @return [Array(String, Boolean)] the truncated string and whether it was truncated; `[nil, false]` when source is nil
20
+ def self.truncate(source = '', user_options = {})
21
+ return [nil, false] if source.nil?
22
+ truncated_sax_document = TruncatedSaxDocument.new(DEFAULT_OPTIONS.merge(user_options))
23
+ parser = Nokogiri::HTML::SAX::Parser.new(truncated_sax_document)
24
+ parser.parse(source) { |context| context.replace_entities = false }
25
+ [truncated_sax_document.truncated_string, truncated_sax_document.truncated]
26
+ end
27
+ end
@@ -0,0 +1,203 @@
1
+ require 'nokogiri'
2
+ require 'htmlentities'
3
+
4
+ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document # SAX handler that rebuilds the parsed markup until a byte budget is used up
5
+ IGNORABLE_TAGS = %w[html head body].freeze # left out of the output when the :fragment option is truthy
6
+
7
+ # These don't have to be closed (which also impacts ongoing length calculations)
8
+ # http://www.456bereastreet.com/archive/201005/void_empty_elements_and_self-closing_start_tags_in_html/
9
+ VOID_TAGS = %w[area base br col command hr img input keygen link meta param source wbr].freeze
10
+
11
+ attr_reader :truncated_string, # output accumulated so far
12
+ :max_length, # byte budget for the output
13
+ :tail, # text appended where character data is cut
14
+ :ignored_levels, # nesting depth inside an element that is being skipped
15
+ :truncated # true once any content has been dropped
16
+
17
+ def initialize(options) # options: Hash read for :max_length, :tail and :fragment
18
+ @html_coder = HTMLEntities.new
19
+
20
+ @max_length = options[:max_length]
21
+ @tail = options[:tail] || '' # a nil tail means nothing is appended
22
+ @fragment_mode = options[:fragment]
23
+
24
+ @truncated_string = ""
25
+ @closing_tags = [] # names of elements opened in the output and not yet closed
26
+ @estimated_length = 0 # bytes accounted for so far, including closing tags reserved when an element is opened
27
+ @ignored_levels = 0
28
+ @truncated = false
29
+ end
30
+
31
+ # This method is called when the parser encounters an open tag. The tag is emitted only if it and its closing tag both fit in the remaining budget; otherwise the element is skipped
32
+ def start_element(name, attributes)
33
+ if max_length_reached? || ignorable_tag?(name)
34
+ @truncated = true if max_length_reached?
35
+ return
36
+ end
37
+
38
+ # If already in ignore mode, go in deeper
39
+ if ignore_mode?
40
+ enter_ignored_level(name)
41
+ return
42
+ end
43
+
44
+ string_to_add = opening_tag(name, attributes)
45
+
46
+ # Abort if there is not enough space to add the combined opening tag and (potentially) the closing tag
47
+ length_of_tags = overridden_tag_length(name, string_to_add)
48
+ if length_of_tags > remaining_length
49
+ @truncated = true
50
+ enter_ignored_level(name)
51
+ return
52
+ end
53
+
54
+ # Save the tag so we can push it on at the end
55
+ @closing_tags.push name unless single_tag_element?(name)
56
+
57
+ append_to_truncated_string(string_to_add, length_of_tags)
58
+ end
59
+
60
+ # This method is called when the parser encounters characters between tags
61
+ def characters(decoded_string)
62
+ if max_length_reached? || ignore_mode?
63
+ @truncated = true
64
+ return
65
+ end
66
+
67
+ # Use encoded length, so &gt; counts as 4 bytes, not 1 (which is what '>' would give)
68
+ encoded_string = @html_coder.encode(decoded_string, :named)
69
+ string_to_append = if encoded_string.bytesize > remaining_length
70
+ # This is the line which prevents HTML entities getting truncated - treat them as a single char. NOTE(review): the slice is measured on the decoded text, so the re-encoded result may be longer than the remaining budget - confirm acceptable
71
+ str = @html_coder.encode(truncate_string(decoded_string), :named) || ''
72
+ str << tail if remaining_length - str.bytesize >= tail.bytesize
73
+ str
74
+ else
75
+ encoded_string
76
+ end
77
+ append_to_truncated_string(string_to_append)
78
+ end
79
+
80
+ # This method is called when the parser encounters a comment; it is kept only when it fits in full. NOTE(review): unlike #characters there is no ignore_mode? check here - confirm intended
81
+ def comment(string)
82
+ comment = comment_tag(string)
83
+ if comment.bytesize <= remaining_length
84
+ append_to_truncated_string(comment)
85
+ else
86
+ @truncated = true
87
+ end
88
+ end
89
+
90
+ # This method is called when the parser encounters cdata. In practice, this also
91
+ # gets called for this style of comment inside an element:
92
+ #
93
+ # <style><!--
94
+ # /* Font Definitions */
95
+ # @font-face
96
+ # {font-family:Wingdings;
97
+ # panose-1:5 0 0 0 0 0 0 0 0 0;}
98
+ # --></style>
99
+ # NOTE(review): the block is appended verbatim whenever it fits, with no ignore_mode? check (unlike #characters) - confirm intended
100
+ def cdata_block(string)
101
+ if string.bytesize <= remaining_length
102
+ append_to_truncated_string(string)
103
+ else
104
+ @truncated = true
105
+ end
106
+ end
107
+
108
+ # This method is called when the parser encounters a closing tag
109
+ def end_element(name)
110
+ if ignore_mode?
111
+ exit_ignored_level(name)
112
+ return
113
+ end
114
+
115
+ # Note that any remaining end tags get added automatically (in `end_document`) as the document is closed
116
+ return if max_length_reached? || ignorable_tag?(name)
117
+
118
+ unless single_tag_element?(name)
119
+ @closing_tags.pop
120
+ # Don't count the length when closing a tag - it was accommodated when
121
+ # the tag was opened
122
+ append_to_truncated_string(closing_tag(name), 0)
123
+ end
124
+ end
125
+
126
+ def end_document # appends, innermost first, a closing tag for every element still open
127
+ @closing_tags.reverse_each { |name| append_to_truncated_string(closing_tag(name), 0) }
128
+ end
129
+
130
+ private
131
+
132
+ def opening_tag(name, attributes) # renders the start tag; void elements are written self-closed
133
+ attributes_string = attributes_to_string(attributes)
134
+ if single_tag_element? name
135
+ "<#{name}#{attributes_string}/>"
136
+ else
137
+ "<#{name}#{attributes_string}>"
138
+ end
139
+ end
140
+
141
+ def comment_tag(comment) # wraps the comment text back in its delimiters
142
+ "<!--#{comment}-->"
143
+ end
144
+
145
+ def closing_tag(name)
146
+ "</#{name}>"
147
+ end
148
+
149
+ def remaining_length # bytes of budget not yet accounted for
150
+ max_length - @estimated_length
151
+ end
152
+
153
+ def single_tag_element?(name) # true for the void elements listed in VOID_TAGS
154
+ VOID_TAGS.include? name
155
+ end
156
+
157
+ def append_to_truncated_string(string, overridden_length = nil) # overridden_length, when given, is counted instead of string.bytesize
158
+ @truncated_string << string
159
+ @estimated_length += (overridden_length || string.bytesize)
160
+ end
161
+
162
+ def attributes_to_string(attributes) # renders each key/value pair as key='value' with the value entity-encoded
163
+ attributes.inject(' ') do |string, attribute|
164
+ key, value = attribute
165
+ string << "#{key}='#{@html_coder.encode value}' "
166
+ end.rstrip
167
+ end
168
+
169
+ def max_length_reached?
170
+ @estimated_length >= max_length
171
+ end
172
+
173
+ def truncate_string(string) # marks the result truncated and byte-slices string to the room left after reserving the tail
174
+ @truncated = true
175
+ truncate_length = remaining_length - tail.bytesize
176
+ (string.byteslice(0, truncate_length) || '').scrub('') # scrub('') removes any invalid byte sequence left by the slice
177
+ end
178
+
179
+ def overridden_tag_length(tag_name, rendered_tag_with_attributes) # bytes to reserve when opening an element
180
+ # Start with the opening tag
181
+ length = rendered_tag_with_attributes.bytesize
182
+
183
+ # Add on closing tag if necessary
184
+ length += closing_tag(tag_name).bytesize unless single_tag_element?(tag_name)
185
+ length
186
+ end
187
+
188
+ def ignorable_tag?(name) # html/head/body are ignored only in fragment mode
189
+ @fragment_mode && IGNORABLE_TAGS.include?(name.downcase)
190
+ end
191
+
192
+ def enter_ignored_level(name) # void tags do not change the depth
193
+ @ignored_levels += 1 unless single_tag_element?(name)
194
+ end
195
+
196
+ def exit_ignored_level(name) # mirrors enter_ignored_level
197
+ @ignored_levels -= 1 unless single_tag_element?(name)
198
+ end
199
+
200
+ def ignore_mode? # true while inside an element that was skipped
201
+ @ignored_levels > 0
202
+ end
203
+ end
@@ -0,0 +1,3 @@
1
+ module Abbreviato
2
+ VERSION = '0.8.1'.freeze # version string of this gem release
3
+ end
metadata ADDED
@@ -0,0 +1,262 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: abbreviato
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.8.1
5
+ platform: ruby
6
+ authors:
7
+ - Jorge Manrubia
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.8
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.8
27
+ - !ruby/object:Gem::Dependency
28
+ name: htmlentities
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 4.3.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 4.3.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: awesome_print
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: benchmark-memory
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: brakecheck
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: brakeman
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: bump
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: bundler-audit
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: byebug
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: flay
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rake
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 11.3.0
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 11.3.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: rspec
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 3.5.0
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 3.5.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: rspec-benchmark
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: rubocop
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: wwtd
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
223
+ description: Truncate HTML to a specific bytesize, while keeping valid markup
224
+ email: jorge.manrubia@gmail.com
225
+ executables: []
226
+ extensions: []
227
+ extra_rdoc_files:
228
+ - LICENSE.txt
229
+ - README.md
230
+ files:
231
+ - LICENSE.txt
232
+ - README.md
233
+ - Rakefile
234
+ - lib/abbreviato.rb
235
+ - lib/abbreviato/abbreviato.rb
236
+ - lib/abbreviato/truncated_sax_document.rb
237
+ - lib/abbreviato/version.rb
238
+ homepage: https://github.com/zendesk/abbreviato
239
+ licenses:
240
+ - MIT
241
+ metadata: {}
242
+ post_install_message:
243
+ rdoc_options: []
244
+ require_paths:
245
+ - lib
246
+ required_ruby_version: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - ">="
249
+ - !ruby/object:Gem::Version
250
+ version: '0'
251
+ required_rubygems_version: !ruby/object:Gem::Requirement
252
+ requirements:
253
+ - - ">="
254
+ - !ruby/object:Gem::Version
255
+ version: '0'
256
+ requirements: []
257
+ rubyforge_project:
258
+ rubygems_version: 2.5.1
259
+ signing_key:
260
+ specification_version: 4
261
+ summary: A tool for efficiently truncating HTML strings to a specific bytesize
262
+ test_files: []