abbreviato 0.8.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 3c6f7e4cd1a30ddde713870703ee0b78bc26f384
4
+ data.tar.gz: 46253968fab63363c865b20c6b53230d7be607e7
5
+ SHA512:
6
+ metadata.gz: 64f8679f4bfd744e013f4ae0a13f9d712b5e7cf0c55f54e6ec15f84e27f0e054048e5bf5cafdd3ffe21ffaf66c9b539767be8ba9ee8aa1a1672c850999aaaac5
7
+ data.tar.gz: d63228f14b686c61066f2075cf5b8f670e7f87276d49024c0444370a0ca2df3fafa2a198425979b447d272f2277cd12a095be876279e788afc4ffe70eba7b412
data/LICENSE.txt ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2011 Jorge Manrubia
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,61 @@
1
+ # abbreviato
2
+
3
+ *abbreviato* is a Ruby library for truncating HTML strings keeping the markup valid. It is a fork of github.com/jorgemanrubia/truncato but focused on truncating to a bytesize, not on a per-character basis.
4
+
5
+ ## Installing
6
+
7
+ In your `Gemfile`
8
+
9
+ ```ruby
10
+ gem 'abbreviato'
11
+ ```
12
+
13
+ ## Usage
14
+
15
+ ```ruby
16
+ truncated_string, was_truncated = Abbreviato.truncate("<p>some text</p>", max_length: 15, tail: '...') #=> ["<p>some ...</p>", true]
17
+ ```
18
+
19
+ The configuration options are:
20
+
21
+ * `max_length`: The maximum size, in bytes, of the truncated output (`30` by default).
22
+ * `tail`: The string to append when the truncation occurs (`'&hellip;'` by default).
23
+ * `fragment`: Indicates whether the document to be truncated is an HTML fragment
24
+ or an entire document (with `HTML`, `HEAD` & `BODY` tags). Setting to true prevents automatic
25
+ addition of these tags if they are missing. Defaults to `true`.
26
+
27
+
28
+ ## Performance
29
+
30
+ Abbreviato was designed with performance in mind. Its main motivation was that existing libs couldn't truncate a multiple-MB document into a few-KB one in a reasonable time. It uses the [Nokogiri](http://nokogiri.org/) SAX parser.
31
+
32
+ ## Running the tests
33
+
34
+ ```ruby
35
+ bundle exec rake
36
+ ```
37
+
38
+ ## Running all checks
39
+
40
+ ```ruby
41
+ bundle exec wwtd
42
+ ```
43
+
44
+ ## Updating
45
+
46
+ Update the version
47
+ ```ruby
48
+ bundle exec bump patch
49
+ ```
50
+
51
+ Build
52
+ ```ruby
53
+ gem build abbreviato.gemspec
54
+ ```
55
+ Publish
56
+ ```ruby
57
+ gem push abbreviato-x.y.z.gem
58
+ ```
59
+
60
+
61
+
data/Rakefile ADDED
@@ -0,0 +1,17 @@
1
# Rake entry point: wires up the packaging, version-bump, spec, lint and
# audit tasks, and makes `wwtd` the default task.

# Load the bundle first so every require below resolves against the Gemfile.
require 'bundler/setup'

# Requiring bundler/gem_tasks already installs Bundler::GemHelper's tasks.
# Installing them a second time would append duplicate actions to the same
# Rake tasks, so no explicit Bundler::GemHelper.install_tasks call is made.
require 'bundler/gem_tasks'
require 'bump/tasks'
require 'wwtd/tasks'

require 'rspec/core/rake_task'
RSpec::Core::RakeTask.new(:spec)

require 'rubocop/rake_task'
RuboCop::RakeTask.new

require 'bundler/audit/task'
Bundler::Audit::Task.new

task default: :wwtd
data/lib/abbreviato.rb ADDED
@@ -0,0 +1,3 @@
1
# Require every Ruby file found under the sibling `abbreviato/` directory,
# at any depth.
library_glob = File.dirname(__FILE__) + '/abbreviato/**/*.rb'
Dir.glob(library_glob).each { |library_file| require library_file }
@@ -0,0 +1,27 @@
1
module Abbreviato
  # Options applied when the caller does not override them.
  DEFAULT_OPTIONS = {
    max_length: 30,
    tail: '&hellip;',
    fragment: true
  }.freeze

  # Truncates an HTML string to a byte budget while keeping the markup valid.
  #
  # The source is fed through Nokogiri's HTML SAX parser into a
  # TruncatedSaxDocument, which collects the truncated output and records
  # whether anything was left out. Entity replacement is switched off on the
  # parser context before parsing.
  #
  # @param [String, nil] source the HTML source to truncate
  # @param [Hash] user_options truncation options, merged over DEFAULT_OPTIONS
  # @option user_options [Integer] :max_length Maximum length, in bytes
  # @option user_options [String] :tail Text to append when the truncation happens
  # @option user_options [Boolean] :fragment Indicates whether the document to be truncated is an HTML
  #   fragment or an entire document (with `HTML`, `HEAD` & `BODY` tags). Setting to true prevents
  #   automatic addition of these tags if they are missing. Defaults to `true`.
  # @return [Array(String, Boolean)] the truncated string and whether any truncation took place;
  #   `[nil, false]` when +source+ is nil
  def self.truncate(source = '', user_options = {})
    return [nil, false] if source.nil?

    options = DEFAULT_OPTIONS.merge(user_options)
    document = TruncatedSaxDocument.new(options)
    sax_parser = Nokogiri::HTML::SAX::Parser.new(document)
    sax_parser.parse(source) do |context|
      context.replace_entities = false
    end

    [document.truncated_string, document.truncated]
  end
end
@@ -0,0 +1,203 @@
1
+ require 'nokogiri'
2
+ require 'htmlentities'
3
+
4
# SAX handler that re-serialises the parsed markup into +truncated_string+
# while keeping a running byte estimate against +max_length+. Elements that
# do not fit are skipped together with their content, and any element still
# open when parsing ends is closed in +end_document+.
class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
  # Wrapper tags that are not emitted when fragment mode is enabled.
  IGNORABLE_TAGS = %w[html head body].freeze

  # These don't have to be closed (which also impacts ongoing length calculations)
  # http://www.456bereastreet.com/archive/201005/void_empty_elements_and_self-closing_start_tags_in_html/
  VOID_TAGS = %w[area base br col command hr img input keygen link meta param source wbr].freeze

  attr_reader :truncated_string,
              :max_length,
              :tail,
              :ignored_levels,
              :truncated

  # @param [Hash] options
  # @option options [Integer] :max_length byte budget for the output
  # @option options [String, nil] :tail marker appended where text is cut (nil becomes '')
  # @option options [Boolean] :fragment when truthy, html/head/body tags are not emitted
  def initialize(options)
    @html_coder = HTMLEntities.new

    @max_length = options[:max_length]
    @tail = options[:tail] || ''
    @fragment_mode = options[:fragment]

    # Mutable output buffer (dup'ed so it is never a frozen literal).
    @truncated_string = ''.dup
    # Names of the elements that are currently open in the output.
    @closing_tags = []
    # Bytes accounted for so far; an element's closing tag is counted up front.
    @estimated_length = 0
    # Nesting depth inside an element that was skipped for lack of space.
    @ignored_levels = 0
    @truncated = false
  end

  # This method is called when the parser encounters an open tag
  def start_element(name, attributes)
    if max_length_reached? || ignorable_tag?(name)
      @truncated = true if max_length_reached?
      return
    end

    # If already in ignore mode, go in deeper
    if ignore_mode?
      enter_ignored_level(name)
      return
    end

    string_to_add = opening_tag(name, attributes)

    # Abort if there is not enough space to add the combined opening tag and (potentially) the closing tag
    length_of_tags = overridden_tag_length(name, string_to_add)
    if length_of_tags > remaining_length
      @truncated = true
      enter_ignored_level(name)
      return
    end

    # Save the tag so we can push it on at the end
    @closing_tags.push name unless single_tag_element?(name)

    append_to_truncated_string(string_to_add, length_of_tags)
  end

  # This method is called when the parser encounters characters between tags
  def characters(decoded_string)
    if max_length_reached? || ignore_mode?
      @truncated = true
      return
    end

    # Use encoded length, so &gt; counts as 4 bytes, not 1 (which is what '>' would give)
    encoded_string = @html_coder.encode(decoded_string, :named)
    string_to_append =
      if encoded_string.bytesize > remaining_length
        # Slice the decoded text and encode afterwards, so an HTML entity is
        # never cut in half - it is treated as a single char.
        # NOTE(review): because the slice is measured on the decoded text,
        # re-encoding it may produce more bytes than were budgeted when the
        # kept part contains characters that expand to entities - confirm.
        str = @html_coder.encode(truncate_string(decoded_string), :named) || ''
        str << tail if remaining_length - str.bytesize >= tail.bytesize
        str
      else
        encoded_string
      end
    append_to_truncated_string(string_to_append)
  end

  # This method is called when the parser encounters a comment.
  #
  # A comment is emitted whole or not at all. Comments found inside an
  # element that is being skipped are dropped as well, so they cannot show
  # up outside of their omitted parent.
  def comment(string)
    rendered_comment = comment_tag(string)
    if ignore_mode? || rendered_comment.bytesize > remaining_length
      @truncated = true
    else
      append_to_truncated_string(rendered_comment)
    end
  end

  # This method is called when the parser encounters cdata. In practice, this also
  # gets called for this style of comment inside an element:
  #
  #   <style><!--
  #     /* Font Definitions */
  #     @font-face
  #       {font-family:Wingdings;
  #       panose-1:5 0 0 0 0 0 0 0 0 0;}
  #   --></style>
  #
  # The block is emitted whole or not at all, and is dropped when its
  # enclosing element is being skipped - otherwise its raw content would be
  # appended without the element that was meant to contain it.
  def cdata_block(string)
    if ignore_mode? || string.bytesize > remaining_length
      @truncated = true
    else
      append_to_truncated_string(string)
    end
  end

  # This method is called when the parser encounters a closing tag
  def end_element(name)
    if ignore_mode?
      exit_ignored_level(name)
      return
    end

    # Note that any remaining end tags get added automatically (in `end_document`) as the document is closed
    return if max_length_reached? || ignorable_tag?(name)
    return if single_tag_element?(name)

    @closing_tags.pop
    # Don't count the length when closing a tag - it was accommodated when
    # the tag was opened
    append_to_truncated_string(closing_tag(name), 0)
  end

  # Called when parsing finishes: closes every element that is still open,
  # innermost first. Their length was already accounted for on opening.
  def end_document
    @closing_tags.reverse_each { |name| append_to_truncated_string(closing_tag(name), 0) }
  end

  private

  # Renders an opening tag; void elements are rendered self-closed.
  def opening_tag(name, attributes)
    attributes_string = attributes_to_string(attributes)
    if single_tag_element?(name)
      "<#{name}#{attributes_string}/>"
    else
      "<#{name}#{attributes_string}>"
    end
  end

  def comment_tag(comment)
    "<!--#{comment}-->"
  end

  def closing_tag(name)
    "</#{name}>"
  end

  # Bytes still available in the budget (zero or negative once it is used up).
  def remaining_length
    max_length - @estimated_length
  end

  def single_tag_element?(name)
    VOID_TAGS.include?(name)
  end

  # Appends +string+ to the output and adds +overridden_length+ to the
  # running estimate, or the string's own bytesize when none is given.
  def append_to_truncated_string(string, overridden_length = nil)
    @truncated_string << string
    @estimated_length += (overridden_length || string.bytesize)
  end

  # Renders attributes as " key='value'" pairs with entity-encoded values;
  # returns an empty string when there are none.
  def attributes_to_string(attributes)
    attributes.map { |key, value| " #{key}='#{@html_coder.encode(value)}'" }.join
  end

  def max_length_reached?
    @estimated_length >= max_length
  end

  # Marks the document as truncated and returns the leading part of +string+
  # that leaves room for the tail. Bytes left invalid by the cut are removed;
  # the result is empty when there is no room at all.
  def truncate_string(string)
    @truncated = true
    truncate_length = remaining_length - tail.bytesize
    (string.byteslice(0, truncate_length) || '').scrub('')
  end

  # Byte cost of opening an element: the rendered opening tag plus, unless
  # the element is void, its closing tag.
  def overridden_tag_length(tag_name, rendered_tag_with_attributes)
    # Start with the opening tag
    length = rendered_tag_with_attributes.bytesize

    # Add on closing tag if necessary
    length += closing_tag(tag_name).bytesize unless single_tag_element?(tag_name)
    length
  end

  def ignorable_tag?(name)
    @fragment_mode && IGNORABLE_TAGS.include?(name.downcase)
  end

  # Void elements never get an end tag of their own to balance against, so
  # they do not change the ignore depth.
  def enter_ignored_level(name)
    @ignored_levels += 1 unless single_tag_element?(name)
  end

  def exit_ignored_level(name)
    @ignored_levels -= 1 unless single_tag_element?(name)
  end

  def ignore_mode?
    @ignored_levels > 0
  end
end
@@ -0,0 +1,3 @@
1
module Abbreviato
  # Version string of the gem.
  VERSION = '0.8.1'.freeze
end
metadata ADDED
@@ -0,0 +1,262 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: abbreviato
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.8.1
5
+ platform: ruby
6
+ authors:
7
+ - Jorge Manrubia
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2013-09-10 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: nokogiri
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.6.8
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.6.8
27
+ - !ruby/object:Gem::Dependency
28
+ name: htmlentities
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 4.3.4
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 4.3.4
41
+ - !ruby/object:Gem::Dependency
42
+ name: awesome_print
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: benchmark-memory
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: brakecheck
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: brakeman
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - ">="
95
+ - !ruby/object:Gem::Version
96
+ version: '0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: bump
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - ">="
102
+ - !ruby/object:Gem::Version
103
+ version: '0'
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: '0'
111
+ - !ruby/object:Gem::Dependency
112
+ name: bundler-audit
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ">="
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: byebug
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ">="
130
+ - !ruby/object:Gem::Version
131
+ version: '0'
132
+ type: :development
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ">="
137
+ - !ruby/object:Gem::Version
138
+ version: '0'
139
+ - !ruby/object:Gem::Dependency
140
+ name: flay
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - ">="
151
+ - !ruby/object:Gem::Version
152
+ version: '0'
153
+ - !ruby/object:Gem::Dependency
154
+ name: rake
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 11.3.0
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 11.3.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: rspec
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 3.5.0
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 3.5.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: rspec-benchmark
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - ">="
186
+ - !ruby/object:Gem::Version
187
+ version: '0'
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - ">="
193
+ - !ruby/object:Gem::Version
194
+ version: '0'
195
+ - !ruby/object:Gem::Dependency
196
+ name: rubocop
197
+ requirement: !ruby/object:Gem::Requirement
198
+ requirements:
199
+ - - ">="
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ type: :development
203
+ prerelease: false
204
+ version_requirements: !ruby/object:Gem::Requirement
205
+ requirements:
206
+ - - ">="
207
+ - !ruby/object:Gem::Version
208
+ version: '0'
209
+ - !ruby/object:Gem::Dependency
210
+ name: wwtd
211
+ requirement: !ruby/object:Gem::Requirement
212
+ requirements:
213
+ - - ">="
214
+ - !ruby/object:Gem::Version
215
+ version: '0'
216
+ type: :development
217
+ prerelease: false
218
+ version_requirements: !ruby/object:Gem::Requirement
219
+ requirements:
220
+ - - ">="
221
+ - !ruby/object:Gem::Version
222
+ version: '0'
223
+ description: Truncate HTML to a specific bytesize, while keeping valid markup
224
+ email: jorge.manrubia@gmail.com
225
+ executables: []
226
+ extensions: []
227
+ extra_rdoc_files:
228
+ - LICENSE.txt
229
+ - README.md
230
+ files:
231
+ - LICENSE.txt
232
+ - README.md
233
+ - Rakefile
234
+ - lib/abbreviato.rb
235
+ - lib/abbreviato/abbreviato.rb
236
+ - lib/abbreviato/truncated_sax_document.rb
237
+ - lib/abbreviato/version.rb
238
+ homepage: https://github.com/zendesk/abbreviato
239
+ licenses:
240
+ - MIT
241
+ metadata: {}
242
+ post_install_message:
243
+ rdoc_options: []
244
+ require_paths:
245
+ - lib
246
+ required_ruby_version: !ruby/object:Gem::Requirement
247
+ requirements:
248
+ - - ">="
249
+ - !ruby/object:Gem::Version
250
+ version: '0'
251
+ required_rubygems_version: !ruby/object:Gem::Requirement
252
+ requirements:
253
+ - - ">="
254
+ - !ruby/object:Gem::Version
255
+ version: '0'
256
+ requirements: []
257
+ rubyforge_project:
258
+ rubygems_version: 2.5.1
259
+ signing_key:
260
+ specification_version: 4
261
+ summary: A tool for efficiently truncating HTML strings to a specific bytesize
262
+ test_files: []