truncato 0.7.4 → 0.7.5
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -2
- data/VERSION +1 -1
- data/lib/truncato/truncated_sax_document.rb +69 -21
- data/lib/truncato/truncato.rb +2 -2
- data/spec/truncato/truncato_spec.rb +64 -5
- data/truncato.gemspec +8 -2
- metadata +35 -3
data/README.md
CHANGED
@@ -9,7 +9,7 @@ In your `Gemfile`
|
|
9
9
|
```ruby
|
10
10
|
gem 'truncato'
|
11
11
|
```
|
12
|
-
|
12
|
+
|
13
13
|
## Usage
|
14
14
|
|
15
15
|
```ruby
|
@@ -23,12 +23,15 @@ The configuration options are:
|
|
23
23
|
* `tail`: The string to append when the truncation occurs ('...' by default)
|
24
24
|
* `count_tags`: Boolean value indicating whether tags size should be considered when truncating (`true` by default)
|
25
25
|
* `filtered_attributes`: Array of attribute names that will be removed from the output. This allows you to make the truncated string shorter by excluding the content of attributes you can discard in some given context, e.g. the HTML `style` attribute.
|
26
|
+
* `tail_before_final_tag`: Boolean value indicating whether to apply a tail before the final closing tag (`false` by default)
|
27
|
+
* `comments`: Boolean value indicating whether to include comments in parsed results (`false` by default)
|
28
|
+
* `count_tail`: Boolean value indicating whether to include the tail within the bounds of the provided max length (`false` by default)
|
26
29
|
|
27
30
|
## Performance
|
28
31
|
|
29
32
|
Truncato was designed with performance in mind. Its main motivation was that existing libs couldn't truncate a multiple-MB document into a few-KB one in a reasonable time. It uses the [Nokogiri](http://nokogiri.org/) SAX parser.
|
30
33
|
|
31
|
-
There is a benchmark included that generates a synthetic XML of 4MB and truncates it to 400 KB. You can run the benchmark using
|
34
|
+
There is a benchmark included that generates a synthetic XML of 4MB and truncates it to 400 KB. You can run the benchmark using
|
32
35
|
|
33
36
|
```ruby
|
34
37
|
rake truncato:benchmark
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.4
|
1
|
+
0.7.5
|
@@ -6,13 +6,13 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
6
6
|
|
7
7
|
def initialize(options)
|
8
8
|
@html_coder = HTMLEntities.new
|
9
|
-
capture_options
|
9
|
+
capture_options options
|
10
10
|
init_parsing_state
|
11
11
|
end
|
12
12
|
|
13
13
|
def start_element name, attributes
|
14
14
|
return if @max_length_reached || artificial_root_name?(name)
|
15
|
-
@closing_tags.push name
|
15
|
+
@closing_tags.push name unless single_tag_element? name
|
16
16
|
append_to_truncated_string opening_tag(name, attributes), overriden_tag_length
|
17
17
|
end
|
18
18
|
|
@@ -23,10 +23,25 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
23
23
|
append_to_truncated_string @html_coder.encode(string_to_append), string_to_append.length
|
24
24
|
end
|
25
25
|
|
26
|
+
def comment string
|
27
|
+
if @comments
|
28
|
+
return if @max_length_reached
|
29
|
+
remaining_length = max_length - @estimated_length - 1
|
30
|
+
string_to_append = comment_tag(string).length > remaining_length ? truncate_comment(comment_tag(string), remaining_length) : comment_tag(string)
|
31
|
+
append_to_truncated_string string_to_append
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
def comment_tag comment
|
36
|
+
"<!--#{comment}-->"
|
37
|
+
end
|
38
|
+
|
26
39
|
def end_element name
|
27
40
|
return if @max_length_reached || artificial_root_name?(name)
|
28
|
-
|
29
|
-
|
41
|
+
unless single_tag_element? name
|
42
|
+
@closing_tags.pop
|
43
|
+
append_to_truncated_string closing_tag(name), overriden_tag_length
|
44
|
+
end
|
30
45
|
end
|
31
46
|
|
32
47
|
def end_document
|
@@ -36,40 +51,55 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
36
51
|
private
|
37
52
|
|
38
53
|
def capture_options(options)
|
39
|
-
@max_length
|
40
|
-
@count_tags
|
41
|
-
@
|
42
|
-
@
|
54
|
+
@max_length = options[:max_length]
|
55
|
+
@count_tags = options [:count_tags]
|
56
|
+
@count_tail = options.fetch(:count_tail, false)
|
57
|
+
@tail = options[:tail]
|
58
|
+
@filtered_attributes = options[:filtered_attributes] || []
|
59
|
+
@tail_before_final_tag = options.fetch(:tail_before_final_tag, false)
|
60
|
+
@comments = options.fetch(:comments, false)
|
43
61
|
end
|
44
62
|
|
45
63
|
def init_parsing_state
|
46
|
-
@truncated_string
|
47
|
-
@closing_tags
|
48
|
-
@estimated_length
|
64
|
+
@truncated_string = ""
|
65
|
+
@closing_tags = []
|
66
|
+
@estimated_length = @count_tail ? tail_length : 0
|
49
67
|
@max_length_reached = false
|
50
68
|
end
|
51
69
|
|
70
|
+
def tail_length
|
71
|
+
tail.match(/^&\w+;$/).nil? ? tail.length : 1
|
72
|
+
end
|
73
|
+
|
74
|
+
def single_tag_element? name
|
75
|
+
["br", "img"].include? name
|
76
|
+
end
|
77
|
+
|
52
78
|
def append_to_truncated_string string, overriden_length=nil
|
53
79
|
@truncated_string << string
|
54
80
|
increase_estimated_length(overriden_length || string.length)
|
55
81
|
end
|
56
82
|
|
57
83
|
def opening_tag name, attributes
|
58
|
-
attributes_string = attributes_to_string
|
59
|
-
|
84
|
+
attributes_string = attributes_to_string attributes
|
85
|
+
if single_tag_element? name
|
86
|
+
"<#{name}#{attributes_string} />"
|
87
|
+
else
|
88
|
+
"<#{name}#{attributes_string}>"
|
89
|
+
end
|
60
90
|
end
|
61
91
|
|
62
|
-
def attributes_to_string
|
92
|
+
def attributes_to_string attributes
|
63
93
|
return "" if attributes.empty?
|
64
94
|
attributes_string = concatenate_attributes_declaration attributes
|
65
95
|
attributes_string.rstrip
|
66
96
|
end
|
67
97
|
|
68
|
-
def concatenate_attributes_declaration
|
98
|
+
def concatenate_attributes_declaration attributes
|
69
99
|
attributes.inject(' ') do |string, attribute|
|
70
100
|
key, value = attribute
|
71
|
-
next string if @filtered_attributes.include?
|
72
|
-
string << "#{key}='#{@html_coder.encode
|
101
|
+
next string if @filtered_attributes.include? key
|
102
|
+
string << "#{key}='#{@html_coder.encode value}' "
|
73
103
|
end
|
74
104
|
end
|
75
105
|
|
@@ -87,24 +117,42 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
87
117
|
end
|
88
118
|
|
89
119
|
def truncate_string string, remaining_length
|
90
|
-
@
|
91
|
-
|
120
|
+
if @tail_before_final_tag
|
121
|
+
string[0..remaining_length]
|
122
|
+
else
|
123
|
+
@tail_appended = true
|
124
|
+
"#{string[0..remaining_length]}#{tail}"
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def truncate_comment string, remaining_length
|
129
|
+
if @tail_before_final_tag
|
130
|
+
string[0..remaining_length]
|
131
|
+
else
|
132
|
+
@tail_appended = true
|
133
|
+
"#{string[0..remaining_length]}#{tail}-->"
|
134
|
+
end
|
92
135
|
end
|
93
136
|
|
94
137
|
def close_truncated_document
|
138
|
+
append_tail_between_closing_tags if @tail_before_final_tag
|
95
139
|
append_to_truncated_string tail unless @tail_appended
|
96
140
|
append_closing_tags
|
97
141
|
end
|
98
142
|
|
99
143
|
def append_closing_tags
|
100
|
-
@closing_tags.reverse.each { |name| append_to_truncated_string closing_tag
|
144
|
+
@closing_tags.reverse.each { |name| append_to_truncated_string closing_tag name }
|
101
145
|
end
|
102
146
|
|
103
147
|
def overriden_tag_length
|
104
148
|
@count_tags ? nil : 0
|
105
149
|
end
|
106
150
|
|
107
|
-
def artificial_root_name?
|
151
|
+
def artificial_root_name? name
|
108
152
|
name == Truncato::ARTIFICIAL_ROOT_NAME
|
109
153
|
end
|
154
|
+
|
155
|
+
def append_tail_between_closing_tags
|
156
|
+
append_to_truncated_string closing_tag(@closing_tags.delete_at (@closing_tags.length - 1)) if @closing_tags.length > 1
|
157
|
+
end
|
110
158
|
end
|
data/lib/truncato/truncato.rb
CHANGED
@@ -26,7 +26,7 @@ module Truncato
|
|
26
26
|
private
|
27
27
|
|
28
28
|
def self.truncate_html source, options
|
29
|
-
self.do_truncate_html(source, options) ? self.do_truncate_html(
|
29
|
+
self.do_truncate_html(source, options) ? self.do_truncate_html(with_artificial_root(source), options) : nil
|
30
30
|
end
|
31
31
|
|
32
32
|
def self.do_truncate_html source, options
|
@@ -37,7 +37,7 @@ module Truncato
|
|
37
37
|
truncated_string.empty? ? nil : truncated_string
|
38
38
|
end
|
39
39
|
|
40
|
-
def self.
|
40
|
+
def self.with_artificial_root(source)
|
41
41
|
"<#{ARTIFICIAL_ROOT_NAME}>#{source}</#{ARTIFICIAL_ROOT_NAME}>"
|
42
42
|
end
|
43
43
|
|
@@ -33,6 +33,67 @@ describe "Truncato" do
|
|
33
33
|
expected: "<div>some text 0</div><div><p>some text 1</p><p>som...</p></div>"
|
34
34
|
end
|
35
35
|
|
36
|
+
describe "include tail as part of max_length" do
|
37
|
+
it_should_truncate "html text with a tag (counting tail)", with: {max_length: 4, count_tail: true, count_tags: false},
|
38
|
+
source: "<p>some text</p>",
|
39
|
+
expected: "<p>s...</p>"
|
40
|
+
|
41
|
+
it_should_truncate "html text with a tag (counting tail)", with: {max_length: 6, count_tail: true, count_tags: false}, source: "<p>some text</p>", expected: "<p>som...</p>"
|
42
|
+
|
43
|
+
it_should_truncate "html text with a tag (counting tail)", with: {max_length: 16, count_tail: true, count_tags: false},
|
44
|
+
source: "<p>some text</p><div><span>some other text</span></div>",
|
45
|
+
expected: "<p>some text</p><div><span>some...</span></div>"
|
46
|
+
|
47
|
+
it_should_truncate "html text with a tag (counting tail and including tail before final tag)", with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true},
|
48
|
+
source: "<p>some text</p><div><span>some other text</span></div>",
|
49
|
+
expected: "<p>some text</p><div><span>some</span>...</div>"
|
50
|
+
|
51
|
+
it_should_truncate "html text, counting special html characters as one character",
|
52
|
+
with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true, tail: '…'},
|
53
|
+
source: "<p>some text</p><div><span>some other text</span></div>",
|
54
|
+
expected: "<p>some text</p><div><span>some o</span>…</div>"
|
55
|
+
end
|
56
|
+
|
57
|
+
describe "insert tail between two or more final tags" do
|
58
|
+
it_should_truncate "html text as normal when tail_before_final_tag option is not set",
|
59
|
+
with: {max_length: 4, count_tags: false},
|
60
|
+
source: "<p><span>some text</span>some more text</p>",
|
61
|
+
expected: "<p><span>some...</span></p>"
|
62
|
+
|
63
|
+
it_should_truncate "html text when tail_before_final_tag: true by inserting tail before the final tag, and after any other closing tags",
|
64
|
+
with: {max_length: 4, count_tags: false, tail_before_final_tag: true},
|
65
|
+
source: "<p><span>some text</span>some more text</p>",
|
66
|
+
expected: "<p><span>some</span>...</p>"
|
67
|
+
end
|
68
|
+
|
69
|
+
describe "single html tag elements" do
|
70
|
+
it_should_truncate "html text with <br /> element without adding a closing tag", with: {max_length: 9},
|
71
|
+
source: "<div><p><br />some text 1</p><p>some text 2</p></div>",
|
72
|
+
expected: "<div><p><br />...</p></div>"
|
73
|
+
|
74
|
+
it_should_truncate "html text with <img /> element without adding a closing tag", with: {max_length: 9},
|
75
|
+
source: "<div><p><img src='some_path' />some text 1</p><p>some text 2</p></div>",
|
76
|
+
expected: "<div><p><img src='some_path' />...</p></div>"
|
77
|
+
end
|
78
|
+
|
79
|
+
describe "comment html element" do
|
80
|
+
it_should_truncate "html text and ignore <!-- a comment --> element by default", with: {max_length: 20},
|
81
|
+
source: "<!-- a comment --><p>some text 1</p>",
|
82
|
+
expected: "<p>some text 1</p>"
|
83
|
+
|
84
|
+
it_should_truncate "html text with <!-- a comment --> element", with: {max_length: 30, comments: true},
|
85
|
+
source: "<!-- a comment --><p>some text 1</p>",
|
86
|
+
expected: "<!-- a comment --><p>some text...</p>"
|
87
|
+
|
88
|
+
it_should_truncate "html text with <!-- a comment --> element that exceeds the max_length", with: {max_length: 5, comments: true},
|
89
|
+
source: "<!-- a comment --><p>some text 1</p>",
|
90
|
+
expected: "<!-- ...-->"
|
91
|
+
|
92
|
+
it_should_truncate "html text with <!-- a comment --> element with other elements that exceeds max_length", with: {max_length: 20, comments: true},
|
93
|
+
source: "<!-- a comment --><p>some text 1</p>",
|
94
|
+
expected: "<!-- a comment --><p>...</p>"
|
95
|
+
end
|
96
|
+
|
36
97
|
describe "html attributes" do
|
37
98
|
it_should_truncate "html text with 1 attributes", with: {max_length: 3, count_tags: false},
|
38
99
|
source: "<p attr1='1'>some text</p>",
|
@@ -54,15 +115,13 @@ describe "Truncato" do
|
|
54
115
|
source: "<p attr1='>some'>text</p>",
|
55
116
|
expected: "<p attr1='>some'>tex...</p>"
|
56
117
|
|
57
|
-
it_should_truncate "html text with 2 attributes filtering one of them", with: {max_length:
|
58
|
-
source: "<p attr1='1'>some text</p>",
|
59
|
-
expected: "<p attr1='1'>some text</p>"
|
118
|
+
it_should_truncate "html text with 2 attributes filtering one of them", with: {max_length: 90, count_tags: false, filtered_attributes: ['attr2']},
|
119
|
+
source: "<p attr1='1'>some text</p><p attr2='2'>filtered text</p>",
|
120
|
+
expected: "<p attr1='1'>some text</p><p>filtered text</p>"
|
60
121
|
|
61
122
|
it_should_truncate "html text with 2 attributes filtering all of them", with: {max_length: 3, count_tags: false, filtered_attributes: ['attr1', 'attr2']},
|
62
123
|
source: "<p attr1='1' attr2='2'>some text</p>",
|
63
124
|
expected: "<p>som...</p>"
|
64
125
|
end
|
65
126
|
|
66
|
-
|
67
127
|
end
|
68
|
-
|
data/truncato.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = "truncato"
|
8
|
-
s.version = "0.7.4"
|
8
|
+
s.version = "0.7.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Jorge Manrubia"]
|
12
|
-
s.date = "2013-04-
|
12
|
+
s.date = "2013-04-27"
|
13
13
|
s.description = "Ruby tool for truncating HTML strings keeping a valid HTML markup"
|
14
14
|
s.email = "jorge.manrubia@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
@@ -52,6 +52,8 @@ Gem::Specification.new do |s|
|
|
52
52
|
s.add_development_dependency(%q<bundler>, ["~> 1.3"])
|
53
53
|
s.add_development_dependency(%q<bundler>, ["~> 1.3"])
|
54
54
|
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
55
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.3"])
|
56
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
|
55
57
|
else
|
56
58
|
s.add_dependency(%q<truncato>, [">= 0"])
|
57
59
|
s.add_dependency(%q<nokogiri>, ["~> 1.5.5"])
|
@@ -59,6 +61,8 @@ Gem::Specification.new do |s|
|
|
59
61
|
s.add_dependency(%q<bundler>, ["~> 1.3"])
|
60
62
|
s.add_dependency(%q<bundler>, ["~> 1.3"])
|
61
63
|
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
64
|
+
s.add_dependency(%q<bundler>, ["~> 1.3"])
|
65
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
62
66
|
end
|
63
67
|
else
|
64
68
|
s.add_dependency(%q<truncato>, [">= 0"])
|
@@ -67,6 +71,8 @@ Gem::Specification.new do |s|
|
|
67
71
|
s.add_dependency(%q<bundler>, ["~> 1.3"])
|
68
72
|
s.add_dependency(%q<bundler>, ["~> 1.3"])
|
69
73
|
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
74
|
+
s.add_dependency(%q<bundler>, ["~> 1.3"])
|
75
|
+
s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
|
70
76
|
end
|
71
77
|
end
|
72
78
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: truncato
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.4
|
4
|
+
version: 0.7.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: truncato
|
@@ -107,6 +107,38 @@ dependencies:
|
|
107
107
|
- - ~>
|
108
108
|
- !ruby/object:Gem::Version
|
109
109
|
version: 1.8.4
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: bundler
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ~>
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '1.3'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ~>
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '1.3'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: jeweler
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ~>
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.8.4
|
134
|
+
type: :development
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ~>
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: 1.8.4
|
110
142
|
description: Ruby tool for truncating HTML strings keeping a valid HTML markup
|
111
143
|
email: jorge.manrubia@gmail.com
|
112
144
|
executables: []
|
@@ -148,7 +180,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
180
|
version: '0'
|
149
181
|
segments:
|
150
182
|
- 0
|
151
|
-
hash:
|
183
|
+
hash: 3957383429942337949
|
152
184
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
153
185
|
none: false
|
154
186
|
requirements:
|