truncato 0.7.4 → 0.7.5

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -9,7 +9,7 @@ In your `Gemfile`
9
9
  ```ruby
10
10
  gem 'truncato'
11
11
  ```
12
-
12
+
13
13
  ## Usage
14
14
 
15
15
  ```ruby
@@ -23,12 +23,15 @@ The configuration options are:
23
23
  * `tail`: The string to append when the truncation occurs ('...' by default)
24
24
  * `count_tags`: Boolean value indicating whether tags size should be considered when truncating (`true` by default)
25
25
  * `filtered_attributes`: Array of attribute names that will be removed from the output. This allows you to make the truncated string shorter by excluding the content of attributes you can discard in some given context, e.g. the HTML `style` attribute.
26
+ * `tail_before_final_tag`: Boolean value indicating whether to apply a tail before the final closing tag (`false` by default)
27
+ * `comments`: Boolean value indicating whether to include comments in parsed results (`false` by default)
28
+ * `count_tail`: Boolean value indicating whether to include the tail within the bounds of the provided max length (`false` by default)
26
29
 
27
30
  ## Performance
28
31
 
29
32
  Truncato was designed with performance in mind. Its main motivation was that existing libs couldn't truncate a multiple-MB document into a few-KB one in a reasonable time. It uses the [Nokogiri](http://nokogiri.org/) SAX parser.
30
33
 
31
- There is a benchmark included that generates a synthetic XML of 4MB and truncates it to 400 KB. You can run the benchmark using
34
+ There is a benchmark included that generates a synthetic XML of 4MB and truncates it to 400 KB. You can run the benchmark using
32
35
 
33
36
  ```ruby
34
37
  rake truncato:benchmark
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.4
1
+ 0.7.5
@@ -6,13 +6,13 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
6
6
 
7
7
  def initialize(options)
8
8
  @html_coder = HTMLEntities.new
9
- capture_options(options)
9
+ capture_options options
10
10
  init_parsing_state
11
11
  end
12
12
 
13
13
  def start_element name, attributes
14
14
  return if @max_length_reached || artificial_root_name?(name)
15
- @closing_tags.push name
15
+ @closing_tags.push name unless single_tag_element? name
16
16
  append_to_truncated_string opening_tag(name, attributes), overriden_tag_length
17
17
  end
18
18
 
@@ -23,10 +23,25 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
23
23
  append_to_truncated_string @html_coder.encode(string_to_append), string_to_append.length
24
24
  end
25
25
 
26
+ def comment string
27
+ if @comments
28
+ return if @max_length_reached
29
+ remaining_length = max_length - @estimated_length - 1
30
+ string_to_append = comment_tag(string).length > remaining_length ? truncate_comment(comment_tag(string), remaining_length) : comment_tag(string)
31
+ append_to_truncated_string string_to_append
32
+ end
33
+ end
34
+
35
+ def comment_tag comment
36
+ "<!--#{comment}-->"
37
+ end
38
+
26
39
  def end_element name
27
40
  return if @max_length_reached || artificial_root_name?(name)
28
- @closing_tags.pop
29
- append_to_truncated_string closing_tag(name), overriden_tag_length
41
+ unless single_tag_element? name
42
+ @closing_tags.pop
43
+ append_to_truncated_string closing_tag(name), overriden_tag_length
44
+ end
30
45
  end
31
46
 
32
47
  def end_document
@@ -36,40 +51,55 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
36
51
  private
37
52
 
38
53
  def capture_options(options)
39
- @max_length = options[:max_length]
40
- @count_tags = options [:count_tags]
41
- @tail = options[:tail]
42
- @filtered_attributes = options[:filtered_attributes] || []
54
+ @max_length = options[:max_length]
55
+ @count_tags = options [:count_tags]
56
+ @count_tail = options.fetch(:count_tail, false)
57
+ @tail = options[:tail]
58
+ @filtered_attributes = options[:filtered_attributes] || []
59
+ @tail_before_final_tag = options.fetch(:tail_before_final_tag, false)
60
+ @comments = options.fetch(:comments, false)
43
61
  end
44
62
 
45
63
  def init_parsing_state
46
- @truncated_string = ""
47
- @closing_tags = []
48
- @estimated_length = 0
64
+ @truncated_string = ""
65
+ @closing_tags = []
66
+ @estimated_length = @count_tail ? tail_length : 0
49
67
  @max_length_reached = false
50
68
  end
51
69
 
70
+ def tail_length
71
+ tail.match(/^&\w+;$/).nil? ? tail.length : 1
72
+ end
73
+
74
+ def single_tag_element? name
75
+ ["br", "img"].include? name
76
+ end
77
+
52
78
  def append_to_truncated_string string, overriden_length=nil
53
79
  @truncated_string << string
54
80
  increase_estimated_length(overriden_length || string.length)
55
81
  end
56
82
 
57
83
  def opening_tag name, attributes
58
- attributes_string = attributes_to_string(attributes)
59
- "<#{name}#{attributes_string}>"
84
+ attributes_string = attributes_to_string attributes
85
+ if single_tag_element? name
86
+ "<#{name}#{attributes_string} />"
87
+ else
88
+ "<#{name}#{attributes_string}>"
89
+ end
60
90
  end
61
91
 
62
- def attributes_to_string(attributes)
92
+ def attributes_to_string attributes
63
93
  return "" if attributes.empty?
64
94
  attributes_string = concatenate_attributes_declaration attributes
65
95
  attributes_string.rstrip
66
96
  end
67
97
 
68
- def concatenate_attributes_declaration(attributes)
98
+ def concatenate_attributes_declaration attributes
69
99
  attributes.inject(' ') do |string, attribute|
70
100
  key, value = attribute
71
- next string if @filtered_attributes.include?(key)
72
- string << "#{key}='#{@html_coder.encode(value)}' "
101
+ next string if @filtered_attributes.include? key
102
+ string << "#{key}='#{@html_coder.encode value}' "
73
103
  end
74
104
  end
75
105
 
@@ -87,24 +117,42 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
87
117
  end
88
118
 
89
119
  def truncate_string string, remaining_length
90
- @tail_appended = true
91
- "#{string[0..remaining_length]}#{tail}"
120
+ if @tail_before_final_tag
121
+ string[0..remaining_length]
122
+ else
123
+ @tail_appended = true
124
+ "#{string[0..remaining_length]}#{tail}"
125
+ end
126
+ end
127
+
128
+ def truncate_comment string, remaining_length
129
+ if @tail_before_final_tag
130
+ string[0..remaining_length]
131
+ else
132
+ @tail_appended = true
133
+ "#{string[0..remaining_length]}#{tail}-->"
134
+ end
92
135
  end
93
136
 
94
137
  def close_truncated_document
138
+ append_tail_between_closing_tags if @tail_before_final_tag
95
139
  append_to_truncated_string tail unless @tail_appended
96
140
  append_closing_tags
97
141
  end
98
142
 
99
143
  def append_closing_tags
100
- @closing_tags.reverse.each { |name| append_to_truncated_string closing_tag(name) }
144
+ @closing_tags.reverse.each { |name| append_to_truncated_string closing_tag name }
101
145
  end
102
146
 
103
147
  def overriden_tag_length
104
148
  @count_tags ? nil : 0
105
149
  end
106
150
 
107
- def artificial_root_name?(name)
151
+ def artificial_root_name? name
108
152
  name == Truncato::ARTIFICIAL_ROOT_NAME
109
153
  end
154
+
155
+ def append_tail_between_closing_tags
156
+ append_to_truncated_string closing_tag(@closing_tags.delete_at (@closing_tags.length - 1)) if @closing_tags.length > 1
157
+ end
110
158
  end
@@ -26,7 +26,7 @@ module Truncato
26
26
  private
27
27
 
28
28
  def self.truncate_html source, options
29
- self.do_truncate_html(source, options) ? self.do_truncate_html(with_articial_root(source), options) : nil
29
+ self.do_truncate_html(source, options) ? self.do_truncate_html(with_artificial_root(source), options) : nil
30
30
  end
31
31
 
32
32
  def self.do_truncate_html source, options
@@ -37,7 +37,7 @@ module Truncato
37
37
  truncated_string.empty? ? nil : truncated_string
38
38
  end
39
39
 
40
- def self.with_articial_root(source)
40
+ def self.with_artificial_root(source)
41
41
  "<#{ARTIFICIAL_ROOT_NAME}>#{source}</#{ARTIFICIAL_ROOT_NAME}>"
42
42
  end
43
43
 
@@ -33,6 +33,67 @@ describe "Truncato" do
33
33
  expected: "<div>some text 0</div><div><p>some text 1</p><p>som...</p></div>"
34
34
  end
35
35
 
36
+ describe "include tail as part of max_length" do
37
+ it_should_truncate "html text with a tag (counting tail)", with: {max_length: 4, count_tail: true, count_tags: false},
38
+ source: "<p>some text</p>",
39
+ expected: "<p>s...</p>"
40
+
41
+ it_should_truncate "html text with a tag (counting tail)", with: {max_length: 6, count_tail: true, count_tags: false}, source: "<p>some text</p>", expected: "<p>som...</p>"
42
+
43
+ it_should_truncate "html text with a tag (counting tail)", with: {max_length: 16, count_tail: true, count_tags: false},
44
+ source: "<p>some text</p><div><span>some other text</span></div>",
45
+ expected: "<p>some text</p><div><span>some...</span></div>"
46
+
47
+ it_should_truncate "html text with a tag (counting tail and including tail before final tag)", with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true},
48
+ source: "<p>some text</p><div><span>some other text</span></div>",
49
+ expected: "<p>some text</p><div><span>some</span>...</div>"
50
+
51
+ it_should_truncate "html text, counting special html characters as one character",
52
+ with: {max_length: 16, count_tail: true, count_tags: false, tail_before_final_tag: true, tail: '&hellip;'},
53
+ source: "<p>some text</p><div><span>some other text</span></div>",
54
+ expected: "<p>some text</p><div><span>some o</span>&hellip;</div>"
55
+ end
56
+
57
+ describe "insert tail between two or more final tags" do
58
+ it_should_truncate "html text as normal when tail_before_final_tag option is not set",
59
+ with: {max_length: 4, count_tags: false},
60
+ source: "<p><span>some text</span>some more text</p>",
61
+ expected: "<p><span>some...</span></p>"
62
+
63
+ it_should_truncate "html text when tail_before_final_tag: true by inserting tail before the final tag, and after any other closing tags",
64
+ with: {max_length: 4, count_tags: false, tail_before_final_tag: true},
65
+ source: "<p><span>some text</span>some more text</p>",
66
+ expected: "<p><span>some</span>...</p>"
67
+ end
68
+
69
+ describe "single html tag elements" do
70
+ it_should_truncate "html text with <br /> element without adding a closing tag", with: {max_length: 9},
71
+ source: "<div><p><br />some text 1</p><p>some text 2</p></div>",
72
+ expected: "<div><p><br />...</p></div>"
73
+
74
+ it_should_truncate "html text with <img /> element without adding a closing tag", with: {max_length: 9},
75
+ source: "<div><p><img src='some_path' />some text 1</p><p>some text 2</p></div>",
76
+ expected: "<div><p><img src='some_path' />...</p></div>"
77
+ end
78
+
79
+ describe "comment html element" do
80
+ it_should_truncate "html text and ignore <!-- a comment --> element by default", with: {max_length: 20},
81
+ source: "<!-- a comment --><p>some text 1</p>",
82
+ expected: "<p>some text 1</p>"
83
+
84
+ it_should_truncate "html text with <!-- a comment --> element", with: {max_length: 30, comments: true},
85
+ source: "<!-- a comment --><p>some text 1</p>",
86
+ expected: "<!-- a comment --><p>some text...</p>"
87
+
88
+ it_should_truncate "html text with <!-- a comment --> element that exceeds the max_length", with: {max_length: 5, comments: true},
89
+ source: "<!-- a comment --><p>some text 1</p>",
90
+ expected: "<!-- ...-->"
91
+
92
+ it_should_truncate "html text with <!-- a comment --> element with other elements that exceeds max_length", with: {max_length: 20, comments: true},
93
+ source: "<!-- a comment --><p>some text 1</p>",
94
+ expected: "<!-- a comment --><p>...</p>"
95
+ end
96
+
36
97
  describe "html attributes" do
37
98
  it_should_truncate "html text with 1 attributes", with: {max_length: 3, count_tags: false},
38
99
  source: "<p attr1='1'>some text</p>",
@@ -54,15 +115,13 @@ describe "Truncato" do
54
115
  source: "<p attr1='&gt;some'>text</p>",
55
116
  expected: "<p attr1='&gt;some'>tex...</p>"
56
117
 
57
- it_should_truncate "html text with 2 attributes filtering one of them", with: {max_length: 30, count_tags: false, filtered_attributes: ['attr2']},
58
- source: "<p attr1='1'>some text</p>",
59
- expected: "<p attr1='1'>some text</p>"
118
+ it_should_truncate "html text with 2 attributes filtering one of them", with: {max_length: 90, count_tags: false, filtered_attributes: ['attr2']},
119
+ source: "<p attr1='1'>some text</p><p attr2='2'>filtered text</p>",
120
+ expected: "<p attr1='1'>some text</p><p>filtered text</p>"
60
121
 
61
122
  it_should_truncate "html text with 2 attributes filtering all of them", with: {max_length: 3, count_tags: false, filtered_attributes: ['attr1', 'attr2']},
62
123
  source: "<p attr1='1' attr2='2'>some text</p>",
63
124
  expected: "<p>som...</p>"
64
125
  end
65
126
 
66
-
67
127
  end
68
-
data/truncato.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = "truncato"
8
- s.version = "0.7.4"
8
+ s.version = "0.7.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Jorge Manrubia"]
12
- s.date = "2013-04-14"
12
+ s.date = "2013-04-27"
13
13
  s.description = "Ruby tool for truncating HTML strings keeping a valid HTML markup"
14
14
  s.email = "jorge.manrubia@gmail.com"
15
15
  s.extra_rdoc_files = [
@@ -52,6 +52,8 @@ Gem::Specification.new do |s|
52
52
  s.add_development_dependency(%q<bundler>, ["~> 1.3"])
53
53
  s.add_development_dependency(%q<bundler>, ["~> 1.3"])
54
54
  s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
55
+ s.add_development_dependency(%q<bundler>, ["~> 1.3"])
56
+ s.add_development_dependency(%q<jeweler>, ["~> 1.8.4"])
55
57
  else
56
58
  s.add_dependency(%q<truncato>, [">= 0"])
57
59
  s.add_dependency(%q<nokogiri>, ["~> 1.5.5"])
@@ -59,6 +61,8 @@ Gem::Specification.new do |s|
59
61
  s.add_dependency(%q<bundler>, ["~> 1.3"])
60
62
  s.add_dependency(%q<bundler>, ["~> 1.3"])
61
63
  s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
64
+ s.add_dependency(%q<bundler>, ["~> 1.3"])
65
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
62
66
  end
63
67
  else
64
68
  s.add_dependency(%q<truncato>, [">= 0"])
@@ -67,6 +71,8 @@ Gem::Specification.new do |s|
67
71
  s.add_dependency(%q<bundler>, ["~> 1.3"])
68
72
  s.add_dependency(%q<bundler>, ["~> 1.3"])
69
73
  s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
74
+ s.add_dependency(%q<bundler>, ["~> 1.3"])
75
+ s.add_dependency(%q<jeweler>, ["~> 1.8.4"])
70
76
  end
71
77
  end
72
78
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: truncato
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.4
4
+ version: 0.7.5
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-14 00:00:00.000000000 Z
12
+ date: 2013-04-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: truncato
@@ -107,6 +107,38 @@ dependencies:
107
107
  - - ~>
108
108
  - !ruby/object:Gem::Version
109
109
  version: 1.8.4
110
+ - !ruby/object:Gem::Dependency
111
+ name: bundler
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ~>
116
+ - !ruby/object:Gem::Version
117
+ version: '1.3'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ~>
124
+ - !ruby/object:Gem::Version
125
+ version: '1.3'
126
+ - !ruby/object:Gem::Dependency
127
+ name: jeweler
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ~>
132
+ - !ruby/object:Gem::Version
133
+ version: 1.8.4
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ~>
140
+ - !ruby/object:Gem::Version
141
+ version: 1.8.4
110
142
  description: Ruby tool for truncating HTML strings keeping a valid HTML markup
111
143
  email: jorge.manrubia@gmail.com
112
144
  executables: []
@@ -148,7 +180,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
148
180
  version: '0'
149
181
  segments:
150
182
  - 0
151
- hash: 1642571317414252409
183
+ hash: 3957383429942337949
152
184
  required_rubygems_version: !ruby/object:Gem::Requirement
153
185
  none: false
154
186
  requirements: