truncato 0.7.11 → 0.7.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +0 -2
- data/Rakefile +4 -24
- data/lib/truncato/truncated_sax_document.rb +7 -3
- data/lib/truncato/truncato.rb +13 -2
- data/lib/truncato/version.rb +1 -1
- metadata +12 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 98abd765f6cdb6de053cfe7322ab5df0de64f2a4c21c200fda656bdedb52cce3
|
4
|
+
data.tar.gz: ee299ddc104eb523cba8251d1da989a41bf3d25fa705612a1c9b50df7261816d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 82303b0a35c3e4465af5ab47f1bd1f5084ca22b8f98d52d36e1b7d0230a6b61f445ea02be3cb7e33d13c9d65acc1b36a990fdcc4705138f2c84b12102a7e1f37
|
7
|
+
data.tar.gz: 0ecb7576a3a1d9adeaf231f81c9f1385fcc05c020c4b37c13e5996a421fa7107372cbd068db862a664619ea91e983336aedd4ad1706c180b0ee643b1ac1cde3a
|
data/README.md
CHANGED
data/Rakefile
CHANGED
@@ -4,29 +4,9 @@ rescue LoadError
|
|
4
4
|
puts 'You must `gem install bundler` and `bundle install` to run rake tasks'
|
5
5
|
end
|
6
6
|
|
7
|
-
require
|
8
|
-
|
9
|
-
RDoc::Task.new(:rdoc) do |rdoc|
|
10
|
-
rdoc.rdoc_dir = 'rdoc'
|
11
|
-
rdoc.title = 'MailgunRails'
|
12
|
-
rdoc.options << '--line-numbers'
|
13
|
-
rdoc.rdoc_files.include('README.rdoc')
|
14
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
15
|
-
end
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
Bundler::GemHelper.install_tasks
|
21
|
-
|
22
|
-
require 'rake/testtask'
|
23
|
-
|
24
|
-
Rake::TestTask.new(:test) do |t|
|
25
|
-
t.libs << 'lib'
|
26
|
-
t.libs << 'test'
|
27
|
-
t.pattern = 'test/**/*_test.rb'
|
28
|
-
t.verbose = false
|
29
|
-
end
|
7
|
+
require "rake"
|
8
|
+
require "rspec/core/rake_task"
|
30
9
|
|
10
|
+
RSpec::Core::RakeTask.new(:spec)
|
31
11
|
|
32
|
-
task default: :
|
12
|
+
task default: :spec
|
data/lib/truncato/truncated_sax_document.rb
CHANGED
@@ -6,7 +6,7 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
6
6
|
|
7
7
|
SINGLE_TAGS = %w{br img}
|
8
8
|
|
9
|
-
attr_reader :
|
9
|
+
attr_reader :max_length, :max_length_reached, :tail,
|
10
10
|
:count_tags, :filtered_attributes, :filtered_tags, :ignored_levels
|
11
11
|
|
12
12
|
def initialize(options)
|
@@ -54,6 +54,10 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
54
54
|
close_truncated_document if max_length_reached
|
55
55
|
end
|
56
56
|
|
57
|
+
def truncated_string
|
58
|
+
@truncated_buffer.join
|
59
|
+
end
|
60
|
+
|
57
61
|
private
|
58
62
|
|
59
63
|
def capture_options(options)
|
@@ -78,7 +82,7 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
78
82
|
end
|
79
83
|
|
80
84
|
def init_parsing_state
|
81
|
-
@
|
85
|
+
@truncated_buffer = []
|
82
86
|
@closing_tags = []
|
83
87
|
@estimated_length = @count_tail ? tail_length : 0
|
84
88
|
@max_length_reached = false
|
@@ -94,7 +98,7 @@ class TruncatedSaxDocument < Nokogiri::XML::SAX::Document
|
|
94
98
|
end
|
95
99
|
|
96
100
|
def append_to_truncated_string string, overriden_length=nil
|
97
|
-
@
|
101
|
+
@truncated_buffer << string
|
98
102
|
increase_estimated_length(overriden_length || string.length)
|
99
103
|
end
|
100
104
|
|
data/lib/truncato/truncato.rb
CHANGED
@@ -6,7 +6,7 @@ module Truncato
|
|
6
6
|
filtered_attributes: []
|
7
7
|
}
|
8
8
|
|
9
|
-
ARTIFICIAL_ROOT_NAME = '
|
9
|
+
ARTIFICIAL_ROOT_NAME = 'truncato-artificial-root'
|
10
10
|
|
11
11
|
# Truncates the source XML string and returns the truncated XML. It will keep a valid XML structure
|
12
12
|
# and insert a _tail_ text indicating the position where content were removed (...).
|
@@ -26,12 +26,23 @@ module Truncato
|
|
26
26
|
private
|
27
27
|
|
28
28
|
def self.truncate_html source, options
|
29
|
+
source = unicode_normalize(source)
|
29
30
|
self.do_truncate_html(source, options) ? self.do_truncate_html(with_artificial_root(source), options) : nil
|
30
31
|
end
|
31
32
|
|
33
|
+
def self.unicode_normalize(string)
|
34
|
+
string.unicode_normalize
|
35
|
+
rescue Encoding::CompatibilityError
|
36
|
+
# By relying on rescue we don't have to maintain a list of compatible encodings.
|
37
|
+
string
|
38
|
+
end
|
39
|
+
|
32
40
|
def self.do_truncate_html source, options
|
33
41
|
truncated_sax_document = TruncatedSaxDocument.new(options)
|
34
|
-
|
42
|
+
|
43
|
+
# Only nokogiri >= 1.17 accept Encoding object, older needs a String as encoding
|
44
|
+
parser = Nokogiri::HTML::SAX::Parser.new(truncated_sax_document, source.encoding.to_s)
|
45
|
+
|
35
46
|
parser.parse(source) { |context| context.replace_entities = false }
|
36
47
|
truncated_string = truncated_sax_document.truncated_string
|
37
48
|
truncated_string.empty? ? nil : truncated_string
|
data/lib/truncato/version.rb
CHANGED
metadata
CHANGED
@@ -1,11 +1,10 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: truncato
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.7.11
|
4
|
+
version: 0.7.13
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jorge Manrubia
|
8
|
-
autorequire:
|
9
8
|
bindir: bin
|
10
9
|
cert_chain: []
|
11
10
|
date: 2013-09-10 00:00:00.000000000 Z
|
@@ -48,30 +47,30 @@ dependencies:
|
|
48
47
|
name: rspec
|
49
48
|
requirement: !ruby/object:Gem::Requirement
|
50
49
|
requirements:
|
51
|
-
- - "
|
50
|
+
- - ">="
|
52
51
|
- !ruby/object:Gem::Version
|
53
|
-
version:
|
52
|
+
version: '0'
|
54
53
|
type: :development
|
55
54
|
prerelease: false
|
56
55
|
version_requirements: !ruby/object:Gem::Requirement
|
57
56
|
requirements:
|
58
|
-
- - "
|
57
|
+
- - ">="
|
59
58
|
- !ruby/object:Gem::Version
|
60
|
-
version:
|
59
|
+
version: '0'
|
61
60
|
- !ruby/object:Gem::Dependency
|
62
61
|
name: rake
|
63
62
|
requirement: !ruby/object:Gem::Requirement
|
64
63
|
requirements:
|
65
|
-
- - "
|
64
|
+
- - ">="
|
66
65
|
- !ruby/object:Gem::Version
|
67
|
-
version:
|
66
|
+
version: '0'
|
68
67
|
type: :development
|
69
68
|
prerelease: false
|
70
69
|
version_requirements: !ruby/object:Gem::Requirement
|
71
70
|
requirements:
|
72
|
-
- - "
|
71
|
+
- - ">="
|
73
72
|
- !ruby/object:Gem::Version
|
74
|
-
version:
|
73
|
+
version: '0'
|
75
74
|
description: Ruby tool for truncating HTML strings keeping a valid HTML markup
|
76
75
|
email: jorge.manrubia@gmail.com
|
77
76
|
executables: []
|
@@ -90,8 +89,8 @@ files:
|
|
90
89
|
homepage: https://github.com/jorgemanrubia/truncato
|
91
90
|
licenses:
|
92
91
|
- MIT
|
93
|
-
metadata:
|
94
|
-
|
92
|
+
metadata:
|
93
|
+
allowed_push_host: https://rubygems.org
|
95
94
|
rdoc_options: []
|
96
95
|
require_paths:
|
97
96
|
- lib
|
@@ -106,9 +105,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
105
|
- !ruby/object:Gem::Version
|
107
106
|
version: '0'
|
108
107
|
requirements: []
|
109
|
-
|
110
|
-
rubygems_version: 2.7.6
|
111
|
-
signing_key:
|
108
|
+
rubygems_version: 3.6.2
|
112
109
|
specification_version: 4
|
113
110
|
summary: A tool for truncating HTML strings efficiently
|
114
111
|
test_files: []
|