string_tools 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +8 -8
- data/.gitignore +1 -0
- data/CHANGELOG.md +15 -0
- data/Gemfile +1 -0
- data/lib/string_tools/core_ext/string.rb +1 -1
- data/lib/string_tools/html.rb +97 -0
- data/lib/string_tools/version.rb +1 -1
- data/lib/string_tools.rb +30 -4
- data/string_tools.gemspec +3 -1
- metadata +37 -7
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDQzNWJhM2NkOTkyMzVlYWQ4N2Y2YWM4OTEwZjQ3MTZjMjMyYzc0Ng==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MTZmZTMyYjcwY2NmYjcwMzZlZjg4MTk0NDU0ODAzZDY3NmVjZDQyNQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ODUyNWM1ODVkZDYxMzhlZTk5OTlhNjRhOTNjZTI5NTA5OTAzYTZiMjMwMmZk
|
10
|
+
ZmFjYmI3MTEzOTM4NWU0NDkzNTk1NzkxMDNmNGQ4YjgzNjg1ZDRmOGE3YmUx
|
11
|
+
YmJiM2RlMDUwMWVlZjJlMGNhZGYzNjdmNTYwOGZjNTRhMWI4ZTM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTUzNGZiYmY2MGQ2NmYwNzk4ZDk2OWY4YTVmMThhNDFiZDE1NTEzNTU2ZDlh
|
14
|
+
YjlmNTc4MjA2YmZhNTkxMGExZWMwNWIyMWJhMDFlMDZlZWU2M2NlNWMzYjJj
|
15
|
+
OGQ3ZDQ5MWU2MjM1YWU1MDY0ZmNiNTliZTZjMTk2ZGI2ZDk2ODY=
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
#### [Current]
|
3
|
+
* 2015-10-26 [62ce841](../../commit/62ce841) - __(Dmitry Bochkarev)__ feature(html): удаление ссылок без хоста по-умолчанию
|
4
|
+
* 2015-10-23 [8fe4384](../../commit/8fe4384) - __(Dmitry Bochkarev)__ fix(sanitizer): нормализация ссылок в юникоде
|
5
|
+
* 2015-10-23 [4017e3d](../../commit/4017e3d) - __(Dmitry Bochkarev)__ fix(html): кириллические ссылки в урлах
|
6
|
+
* 2015-10-23 [e05076f](../../commit/e05076f) - __(Dmitry Bochkarev)__ fix(html): поддержка относительных путей
|
7
|
+
* 2015-10-14 [24bd113](../../commit/24bd113) - __(Dmitry Bochkarev)__ chore: костанта с минимальным размером строки содержащей ссылки
|
8
|
+
* 2015-10-12 [e48da9f](../../commit/e48da9f) - __(Dmitry Bochkarev)__ feature: удаление ссылок из текста
|
9
|
+
* 2015-08-07 [274f820](../../commit/274f820) - __(evseevleo)__ feature(strip_tags): removing open comment tags
|
10
|
+
* 2015-07-20 [94b855d](../../commit/94b855d) - __(Sergey D)__ Release 0.2.0
|
11
|
+
* 2015-07-18 [81cb0f1](../../commit/81cb0f1) - __(Sergey D)__ feat: missing String.natcmp & Colorize methods
|
12
|
+
|
13
|
+
#### v0.1.0
|
14
|
+
* 2015-07-15 [29dd2f8](../../commit/29dd2f8) - __(Sergey D)__ feat: Initial commit
|
15
|
+
* 2015-07-15 [569f0d6](../../commit/569f0d6) - __(Artem Napolskih)__ Initial commit
|
data/Gemfile
CHANGED
data/lib/string_tools/core_ext/string.rb
CHANGED
@@ -17,7 +17,7 @@ class String
|
|
17
17
|
# возвращает строку из которой удалены HTML-теги
|
18
18
|
# символы <>&" остаются без изменения
|
19
19
|
def strip_tags
|
20
|
-
ActionController::Base.helpers.strip_tags(self).to_str
|
20
|
+
ActionController::Base.helpers.strip_tags(self).to_str.gsub(/<!--/, '<--')
|
21
21
|
end
|
22
22
|
|
23
23
|
# '11,3'.to_f
|
data/lib/string_tools/html.rb
ADDED
@@ -0,0 +1,97 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'addressable/uri'
|
4
|
+
require 'simpleidn'
|
5
|
+
|
6
|
+
module StringTools
|
7
|
+
module HTML
|
8
|
+
# минимальная длина строки, в которой могут быть ссылки
|
9
|
+
TEXT_WITH_LINKS_MINIMUM_LENGTH = '<a href="'.length
|
10
|
+
HTML_SERIALIZE_OPTIONS = {
|
11
|
+
indent: 0,
|
12
|
+
# сериализуем в xhtml, поскольку при сериализации в html, libxml2 делает чуть больше, чем хотелось бы:
|
13
|
+
# http://stackoverflow.com/questions/24174032/prevent-nokogiri-from-url-encoding-src-attributes
|
14
|
+
save_with: Nokogiri::XML::Node::SaveOptions::AS_XHTML
|
15
|
+
}
|
16
|
+
|
17
|
+
# Public: Удаляет ссылки на неразрешенные домены
|
18
|
+
#
|
19
|
+
# html - String содержимое потенциально ненужных ссылок
|
20
|
+
# options - Hash
|
21
|
+
# :whitelist - Array of String разрешенныe домены
|
22
|
+
#
|
23
|
+
# Examples
|
24
|
+
# html = '<a href="https://www.yandex.ru">yandex</a>'
|
25
|
+
#
|
26
|
+
# StringTools::HTML.remove_links(html, whitelist: ['google.com'])
|
27
|
+
# # => 'yandex'
|
28
|
+
#
|
29
|
+
# StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'])
|
30
|
+
# # => '<a href="https://www.yandex.ru">yandex</a>'
|
31
|
+
#
|
32
|
+
# StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
|
33
|
+
# # => '<a href="https://www.yandex.ru">yandex</a>'
|
34
|
+
#
|
35
|
+
# html = '<a href="https://yandex.ru">yandex</a>'
|
36
|
+
#
|
37
|
+
# StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
|
38
|
+
# # => 'yandex'
|
39
|
+
#
|
40
|
+
# Returns String without links to external resources
|
41
|
+
def self.remove_links(html, options = {})
|
42
|
+
return html if html.length < TEXT_WITH_LINKS_MINIMUM_LENGTH
|
43
|
+
|
44
|
+
doc = Nokogiri::HTML::DocumentFragment.parse(html)
|
45
|
+
scrubber = LinksRemoveScrubber.new(options)
|
46
|
+
|
47
|
+
doc.css('a'.freeze).each { |node| scrubber.call node }
|
48
|
+
|
49
|
+
if scrubber.done_changes?
|
50
|
+
doc.children.map { |node| node.serialize HTML_SERIALIZE_OPTIONS }.join
|
51
|
+
else
|
52
|
+
html
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class LinksRemoveScrubber
|
57
|
+
def initialize(options)
|
58
|
+
@whitelist = options.fetch(:whitelist)
|
59
|
+
@remove_without_host = options.fetch(:remove_without_host, true)
|
60
|
+
@is_have_done_changes = false
|
61
|
+
end
|
62
|
+
|
63
|
+
def done_changes?
|
64
|
+
@is_have_done_changes
|
65
|
+
end
|
66
|
+
|
67
|
+
def call(node)
|
68
|
+
href = node['href']
|
69
|
+
return if href.blank?
|
70
|
+
uri = Addressable::URI.parse(href).normalize
|
71
|
+
if !uri.host
|
72
|
+
replace_with_content node if @remove_without_host
|
73
|
+
elsif !whitelisted?(SimpleIDN.to_unicode(uri.host))
|
74
|
+
replace_with_content node
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def whitelisted?(domain)
|
79
|
+
host_parts = domain.split('.'.freeze)
|
80
|
+
host = host_parts[-1] # com, ru ...
|
81
|
+
(host_parts.length - 2).downto(0) do |i|
|
82
|
+
subdomain = host_parts[i]
|
83
|
+
host = "#{subdomain}.#{host}"
|
84
|
+
return true if @whitelist.include? host
|
85
|
+
end
|
86
|
+
false
|
87
|
+
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
def replace_with_content(node)
|
92
|
+
node.swap(node.children)
|
93
|
+
@is_have_done_changes = true
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
data/lib/string_tools/version.rb
CHANGED
data/lib/string_tools.rb
CHANGED
@@ -6,6 +6,8 @@ require 'active_support/core_ext/string'
|
|
6
6
|
require 'string_tools/core_ext/string'
|
7
7
|
|
8
8
|
module StringTools
|
9
|
+
autoload :HTML, 'string_tools/html'
|
10
|
+
|
9
11
|
module CharDet
|
10
12
|
# Возвращает true если строка содержит допустимую
|
11
13
|
# последовательность байтов для кодировки utf8 и false в обратном случае
|
@@ -108,11 +110,10 @@ module StringTools
|
|
108
110
|
|
109
111
|
module Sanitizer
|
110
112
|
class Base
|
111
|
-
|
112
113
|
TAGS_WITH_ATTRIBUTES = {
|
113
114
|
'p' => %w(align style),
|
114
115
|
'div' => %w(align style),
|
115
|
-
'span'
|
116
|
+
'span' => %w(align style),
|
116
117
|
'td' => %w(align width valign colspan rowspan style),
|
117
118
|
'th' => %w(align width valign colspan rowspan style),
|
118
119
|
'a' => %w(href target name style),
|
@@ -135,15 +136,40 @@ module StringTools
|
|
135
136
|
attributes.merge!(attr)
|
136
137
|
elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES
|
137
138
|
|
138
|
-
Sanitize.fragment(
|
139
|
+
Sanitize.fragment(
|
140
|
+
str,
|
139
141
|
:attributes => attributes,
|
140
142
|
:elements => elements,
|
141
143
|
:css => {:properties => Sanitize::Config::RELAXED[:css][:properties]},
|
142
144
|
:remove_contents => %w(style javascript),
|
143
|
-
:allow_comments => false
|
145
|
+
:allow_comments => false,
|
146
|
+
:transformers => [LINK_NORMALIZER]
|
144
147
|
)
|
145
148
|
end
|
146
149
|
end
|
150
|
+
|
151
|
+
# приводит ссылки согласно стандарту, не корёжит
|
152
|
+
# http://www.фермаежей.рф => http://www.xn--80ajbaetq5a8a.xn--p1ai
|
153
|
+
class LinkNormalizer
|
154
|
+
def call(env)
|
155
|
+
node = env[:node]
|
156
|
+
case node.name
|
157
|
+
when 'a'.freeze
|
158
|
+
normalize_link node, 'href'.freeze
|
159
|
+
when 'img'.freeze
|
160
|
+
normalize_link node, 'src'.freeze
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
private
|
165
|
+
|
166
|
+
def normalize_link(node, attr_name)
|
167
|
+
return unless node[attr_name]
|
168
|
+
node[attr_name] = Addressable::URI.parse(node[attr_name]).normalize.to_s
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
LINK_NORMALIZER = LinkNormalizer.new
|
147
173
|
end
|
148
174
|
|
149
175
|
module SumInWords
|
data/string_tools.gemspec
CHANGED
@@ -26,13 +26,15 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_runtime_dependency 'addressable', '~> 2.3.2'
|
27
27
|
spec.add_runtime_dependency 'ru_propisju', '~> 2.1.4'
|
28
28
|
spec.add_runtime_dependency 'sanitize', '>= 3.1.2'
|
29
|
+
spec.add_runtime_dependency 'nokogiri'
|
30
|
+
spec.add_runtime_dependency 'simpleidn', '>= 0.0.5'
|
29
31
|
|
30
32
|
spec.add_development_dependency 'bundler', '~> 1.7'
|
31
33
|
spec.add_development_dependency 'rake', '~> 10.0'
|
32
34
|
spec.add_development_dependency 'rspec', '>= 2.14.0'
|
33
35
|
spec.add_development_dependency 'rspec-rails', '>= 2.14.0'
|
34
36
|
spec.add_development_dependency 'rspec-given', '~> 3.5'
|
35
|
-
spec.add_development_dependency 'shoulda-matchers'
|
37
|
+
spec.add_development_dependency 'shoulda-matchers', '~> 2.0'
|
36
38
|
spec.add_development_dependency 'appraisal', '>= 1.0.2'
|
37
39
|
spec.add_development_dependency 'combustion', '>= 0.5.3'
|
38
40
|
spec.add_development_dependency 'simplecov', '>= 0.9'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergey D.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -108,6 +108,34 @@ dependencies:
|
|
108
108
|
- - ! '>='
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 3.1.2
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ! '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: simpleidn
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.0.5
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ! '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.0.5
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: bundler
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -182,16 +210,16 @@ dependencies:
|
|
182
210
|
name: shoulda-matchers
|
183
211
|
requirement: !ruby/object:Gem::Requirement
|
184
212
|
requirements:
|
185
|
-
- - ! '>='
|
213
|
+
- - ~>
|
186
214
|
- !ruby/object:Gem::Version
|
187
|
-
version: '0'
|
215
|
+
version: '2.0'
|
188
216
|
type: :development
|
189
217
|
prerelease: false
|
190
218
|
version_requirements: !ruby/object:Gem::Requirement
|
191
219
|
requirements:
|
192
|
-
- - ! '>='
|
220
|
+
- - ~>
|
193
221
|
- !ruby/object:Gem::Version
|
194
|
-
version: '0'
|
222
|
+
version: '2.0'
|
195
223
|
- !ruby/object:Gem::Dependency
|
196
224
|
name: appraisal
|
197
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -258,6 +286,7 @@ files:
|
|
258
286
|
- .gitignore
|
259
287
|
- .rspec
|
260
288
|
- Appraisals
|
289
|
+
- CHANGELOG.md
|
261
290
|
- Gemfile
|
262
291
|
- LICENSE.txt
|
263
292
|
- Makefile
|
@@ -266,6 +295,7 @@ files:
|
|
266
295
|
- bin/console
|
267
296
|
- lib/string_tools.rb
|
268
297
|
- lib/string_tools/core_ext/string.rb
|
298
|
+
- lib/string_tools/html.rb
|
269
299
|
- lib/string_tools/version.rb
|
270
300
|
- string_tools.gemspec
|
271
301
|
homepage: https://github.com/abak-press/string_tools
|
@@ -288,7 +318,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
288
318
|
version: '0'
|
289
319
|
requirements: []
|
290
320
|
rubyforge_project:
|
291
|
-
rubygems_version: 2.4.
|
321
|
+
rubygems_version: 2.4.3
|
292
322
|
signing_key:
|
293
323
|
specification_version: 4
|
294
324
|
summary: String Tools
|