string_tools 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +8 -8
- data/.gitignore +1 -0
- data/CHANGELOG.md +15 -0
- data/Gemfile +1 -0
- data/lib/string_tools/core_ext/string.rb +1 -1
- data/lib/string_tools/html.rb +97 -0
- data/lib/string_tools/version.rb +1 -1
- data/lib/string_tools.rb +30 -4
- data/string_tools.gemspec +3 -1
- metadata +37 -7
checksums.yaml
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
---
|
2
2
|
!binary "U0hBMQ==":
|
3
3
|
metadata.gz: !binary |-
|
4
|
-
|
4
|
+
ZDQzNWJhM2NkOTkyMzVlYWQ4N2Y2YWM4OTEwZjQ3MTZjMjMyYzc0Ng==
|
5
5
|
data.tar.gz: !binary |-
|
6
|
-
|
6
|
+
MTZmZTMyYjcwY2NmYjcwMzZlZjg4MTk0NDU0ODAzZDY3NmVjZDQyNQ==
|
7
7
|
SHA512:
|
8
8
|
metadata.gz: !binary |-
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
ODUyNWM1ODVkZDYxMzhlZTk5OTlhNjRhOTNjZTI5NTA5OTAzYTZiMjMwMmZk
|
10
|
+
ZmFjYmI3MTEzOTM4NWU0NDkzNTk1NzkxMDNmNGQ4YjgzNjg1ZDRmOGE3YmUx
|
11
|
+
YmJiM2RlMDUwMWVlZjJlMGNhZGYzNjdmNTYwOGZjNTRhMWI4ZTM=
|
12
12
|
data.tar.gz: !binary |-
|
13
|
-
|
14
|
-
|
15
|
-
|
13
|
+
YTUzNGZiYmY2MGQ2NmYwNzk4ZDk2OWY4YTVmMThhNDFiZDE1NTEzNTU2ZDlh
|
14
|
+
YjlmNTc4MjA2YmZhNTkxMGExZWMwNWIyMWJhMDFlMDZlZWU2M2NlNWMzYjJj
|
15
|
+
OGQ3ZDQ5MWU2MjM1YWU1MDY0ZmNiNTliZTZjMTk2ZGI2ZDk2ODY=
|
data/.gitignore
CHANGED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
|
2
|
+
#### [Current]
|
3
|
+
* 2015-10-26 [62ce841](../../commit/62ce841) - __(Dmitry Bochkarev)__ feature(html): удаление ссылок без хоста по-умолчанию
|
4
|
+
* 2015-10-23 [8fe4384](../../commit/8fe4384) - __(Dmitry Bochkarev)__ fix(sanitizer): нормализация ссылок в юникоде
|
5
|
+
* 2015-10-23 [4017e3d](../../commit/4017e3d) - __(Dmitry Bochkarev)__ fix(html): кириллические ссылки в урлах
|
6
|
+
* 2015-10-23 [e05076f](../../commit/e05076f) - __(Dmitry Bochkarev)__ fix(html): поддержка относительных путей
|
7
|
+
* 2015-10-14 [24bd113](../../commit/24bd113) - __(Dmitry Bochkarev)__ chore: константа с минимальным размером строки содержащей ссылки
|
8
|
+
* 2015-10-12 [e48da9f](../../commit/e48da9f) - __(Dmitry Bochkarev)__ feature: удаление ссылок из текста
|
9
|
+
* 2015-08-07 [274f820](../../commit/274f820) - __(evseevleo)__ feature(strip_tags): removing open comment tags
|
10
|
+
* 2015-07-20 [94b855d](../../commit/94b855d) - __(Sergey D)__ Release 0.2.0
|
11
|
+
* 2015-07-18 [81cb0f1](../../commit/81cb0f1) - __(Sergey D)__ feat: missing String.natcmp & Colorize methods
|
12
|
+
|
13
|
+
#### v0.1.0
|
14
|
+
* 2015-07-15 [29dd2f8](../../commit/29dd2f8) - __(Sergey D)__ feat: Initial commit
|
15
|
+
* 2015-07-15 [569f0d6](../../commit/569f0d6) - __(Artem Napolskih)__ Initial commit
|
data/Gemfile
CHANGED
@@ -17,7 +17,7 @@ class String
|
|
17
17
|
# возвращает строку из которой удалены HTML-теги
|
18
18
|
# символы <>&" остаются без изменения
|
19
19
|
def strip_tags
|
20
|
-
ActionController::Base.helpers.strip_tags(self).to_str
|
20
|
+
ActionController::Base.helpers.strip_tags(self).to_str.gsub(/<!--/, '<--')
|
21
21
|
end
|
22
22
|
|
23
23
|
# '11,3'.to_f
|
@@ -0,0 +1,97 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'addressable/uri'
|
4
|
+
require 'simpleidn'
|
5
|
+
|
6
|
+
module StringTools
|
7
|
+
module HTML
|
8
|
+
# минимальная длина строки, в которой могут быть ссылки
|
9
|
+
TEXT_WITH_LINKS_MINIMUM_LENGTH = '<a href="'.length
|
10
|
+
HTML_SERIALIZE_OPTIONS = {
|
11
|
+
indent: 0,
|
12
|
+
# сериализуем в xhtml, поскольку при сериализации в html, libxml2 делает чуть больше, чем хотелось бы:
|
13
|
+
# http://stackoverflow.com/questions/24174032/prevent-nokogiri-from-url-encoding-src-attributes
|
14
|
+
save_with: Nokogiri::XML::Node::SaveOptions::AS_XHTML
|
15
|
+
}
|
16
|
+
|
17
|
+
# Public: Удаляет ссылки на неразрешенные домены
|
18
|
+
#
|
19
|
+
# html - String содержимое потенциально ненужных ссылок
|
20
|
+
# options - Hash
|
21
|
+
# :whitelist - Array of String разрешенные домены
|
22
|
+
#
|
23
|
+
# Examples
|
24
|
+
# html = '<a href="https://www.yandex.ru">yandex</a>'
|
25
|
+
#
|
26
|
+
# StringTools::HTML.remove_links(html, whitelist: ['google.com'])
|
27
|
+
# # => 'yandex'
|
28
|
+
#
|
29
|
+
# StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'])
|
30
|
+
# # => '<a href="https://www.yandex.ru">yandex</a>'
|
31
|
+
#
|
32
|
+
# StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
|
33
|
+
# # => '<a href="https://www.yandex.ru">yandex</a>'
|
34
|
+
#
|
35
|
+
# html = '<a href="https://yandex.ru">yandex</a>'
|
36
|
+
#
|
37
|
+
# StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
|
38
|
+
# # => 'yandex'
|
39
|
+
#
|
40
|
+
# Returns String without links to external resources
|
41
|
+
def self.remove_links(html, options = {})
|
42
|
+
return html if html.length < TEXT_WITH_LINKS_MINIMUM_LENGTH
|
43
|
+
|
44
|
+
doc = Nokogiri::HTML::DocumentFragment.parse(html)
|
45
|
+
scrubber = LinksRemoveScrubber.new(options)
|
46
|
+
|
47
|
+
doc.css('a'.freeze).each { |node| scrubber.call node }
|
48
|
+
|
49
|
+
if scrubber.done_changes?
|
50
|
+
doc.children.map { |node| node.serialize HTML_SERIALIZE_OPTIONS }.join
|
51
|
+
else
|
52
|
+
html
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
class LinksRemoveScrubber
|
57
|
+
def initialize(options)
|
58
|
+
@whitelist = options.fetch(:whitelist)
|
59
|
+
@remove_without_host = options.fetch(:remove_without_host, true)
|
60
|
+
@is_have_done_changes = false
|
61
|
+
end
|
62
|
+
|
63
|
+
def done_changes?
|
64
|
+
@is_have_done_changes
|
65
|
+
end
|
66
|
+
|
67
|
+
def call(node)
|
68
|
+
href = node['href']
|
69
|
+
return if href.blank?
|
70
|
+
uri = Addressable::URI.parse(href).normalize
|
71
|
+
if !uri.host
|
72
|
+
replace_with_content node if @remove_without_host
|
73
|
+
elsif !whitelisted?(SimpleIDN.to_unicode(uri.host))
|
74
|
+
replace_with_content node
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
78
|
+
def whitelisted?(domain)
|
79
|
+
host_parts = domain.split('.'.freeze)
|
80
|
+
host = host_parts[-1] # com, ru ...
|
81
|
+
(host_parts.length - 2).downto(0) do |i|
|
82
|
+
subdomain = host_parts[i]
|
83
|
+
host = "#{subdomain}.#{host}"
|
84
|
+
return true if @whitelist.include? host
|
85
|
+
end
|
86
|
+
false
|
87
|
+
end
|
88
|
+
|
89
|
+
private
|
90
|
+
|
91
|
+
def replace_with_content(node)
|
92
|
+
node.swap(node.children)
|
93
|
+
@is_have_done_changes = true
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
end
|
data/lib/string_tools/version.rb
CHANGED
data/lib/string_tools.rb
CHANGED
@@ -6,6 +6,8 @@ require 'active_support/core_ext/string'
|
|
6
6
|
require 'string_tools/core_ext/string'
|
7
7
|
|
8
8
|
module StringTools
|
9
|
+
autoload :HTML, 'string_tools/html'
|
10
|
+
|
9
11
|
module CharDet
|
10
12
|
# Возвращает true если строка содержит допустимую
|
11
13
|
# последовательность байтов для кодировки utf8 и false в обратном случае
|
@@ -108,11 +110,10 @@ module StringTools
|
|
108
110
|
|
109
111
|
module Sanitizer
|
110
112
|
class Base
|
111
|
-
|
112
113
|
TAGS_WITH_ATTRIBUTES = {
|
113
114
|
'p' => %w(align style),
|
114
115
|
'div' => %w(align style),
|
115
|
-
'span'
|
116
|
+
'span' => %w(align style),
|
116
117
|
'td' => %w(align width valign colspan rowspan style),
|
117
118
|
'th' => %w(align width valign colspan rowspan style),
|
118
119
|
'a' => %w(href target name style),
|
@@ -135,15 +136,40 @@ module StringTools
|
|
135
136
|
attributes.merge!(attr)
|
136
137
|
elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES
|
137
138
|
|
138
|
-
Sanitize.fragment(
|
139
|
+
Sanitize.fragment(
|
140
|
+
str,
|
139
141
|
:attributes => attributes,
|
140
142
|
:elements => elements,
|
141
143
|
:css => {:properties => Sanitize::Config::RELAXED[:css][:properties]},
|
142
144
|
:remove_contents => %w(style javascript),
|
143
|
-
:allow_comments => false
|
145
|
+
:allow_comments => false,
|
146
|
+
:transformers => [LINK_NORMALIZER]
|
144
147
|
)
|
145
148
|
end
|
146
149
|
end
|
150
|
+
|
151
|
+
# приводит ссылки согласно стандарту, не корёжит
|
152
|
+
# http://www.фермаежей.рф => http://www.xn--80ajbaetq5a8a.xn--p1ai
|
153
|
+
class LinkNormalizer
|
154
|
+
def call(env)
|
155
|
+
node = env[:node]
|
156
|
+
case node.name
|
157
|
+
when 'a'.freeze
|
158
|
+
normalize_link node, 'href'.freeze
|
159
|
+
when 'img'.freeze
|
160
|
+
normalize_link node, 'src'.freeze
|
161
|
+
end
|
162
|
+
end
|
163
|
+
|
164
|
+
private
|
165
|
+
|
166
|
+
def normalize_link(node, attr_name)
|
167
|
+
return unless node[attr_name]
|
168
|
+
node[attr_name] = Addressable::URI.parse(node[attr_name]).normalize.to_s
|
169
|
+
end
|
170
|
+
end
|
171
|
+
|
172
|
+
LINK_NORMALIZER = LinkNormalizer.new
|
147
173
|
end
|
148
174
|
|
149
175
|
module SumInWords
|
data/string_tools.gemspec
CHANGED
@@ -26,13 +26,15 @@ Gem::Specification.new do |spec|
|
|
26
26
|
spec.add_runtime_dependency 'addressable', '~> 2.3.2'
|
27
27
|
spec.add_runtime_dependency 'ru_propisju', '~> 2.1.4'
|
28
28
|
spec.add_runtime_dependency 'sanitize', '>= 3.1.2'
|
29
|
+
spec.add_runtime_dependency 'nokogiri'
|
30
|
+
spec.add_runtime_dependency 'simpleidn', '>= 0.0.5'
|
29
31
|
|
30
32
|
spec.add_development_dependency 'bundler', '~> 1.7'
|
31
33
|
spec.add_development_dependency 'rake', '~> 10.0'
|
32
34
|
spec.add_development_dependency 'rspec', '>= 2.14.0'
|
33
35
|
spec.add_development_dependency 'rspec-rails', '>= 2.14.0'
|
34
36
|
spec.add_development_dependency 'rspec-given', '~> 3.5'
|
35
|
-
spec.add_development_dependency 'shoulda-matchers'
|
37
|
+
spec.add_development_dependency 'shoulda-matchers', '~> 2.0'
|
36
38
|
spec.add_development_dependency 'appraisal', '>= 1.0.2'
|
37
39
|
spec.add_development_dependency 'combustion', '>= 0.5.3'
|
38
40
|
spec.add_development_dependency 'simplecov', '>= 0.9'
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergey D.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2016-04-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activerecord
|
@@ -108,6 +108,34 @@ dependencies:
|
|
108
108
|
- - ! '>='
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 3.1.2
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: nokogiri
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ! '>='
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: simpleidn
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - ! '>='
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: 0.0.5
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - ! '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: 0.0.5
|
111
139
|
- !ruby/object:Gem::Dependency
|
112
140
|
name: bundler
|
113
141
|
requirement: !ruby/object:Gem::Requirement
|
@@ -182,16 +210,16 @@ dependencies:
|
|
182
210
|
name: shoulda-matchers
|
183
211
|
requirement: !ruby/object:Gem::Requirement
|
184
212
|
requirements:
|
185
|
-
- - ! '>='
|
213
|
+
- - ~>
|
186
214
|
- !ruby/object:Gem::Version
|
187
|
-
version: '0'
|
215
|
+
version: '2.0'
|
188
216
|
type: :development
|
189
217
|
prerelease: false
|
190
218
|
version_requirements: !ruby/object:Gem::Requirement
|
191
219
|
requirements:
|
192
|
-
- - ! '>='
|
220
|
+
- - ~>
|
193
221
|
- !ruby/object:Gem::Version
|
194
|
-
version: '0'
|
222
|
+
version: '2.0'
|
195
223
|
- !ruby/object:Gem::Dependency
|
196
224
|
name: appraisal
|
197
225
|
requirement: !ruby/object:Gem::Requirement
|
@@ -258,6 +286,7 @@ files:
|
|
258
286
|
- .gitignore
|
259
287
|
- .rspec
|
260
288
|
- Appraisals
|
289
|
+
- CHANGELOG.md
|
261
290
|
- Gemfile
|
262
291
|
- LICENSE.txt
|
263
292
|
- Makefile
|
@@ -266,6 +295,7 @@ files:
|
|
266
295
|
- bin/console
|
267
296
|
- lib/string_tools.rb
|
268
297
|
- lib/string_tools/core_ext/string.rb
|
298
|
+
- lib/string_tools/html.rb
|
269
299
|
- lib/string_tools/version.rb
|
270
300
|
- string_tools.gemspec
|
271
301
|
homepage: https://github.com/abak-press/string_tools
|
@@ -288,7 +318,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
288
318
|
version: '0'
|
289
319
|
requirements: []
|
290
320
|
rubyforge_project:
|
291
|
-
rubygems_version: 2.4.
|
321
|
+
rubygems_version: 2.4.3
|
292
322
|
signing_key:
|
293
323
|
specification_version: 4
|
294
324
|
summary: String Tools
|