string_tools 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzJkYTUyMTM2ODcwZGMzYjg3NWQ1ZTE4MTQ2YzEyYmY1MjdhMzg1MA==
4
+ ZDQzNWJhM2NkOTkyMzVlYWQ4N2Y2YWM4OTEwZjQ3MTZjMjMyYzc0Ng==
5
5
  data.tar.gz: !binary |-
6
- ODJhNTU1Mzg5YTNkNTNkMjJkNmRlZjk5MzRhN2QzOTNjNjEyYjYwYg==
6
+ MTZmZTMyYjcwY2NmYjcwMzZlZjg4MTk0NDU0ODAzZDY3NmVjZDQyNQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- YzFjYWZlNzY5M2ZmZGU2MDYyZWQ0ZjJlZmM3NDhiMzE5MzMyYTU5MGJhNmY0
10
- YjIzMTI2YzBkNDk0MjdlZjA2MWRjMzU0MWM5NmE4NTE3OGY4N2RhNzYxNmFk
11
- NjFlMzNlYWY5YWVlMzIwYzc4NzhjODY0ZTk1ZGI3MmYzMzllYjU=
9
+ ODUyNWM1ODVkZDYxMzhlZTk5OTlhNjRhOTNjZTI5NTA5OTAzYTZiMjMwMmZk
10
+ ZmFjYmI3MTEzOTM4NWU0NDkzNTk1NzkxMDNmNGQ4YjgzNjg1ZDRmOGE3YmUx
11
+ YmJiM2RlMDUwMWVlZjJlMGNhZGYzNjdmNTYwOGZjNTRhMWI4ZTM=
12
12
  data.tar.gz: !binary |-
13
- NDQyOTRjNTQwZTU5ODViMmRhMWI5NGJhMDBmNWYwYTMxOWRiNDBjYmIwOWE1
14
- NTI4Y2U0MGQ3MTEwMWM3MDU0NTlhYjIzZmQyNGE3MGU2OWNmNDU4YTU4ZTFk
15
- OTY0MWYxN2VjYjlkMjRmYzI1ODA1ODRiNzhkNzRkM2EwMTI2OTM=
13
+ YTUzNGZiYmY2MGQ2NmYwNzk4ZDk2OWY4YTVmMThhNDFiZDE1NTEzNTU2ZDlh
14
+ YjlmNTc4MjA2YmZhNTkxMGExZWMwNWIyMWJhMDFlMDZlZWU2M2NlNWMzYjJj
15
+ OGQ3ZDQ5MWU2MjM1YWU1MDY0ZmNiNTliZTZjMTk2ZGI2ZDk2ODY=
data/.gitignore CHANGED
@@ -7,3 +7,4 @@
7
7
  /pkg/
8
8
  /spec/reports/
9
9
  /tmp/
10
+ /gemfiles
data/CHANGELOG.md ADDED
@@ -0,0 +1,15 @@
1
+
2
+ #### [Current]
3
+ * 2015-10-26 [62ce841](../../commit/62ce841) - __(Dmitry Bochkarev)__ feature(html): удаление ссылок без хоста по умолчанию
4
+ * 2015-10-23 [8fe4384](../../commit/8fe4384) - __(Dmitry Bochkarev)__ fix(sanitizer): нормализация ссылок в юникоде
5
+ * 2015-10-23 [4017e3d](../../commit/4017e3d) - __(Dmitry Bochkarev)__ fix(html): кириллические ссылки в урлах
6
+ * 2015-10-23 [e05076f](../../commit/e05076f) - __(Dmitry Bochkarev)__ fix(html): поддержка относительных путей
7
+ * 2015-10-14 [24bd113](../../commit/24bd113) - __(Dmitry Bochkarev)__ chore: константа с минимальным размером строки, содержащей ссылки
8
+ * 2015-10-12 [e48da9f](../../commit/e48da9f) - __(Dmitry Bochkarev)__ feature: удаление ссылок из текста
9
+ * 2015-08-07 [274f820](../../commit/274f820) - __(evseevleo)__ feature(strip_tags): removing open comment tags
10
+ * 2015-07-20 [94b855d](../../commit/94b855d) - __(Sergey D)__ Release 0.2.0
11
+ * 2015-07-18 [81cb0f1](../../commit/81cb0f1) - __(Sergey D)__ feat: missing String.natcmp & Colorize methods
12
+
13
+ #### v0.1.0
14
+ * 2015-07-15 [29dd2f8](../../commit/29dd2f8) - __(Sergey D)__ feat: Initial commit
15
+ * 2015-07-15 [569f0d6](../../commit/569f0d6) - __(Artem Napolskih)__ Initial commit
data/Gemfile CHANGED
@@ -2,3 +2,4 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in string_tools.gemspec
4
4
  gemspec
5
+
@@ -17,7 +17,7 @@ class String
17
17
  # возвращает строку из которой удалены HTML-теги
18
18
  # символы <>&" остаются без изменения
19
19
  def strip_tags
20
- ActionController::Base.helpers.strip_tags(self).to_str
20
+ ActionController::Base.helpers.strip_tags(self).to_str.gsub(/<!--/, '<--')
21
21
  end
22
22
 
23
23
  # '11,3'.to_f
@@ -0,0 +1,97 @@
1
+ # coding: utf-8
2
+ require 'nokogiri'
3
+ require 'addressable/uri'
4
+ require 'simpleidn'
5
+
6
+ module StringTools
7
+ module HTML
8
+ # минимальная длина строки, в которой могут быть ссылки
9
+ TEXT_WITH_LINKS_MINIMUM_LENGTH = '<a href="'.length
10
+ HTML_SERIALIZE_OPTIONS = {
11
+ indent: 0,
12
+ # сериализуем в xhtml, поскольку при сериализации в html, libxml2 делает чуть больше, чем хотелось бы:
13
+ # http://stackoverflow.com/questions/24174032/prevent-nokogiri-from-url-encoding-src-attributes
14
+ save_with: Nokogiri::XML::Node::SaveOptions::AS_XHTML
15
+ }
16
+
17
+ # Public: Удаляет ссылки на неразрешенные домены
18
+ #
19
+ # html - String содержимое потенциально ненужных ссылок
20
+ # options - Hash
21
+ # :whitelist - Array of String разрешенные домены
22
+ #
23
+ # Examples
24
+ # html = '<a href="https://www.yandex.ru">yandex</a>'
25
+ #
26
+ # StringTools::HTML.remove_links(html, whitelist: ['google.com'])
27
+ # # => 'yandex'
28
+ #
29
+ # StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'])
30
+ # # => '<a href="https://www.yandex.ru">yandex</a>'
31
+ #
32
+ # StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
33
+ # # => '<a href="https://www.yandex.ru">yandex</a>'
34
+ #
35
+ # html = '<a href="https://yandex.ru">yandex</a>'
36
+ #
37
+ # StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
38
+ # # => 'yandex'
39
+ #
40
+ # Returns String without links to external resources
41
+ def self.remove_links(html, options = {})
42
+ return html if html.length < TEXT_WITH_LINKS_MINIMUM_LENGTH
43
+
44
+ doc = Nokogiri::HTML::DocumentFragment.parse(html)
45
+ scrubber = LinksRemoveScrubber.new(options)
46
+
47
+ doc.css('a'.freeze).each { |node| scrubber.call node }
48
+
49
+ if scrubber.done_changes?
50
+ doc.children.map { |node| node.serialize HTML_SERIALIZE_OPTIONS }.join
51
+ else
52
+ html
53
+ end
54
+ end
55
+
56
+ class LinksRemoveScrubber
57
+ def initialize(options)
58
+ @whitelist = options.fetch(:whitelist)
59
+ @remove_without_host = options.fetch(:remove_without_host, true)
60
+ @is_have_done_changes = false
61
+ end
62
+
63
+ def done_changes?
64
+ @is_have_done_changes
65
+ end
66
+
67
+ def call(node)
68
+ href = node['href']
69
+ return if href.blank?
70
+ uri = Addressable::URI.parse(href).normalize
71
+ if !uri.host
72
+ replace_with_content node if @remove_without_host
73
+ elsif !whitelisted?(SimpleIDN.to_unicode(uri.host))
74
+ replace_with_content node
75
+ end
76
+ end
77
+
78
+ def whitelisted?(domain)
79
+ host_parts = domain.split('.'.freeze)
80
+ host = host_parts[-1] # com, ru ...
81
+ (host_parts.length - 2).downto(0) do |i|
82
+ subdomain = host_parts[i]
83
+ host = "#{subdomain}.#{host}"
84
+ return true if @whitelist.include? host
85
+ end
86
+ false
87
+ end
88
+
89
+ private
90
+
91
+ def replace_with_content(node)
92
+ node.swap(node.children)
93
+ @is_have_done_changes = true
94
+ end
95
+ end
96
+ end
97
+ end
@@ -1,3 +1,3 @@
1
1
  module StringTools
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'
3
3
  end
data/lib/string_tools.rb CHANGED
@@ -6,6 +6,8 @@ require 'active_support/core_ext/string'
6
6
  require 'string_tools/core_ext/string'
7
7
 
8
8
  module StringTools
9
+ autoload :HTML, 'string_tools/html'
10
+
9
11
  module CharDet
10
12
  # Возвращает true если строка содержит допустимую
11
13
  # последовательность байтов для кодировки utf8 и false в обратном случае
@@ -108,11 +110,10 @@ module StringTools
108
110
 
109
111
  module Sanitizer
110
112
  class Base
111
-
112
113
  TAGS_WITH_ATTRIBUTES = {
113
114
  'p' => %w(align style),
114
115
  'div' => %w(align style),
115
- 'span' => %w(align style),
116
+ 'span' => %w(align style),
116
117
  'td' => %w(align width valign colspan rowspan style),
117
118
  'th' => %w(align width valign colspan rowspan style),
118
119
  'a' => %w(href target name style),
@@ -135,15 +136,40 @@ module StringTools
135
136
  attributes.merge!(attr)
136
137
  elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES
137
138
 
138
- Sanitize.fragment(str,
139
+ Sanitize.fragment(
140
+ str,
139
141
  :attributes => attributes,
140
142
  :elements => elements,
141
143
  :css => {:properties => Sanitize::Config::RELAXED[:css][:properties]},
142
144
  :remove_contents => %w(style javascript),
143
- :allow_comments => false
145
+ :allow_comments => false,
146
+ :transformers => [LINK_NORMALIZER]
144
147
  )
145
148
  end
146
149
  end
150
+
151
+ # приводит ссылки согласно стандарту, не корёжит
152
+ # http://www.фермаежей.рф => http://www.xn--80ajbaetq5a8a.xn--p1ai
153
+ class LinkNormalizer
154
+ def call(env)
155
+ node = env[:node]
156
+ case node.name
157
+ when 'a'.freeze
158
+ normalize_link node, 'href'.freeze
159
+ when 'img'.freeze
160
+ normalize_link node, 'src'.freeze
161
+ end
162
+ end
163
+
164
+ private
165
+
166
+ def normalize_link(node, attr_name)
167
+ return unless node[attr_name]
168
+ node[attr_name] = Addressable::URI.parse(node[attr_name]).normalize.to_s
169
+ end
170
+ end
171
+
172
+ LINK_NORMALIZER = LinkNormalizer.new
147
173
  end
148
174
 
149
175
  module SumInWords
data/string_tools.gemspec CHANGED
@@ -26,13 +26,15 @@ Gem::Specification.new do |spec|
26
26
  spec.add_runtime_dependency 'addressable', '~> 2.3.2'
27
27
  spec.add_runtime_dependency 'ru_propisju', '~> 2.1.4'
28
28
  spec.add_runtime_dependency 'sanitize', '>= 3.1.2'
29
+ spec.add_runtime_dependency 'nokogiri'
30
+ spec.add_runtime_dependency 'simpleidn', '>= 0.0.5'
29
31
 
30
32
  spec.add_development_dependency 'bundler', '~> 1.7'
31
33
  spec.add_development_dependency 'rake', '~> 10.0'
32
34
  spec.add_development_dependency 'rspec', '>= 2.14.0'
33
35
  spec.add_development_dependency 'rspec-rails', '>= 2.14.0'
34
36
  spec.add_development_dependency 'rspec-given', '~> 3.5'
35
- spec.add_development_dependency 'shoulda-matchers'
37
+ spec.add_development_dependency 'shoulda-matchers', '~> 2.0'
36
38
  spec.add_development_dependency 'appraisal', '>= 1.0.2'
37
39
  spec.add_development_dependency 'combustion', '>= 0.5.3'
38
40
  spec.add_development_dependency 'simplecov', '>= 0.9'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergey D.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-20 00:00:00.000000000 Z
11
+ date: 2016-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -108,6 +108,34 @@ dependencies:
108
108
  - - ! '>='
109
109
  - !ruby/object:Gem::Version
110
110
  version: 3.1.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: simpleidn
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: 0.0.5
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: 0.0.5
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: bundler
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -182,16 +210,16 @@ dependencies:
182
210
  name: shoulda-matchers
183
211
  requirement: !ruby/object:Gem::Requirement
184
212
  requirements:
185
- - - ! '>='
213
+ - - ~>
186
214
  - !ruby/object:Gem::Version
187
- version: '0'
215
+ version: '2.0'
188
216
  type: :development
189
217
  prerelease: false
190
218
  version_requirements: !ruby/object:Gem::Requirement
191
219
  requirements:
192
- - - ! '>='
220
+ - - ~>
193
221
  - !ruby/object:Gem::Version
194
- version: '0'
222
+ version: '2.0'
195
223
  - !ruby/object:Gem::Dependency
196
224
  name: appraisal
197
225
  requirement: !ruby/object:Gem::Requirement
@@ -258,6 +286,7 @@ files:
258
286
  - .gitignore
259
287
  - .rspec
260
288
  - Appraisals
289
+ - CHANGELOG.md
261
290
  - Gemfile
262
291
  - LICENSE.txt
263
292
  - Makefile
@@ -266,6 +295,7 @@ files:
266
295
  - bin/console
267
296
  - lib/string_tools.rb
268
297
  - lib/string_tools/core_ext/string.rb
298
+ - lib/string_tools/html.rb
269
299
  - lib/string_tools/version.rb
270
300
  - string_tools.gemspec
271
301
  homepage: https://github.com/abak-press/string_tools
@@ -288,7 +318,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
288
318
  version: '0'
289
319
  requirements: []
290
320
  rubyforge_project:
291
- rubygems_version: 2.4.7
321
+ rubygems_version: 2.4.3
292
322
  signing_key:
293
323
  specification_version: 4
294
324
  summary: String Tools