string_tools 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,15 +1,15 @@
1
1
  ---
2
2
  !binary "U0hBMQ==":
3
3
  metadata.gz: !binary |-
4
- MzJkYTUyMTM2ODcwZGMzYjg3NWQ1ZTE4MTQ2YzEyYmY1MjdhMzg1MA==
4
+ ZDQzNWJhM2NkOTkyMzVlYWQ4N2Y2YWM4OTEwZjQ3MTZjMjMyYzc0Ng==
5
5
  data.tar.gz: !binary |-
6
- ODJhNTU1Mzg5YTNkNTNkMjJkNmRlZjk5MzRhN2QzOTNjNjEyYjYwYg==
6
+ MTZmZTMyYjcwY2NmYjcwMzZlZjg4MTk0NDU0ODAzZDY3NmVjZDQyNQ==
7
7
  SHA512:
8
8
  metadata.gz: !binary |-
9
- YzFjYWZlNzY5M2ZmZGU2MDYyZWQ0ZjJlZmM3NDhiMzE5MzMyYTU5MGJhNmY0
10
- YjIzMTI2YzBkNDk0MjdlZjA2MWRjMzU0MWM5NmE4NTE3OGY4N2RhNzYxNmFk
11
- NjFlMzNlYWY5YWVlMzIwYzc4NzhjODY0ZTk1ZGI3MmYzMzllYjU=
9
+ ODUyNWM1ODVkZDYxMzhlZTk5OTlhNjRhOTNjZTI5NTA5OTAzYTZiMjMwMmZk
10
+ ZmFjYmI3MTEzOTM4NWU0NDkzNTk1NzkxMDNmNGQ4YjgzNjg1ZDRmOGE3YmUx
11
+ YmJiM2RlMDUwMWVlZjJlMGNhZGYzNjdmNTYwOGZjNTRhMWI4ZTM=
12
12
  data.tar.gz: !binary |-
13
- NDQyOTRjNTQwZTU5ODViMmRhMWI5NGJhMDBmNWYwYTMxOWRiNDBjYmIwOWE1
14
- NTI4Y2U0MGQ3MTEwMWM3MDU0NTlhYjIzZmQyNGE3MGU2OWNmNDU4YTU4ZTFk
15
- OTY0MWYxN2VjYjlkMjRmYzI1ODA1ODRiNzhkNzRkM2EwMTI2OTM=
13
+ YTUzNGZiYmY2MGQ2NmYwNzk4ZDk2OWY4YTVmMThhNDFiZDE1NTEzNTU2ZDlh
14
+ YjlmNTc4MjA2YmZhNTkxMGExZWMwNWIyMWJhMDFlMDZlZWU2M2NlNWMzYjJj
15
+ OGQ3ZDQ5MWU2MjM1YWU1MDY0ZmNiNTliZTZjMTk2ZGI2ZDk2ODY=
data/.gitignore CHANGED
@@ -7,3 +7,4 @@
7
7
  /pkg/
8
8
  /spec/reports/
9
9
  /tmp/
10
+ /gemfiles
data/CHANGELOG.md ADDED
@@ -0,0 +1,15 @@
1
+
2
+ #### [Current]
3
+ * 2015-10-26 [62ce841](../../commit/62ce841) - __(Dmitry Bochkarev)__ feature(html): удаление ссылок без хоста по умолчанию
4
+ * 2015-10-23 [8fe4384](../../commit/8fe4384) - __(Dmitry Bochkarev)__ fix(sanitizer): нормализация ссылок в юникоде
5
+ * 2015-10-23 [4017e3d](../../commit/4017e3d) - __(Dmitry Bochkarev)__ fix(html): кириллические ссылки в урлах
6
+ * 2015-10-23 [e05076f](../../commit/e05076f) - __(Dmitry Bochkarev)__ fix(html): поддержка относительных путей
7
+ * 2015-10-14 [24bd113](../../commit/24bd113) - __(Dmitry Bochkarev)__ chore: константа с минимальным размером строки, содержащей ссылки
8
+ * 2015-10-12 [e48da9f](../../commit/e48da9f) - __(Dmitry Bochkarev)__ feature: удаление ссылок из текста
9
+ * 2015-08-07 [274f820](../../commit/274f820) - __(evseevleo)__ feature(strip_tags): removing open comment tags
10
+ * 2015-07-20 [94b855d](../../commit/94b855d) - __(Sergey D)__ Release 0.2.0
11
+ * 2015-07-18 [81cb0f1](../../commit/81cb0f1) - __(Sergey D)__ feat: missing String.natcmp & Colorize methods
12
+
13
+ #### v0.1.0
14
+ * 2015-07-15 [29dd2f8](../../commit/29dd2f8) - __(Sergey D)__ feat: Initial commit
15
+ * 2015-07-15 [569f0d6](../../commit/569f0d6) - __(Artem Napolskih)__ Initial commit
data/Gemfile CHANGED
@@ -2,3 +2,4 @@ source 'https://rubygems.org'
2
2
 
3
3
  # Specify your gem's dependencies in string_tools.gemspec
4
4
  gemspec
5
+
@@ -17,7 +17,7 @@ class String
17
17
  # возвращает строку из которой удалены HTML-теги
18
18
  # символы <>&" остаются без изменения
19
19
  def strip_tags
20
- ActionController::Base.helpers.strip_tags(self).to_str
20
+ ActionController::Base.helpers.strip_tags(self).to_str.gsub(/<!--/, '<--')
21
21
  end
22
22
 
23
23
  # '11,3'.to_f
@@ -0,0 +1,97 @@
1
+ # coding: utf-8
2
+ require 'nokogiri'
3
+ require 'addressable/uri'
4
+ require 'simpleidn'
5
+
6
+ module StringTools
7
+ module HTML
8
+ # минимальная длина строки, в которой могут быть ссылки
9
+ TEXT_WITH_LINKS_MINIMUM_LENGTH = '<a href="'.length
10
+ HTML_SERIALIZE_OPTIONS = {
11
+ indent: 0,
12
+ # сериализуем в xhtml, поскольку при сериализации в html, libxml2 делает чуть больше, чем хотелось бы:
13
+ # http://stackoverflow.com/questions/24174032/prevent-nokogiri-from-url-encoding-src-attributes
14
+ save_with: Nokogiri::XML::Node::SaveOptions::AS_XHTML
15
+ }
16
+
17
+ # Public: Удаляет ссылки на неразрешенные домены
18
+ #
19
+ # html - String содержимое потенциально ненужных ссылок
20
+ # options - Hash
21
+ # :whitelist - Array of String разрешенныe домены
22
+ #
23
+ # Examples
24
+ # html = '<a href="https://www.yandex.ru">yandex</a>'
25
+ #
26
+ # StringTools::HTML.remove_links(html, whitelist: ['google.com'])
27
+ # # => 'yandex'
28
+ #
29
+ # StringTools::HTML.remove_links(html, whitelist: ['yandex.ru'])
30
+ # # => '<a href="https://www.yandex.ru">yandex</a>'
31
+ #
32
+ # StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
33
+ # # => '<a href="https://www.yandex.ru">yandex</a>'
34
+ #
35
+ # html = '<a href="https://yandex.ru">yandex</a>'
36
+ #
37
+ # StringTools::HTML.remove_links(html, whitelist: ['www.yandex.ru'])
38
+ # # => 'yandex'
39
+ #
40
+ # Returns String without links to external resources
41
+ def self.remove_links(html, options = {})
42
+ return html if html.length < TEXT_WITH_LINKS_MINIMUM_LENGTH
43
+
44
+ doc = Nokogiri::HTML::DocumentFragment.parse(html)
45
+ scrubber = LinksRemoveScrubber.new(options)
46
+
47
+ doc.css('a'.freeze).each { |node| scrubber.call node }
48
+
49
+ if scrubber.done_changes?
50
+ doc.children.map { |node| node.serialize HTML_SERIALIZE_OPTIONS }.join
51
+ else
52
+ html
53
+ end
54
+ end
55
+
56
+ class LinksRemoveScrubber
57
+ def initialize(options)
58
+ @whitelist = options.fetch(:whitelist)
59
+ @remove_without_host = options.fetch(:remove_without_host, true)
60
+ @is_have_done_changes = false
61
+ end
62
+
63
+ def done_changes?
64
+ @is_have_done_changes
65
+ end
66
+
67
+ def call(node)
68
+ href = node['href']
69
+ return if href.blank?
70
+ uri = Addressable::URI.parse(href).normalize
71
+ if !uri.host
72
+ replace_with_content node if @remove_without_host
73
+ elsif !whitelisted?(SimpleIDN.to_unicode(uri.host))
74
+ replace_with_content node
75
+ end
76
+ end
77
+
78
+ def whitelisted?(domain)
79
+ host_parts = domain.split('.'.freeze)
80
+ host = host_parts[-1] # com, ru ...
81
+ (host_parts.length - 2).downto(0) do |i|
82
+ subdomain = host_parts[i]
83
+ host = "#{subdomain}.#{host}"
84
+ return true if @whitelist.include? host
85
+ end
86
+ false
87
+ end
88
+
89
+ private
90
+
91
+ def replace_with_content(node)
92
+ node.swap(node.children)
93
+ @is_have_done_changes = true
94
+ end
95
+ end
96
+ end
97
+ end
@@ -1,3 +1,3 @@
1
1
  module StringTools
2
- VERSION = '0.2.0'
2
+ VERSION = '0.3.0'
3
3
  end
data/lib/string_tools.rb CHANGED
@@ -6,6 +6,8 @@ require 'active_support/core_ext/string'
6
6
  require 'string_tools/core_ext/string'
7
7
 
8
8
  module StringTools
9
+ autoload :HTML, 'string_tools/html'
10
+
9
11
  module CharDet
10
12
  # Возвращает true если строка содержит допустимую
11
13
  # последовательность байтов для кодировки utf8 и false в обратном случае
@@ -108,11 +110,10 @@ module StringTools
108
110
 
109
111
  module Sanitizer
110
112
  class Base
111
-
112
113
  TAGS_WITH_ATTRIBUTES = {
113
114
  'p' => %w(align style),
114
115
  'div' => %w(align style),
115
- 'span' => %w(align style),
116
+ 'span' => %w(align style),
116
117
  'td' => %w(align width valign colspan rowspan style),
117
118
  'th' => %w(align width valign colspan rowspan style),
118
119
  'a' => %w(href target name style),
@@ -135,15 +136,40 @@ module StringTools
135
136
  attributes.merge!(attr)
136
137
  elements = attributes.keys | TAGS_WITHOUT_ATTRIBUTES
137
138
 
138
- Sanitize.fragment(str,
139
+ Sanitize.fragment(
140
+ str,
139
141
  :attributes => attributes,
140
142
  :elements => elements,
141
143
  :css => {:properties => Sanitize::Config::RELAXED[:css][:properties]},
142
144
  :remove_contents => %w(style javascript),
143
- :allow_comments => false
145
+ :allow_comments => false,
146
+ :transformers => [LINK_NORMALIZER]
144
147
  )
145
148
  end
146
149
  end
150
+
151
+ # приводит ссылки согласно стандарту, не корёжит
152
+ # http://www.фермаежей.рф => http://www.xn--80ajbaetq5a8a.xn--p1ai
153
+ class LinkNormalizer
154
+ def call(env)
155
+ node = env[:node]
156
+ case node.name
157
+ when 'a'.freeze
158
+ normalize_link node, 'href'.freeze
159
+ when 'img'.freeze
160
+ normalize_link node, 'src'.freeze
161
+ end
162
+ end
163
+
164
+ private
165
+
166
+ def normalize_link(node, attr_name)
167
+ return unless node[attr_name]
168
+ node[attr_name] = Addressable::URI.parse(node[attr_name]).normalize.to_s
169
+ end
170
+ end
171
+
172
+ LINK_NORMALIZER = LinkNormalizer.new
147
173
  end
148
174
 
149
175
  module SumInWords
data/string_tools.gemspec CHANGED
@@ -26,13 +26,15 @@ Gem::Specification.new do |spec|
26
26
  spec.add_runtime_dependency 'addressable', '~> 2.3.2'
27
27
  spec.add_runtime_dependency 'ru_propisju', '~> 2.1.4'
28
28
  spec.add_runtime_dependency 'sanitize', '>= 3.1.2'
29
+ spec.add_runtime_dependency 'nokogiri'
30
+ spec.add_runtime_dependency 'simpleidn', '>= 0.0.5'
29
31
 
30
32
  spec.add_development_dependency 'bundler', '~> 1.7'
31
33
  spec.add_development_dependency 'rake', '~> 10.0'
32
34
  spec.add_development_dependency 'rspec', '>= 2.14.0'
33
35
  spec.add_development_dependency 'rspec-rails', '>= 2.14.0'
34
36
  spec.add_development_dependency 'rspec-given', '~> 3.5'
35
- spec.add_development_dependency 'shoulda-matchers'
37
+ spec.add_development_dependency 'shoulda-matchers', '~> 2.0'
36
38
  spec.add_development_dependency 'appraisal', '>= 1.0.2'
37
39
  spec.add_development_dependency 'combustion', '>= 0.5.3'
38
40
  spec.add_development_dependency 'simplecov', '>= 0.9'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: string_tools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sergey D.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-07-20 00:00:00.000000000 Z
11
+ date: 2016-04-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activerecord
@@ -108,6 +108,34 @@ dependencies:
108
108
  - - ! '>='
109
109
  - !ruby/object:Gem::Version
110
110
  version: 3.1.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: nokogiri
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - ! '>='
123
+ - !ruby/object:Gem::Version
124
+ version: '0'
125
+ - !ruby/object:Gem::Dependency
126
+ name: simpleidn
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - ! '>='
130
+ - !ruby/object:Gem::Version
131
+ version: 0.0.5
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - ! '>='
137
+ - !ruby/object:Gem::Version
138
+ version: 0.0.5
111
139
  - !ruby/object:Gem::Dependency
112
140
  name: bundler
113
141
  requirement: !ruby/object:Gem::Requirement
@@ -182,16 +210,16 @@ dependencies:
182
210
  name: shoulda-matchers
183
211
  requirement: !ruby/object:Gem::Requirement
184
212
  requirements:
185
- - - ! '>='
213
+ - - ~>
186
214
  - !ruby/object:Gem::Version
187
- version: '0'
215
+ version: '2.0'
188
216
  type: :development
189
217
  prerelease: false
190
218
  version_requirements: !ruby/object:Gem::Requirement
191
219
  requirements:
192
- - - ! '>='
220
+ - - ~>
193
221
  - !ruby/object:Gem::Version
194
- version: '0'
222
+ version: '2.0'
195
223
  - !ruby/object:Gem::Dependency
196
224
  name: appraisal
197
225
  requirement: !ruby/object:Gem::Requirement
@@ -258,6 +286,7 @@ files:
258
286
  - .gitignore
259
287
  - .rspec
260
288
  - Appraisals
289
+ - CHANGELOG.md
261
290
  - Gemfile
262
291
  - LICENSE.txt
263
292
  - Makefile
@@ -266,6 +295,7 @@ files:
266
295
  - bin/console
267
296
  - lib/string_tools.rb
268
297
  - lib/string_tools/core_ext/string.rb
298
+ - lib/string_tools/html.rb
269
299
  - lib/string_tools/version.rb
270
300
  - string_tools.gemspec
271
301
  homepage: https://github.com/abak-press/string_tools
@@ -288,7 +318,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
288
318
  version: '0'
289
319
  requirements: []
290
320
  rubyforge_project:
291
- rubygems_version: 2.4.7
321
+ rubygems_version: 2.4.3
292
322
  signing_key:
293
323
  specification_version: 4
294
324
  summary: String Tools