string_tools 0.12.2 → 0.16.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.drone.yml +61 -29
- data/Appraisals +9 -7
- data/CHANGELOG.md +43 -1
- data/Gemfile +3 -0
- data/README.md +25 -0
- data/Rakefile +1 -0
- data/bin/console +1 -0
- data/dip.yml +4 -3
- data/lib/string_tools.rb +242 -19
- data/lib/string_tools/core_ext/string.rb +4 -2
- data/lib/string_tools/html.rb +3 -2
- data/lib/string_tools/string.rb +1 -0
- data/lib/string_tools/version.rb +2 -1
- data/string_tools.gemspec +4 -2
- metadata +20 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: '084549131263f4bead47691b27277aecbbc504d6'
|
|
4
|
+
data.tar.gz: d8256b2bd745db61a6e7f3a20c70c0debccb70dc
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9409816487699de77f2d7ef3e0bdb5c33fd3cd104c5a8d8a7c2a68ba31e6f71051442fceecbca02a1ad92fd8a226c9441a9535905b5a8f7d5d536d077e82d446
|
|
7
|
+
data.tar.gz: 81ee9e971dcc3568c67157127634d6412eb12f7d165411dbbc201317e344cdc889590adaa9d72b718a92022b2f19a550f74a197e24533813c5ee443096988aae
|
data/.drone.yml
CHANGED
|
@@ -1,32 +1,64 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
privileged: true
|
|
6
|
-
volumes:
|
|
7
|
-
- /home/data/drone/images:/images
|
|
8
|
-
- /home/data/drone/gems:/bundle
|
|
9
|
-
- /home/data/drone/key_cache:/ssh_keys
|
|
10
|
-
environment:
|
|
11
|
-
- COMPOSE_FILE_EXT=drone
|
|
12
|
-
- RUBY_IMAGE_TAG=2.2-latest
|
|
13
|
-
commands:
|
|
14
|
-
- wrapdocker docker -v
|
|
1
|
+
---
|
|
2
|
+
kind: pipeline
|
|
3
|
+
type: docker
|
|
4
|
+
name: build
|
|
15
5
|
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
6
|
+
volumes:
|
|
7
|
+
- name: bundle
|
|
8
|
+
host:
|
|
9
|
+
path: /home/data/drone/gems
|
|
10
|
+
- name: rubygems
|
|
11
|
+
host:
|
|
12
|
+
path: /home/data/drone/rubygems
|
|
19
13
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
14
|
+
spec_step_common: &spec_step_common
|
|
15
|
+
pull: if-not-exists
|
|
16
|
+
volumes:
|
|
17
|
+
- name: bundle
|
|
18
|
+
path: /bundle
|
|
19
|
+
commands:
|
|
20
|
+
- rm -fr Gemfile.lock gemfiles/
|
|
21
|
+
- bundle install -j 5
|
|
22
|
+
- bundle exec appraisal install
|
|
23
|
+
- bundle exec appraisal bundle exec rspec
|
|
23
24
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
25
|
+
steps:
|
|
26
|
+
- name: build on ruby2.2
|
|
27
|
+
image: abakpress/ruby-app:2.2-latest
|
|
28
|
+
environment:
|
|
29
|
+
TEST_DB_HOST: postgres
|
|
30
|
+
TEST_DB_NAME: docker
|
|
31
|
+
TEST_DB_USERNAME: postgres
|
|
32
|
+
BUNDLE_PATH: /bundle/2.2
|
|
33
|
+
<<: *spec_step_common
|
|
34
|
+
|
|
35
|
+
- name: build on ruby2.3
|
|
36
|
+
image: abakpress/ruby-app:2.3-latest
|
|
37
|
+
environment:
|
|
38
|
+
TEST_DB_HOST: postgres
|
|
39
|
+
TEST_DB_NAME: docker
|
|
40
|
+
TEST_DB_USERNAME: postgres
|
|
41
|
+
BUNDLE_PATH: /bundle/2.3
|
|
42
|
+
<<: *spec_step_common
|
|
43
|
+
|
|
44
|
+
- name: build on ruby2.4
|
|
45
|
+
image: abakpress/ruby-app:2.4-latest
|
|
46
|
+
environment:
|
|
47
|
+
TEST_DB_HOST: postgres
|
|
48
|
+
TEST_DB_NAME: docker
|
|
49
|
+
TEST_DB_USERNAME: postgres
|
|
50
|
+
BUNDLE_PATH: /bundle/2.4
|
|
51
|
+
<<: *spec_step_common
|
|
52
|
+
|
|
53
|
+
- name: release
|
|
54
|
+
image: abakpress/gem-publication:latest
|
|
55
|
+
pull: if-not-exists
|
|
56
|
+
when:
|
|
57
|
+
event: push
|
|
58
|
+
branch: master
|
|
59
|
+
status: success
|
|
60
|
+
volumes:
|
|
61
|
+
- name: rubygems
|
|
62
|
+
path: /root/.gem
|
|
63
|
+
commands:
|
|
64
|
+
- release-gem --public
|
data/Appraisals
CHANGED
|
@@ -1,10 +1,12 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
if RUBY_VERSION < '2.4'
|
|
3
|
+
appraise 'rails4.0' do
|
|
4
|
+
gem 'actionpack', '~> 4.0.0'
|
|
5
|
+
gem 'activesupport', '~> 4.0.0'
|
|
6
|
+
end
|
|
4
7
|
end
|
|
5
8
|
|
|
6
|
-
appraise 'rails4.
|
|
7
|
-
gem 'actionpack', '~> 4.
|
|
8
|
-
gem 'activesupport', '~> 4.
|
|
9
|
+
appraise 'rails4.2' do
|
|
10
|
+
gem 'actionpack', '~> 4.2.0'
|
|
11
|
+
gem 'activesupport', '~> 4.2.0'
|
|
9
12
|
end
|
|
10
|
-
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,22 @@
|
|
|
1
|
-
# v0.
|
|
1
|
+
# v0.16.0
|
|
2
|
+
|
|
3
|
+
* 2021-07-16 [8a3bf46](../../commit/8a3bf46) - __(Andrew N. Shalaev)__ Release v0.16.0
|
|
4
|
+
* 2021-07-16 [5784a91](../../commit/5784a91) - __(Andrew N. Shalaev)__ feature: add support for ruby v2.4
|
|
5
|
+
https://jira.railsc.ru/browse/BPC-19154
|
|
6
|
+
|
|
7
|
+
* 2020-05-13 [19cb127](../../commit/19cb127) - __(TamarinEA)__ Release 0.15.0
|
|
8
|
+
* 2020-04-03 [e63c121](../../commit/e63c121) - __(Mikhail Nelaev)__ feature: optionally turn off uri normalization
|
|
9
|
+
https://jira.railsc.ru/browse/GOODS-2219
|
|
2
10
|
|
|
11
|
+
* 2019-10-10 [770ed2b](../../commit/770ed2b) - __(TamarinEA)__ Release 0.14.0
|
|
12
|
+
* 2019-10-08 [b182709](../../commit/b182709) - __(Ilya Zhidkov)__ Release 0.13.0
|
|
13
|
+
* 2019-09-24 [87d134d](../../commit/87d134d) - __(evseevleo)__ chore(transliteration): add examples to README
|
|
14
|
+
* 2019-09-23 [4effcb9](../../commit/4effcb9) - __(evseevleo)__ feat(translit): add keyboard layout switch&transliteration
|
|
15
|
+
https://jira.railsc.ru/browse/BPC-15151
|
|
16
|
+
|
|
17
|
+
* 2019-09-23 [f495195](../../commit/f495195) - __(TamarinEA)__ feature: use nokogiri for strip tags
|
|
18
|
+
* 2019-09-23 [2054e2a](../../commit/2054e2a) - __(TamarinEA)__ chore: test rails 4.0 - 4.2
|
|
19
|
+
* 2018-06-29 [f5213df](../../commit/f5213df) - __(bibendi)__ chore: User latest gem-publication
|
|
3
20
|
* 2018-06-29 [7dfb4d8](../../commit/7dfb4d8) - __(bibendi)__ chore: Fix mounting rubygems in drone.yml
|
|
4
21
|
* 2018-06-29 [238a46f](../../commit/238a46f) - __(bibendi)__ chore: Set readonly gem/credentials on drone.yml
|
|
5
22
|
* 2018-06-29 [38b862d](../../commit/38b862d) - __(bibendi)__ chore: Add test section into droen.yml
|
|
@@ -74,6 +91,31 @@ https://jira.railsc.ru/browse/PC4-16353
|
|
|
74
91
|
# v3.0.1
|
|
75
92
|
|
|
76
93
|
|
|
94
|
+
# v0.15.0
|
|
95
|
+
|
|
96
|
+
* 2020-04-03 [e63c121](../../commit/e63c121) - __(Mikhail Nelaev)__ feature: optionally turn off uri normalization
|
|
97
|
+
https://jira.railsc.ru/browse/GOODS-2219
|
|
98
|
+
|
|
99
|
+
# v0.14.0
|
|
100
|
+
|
|
101
|
+
* 2019-09-23 [f495195](../../commit/f495195) - __(TamarinEA)__ feature: use nokogiri for strip tags
|
|
102
|
+
* 2019-09-23 [2054e2a](../../commit/2054e2a) - __(TamarinEA)__ chore: test rails 4.0 - 4.2
|
|
103
|
+
|
|
104
|
+
# v0.13.0
|
|
105
|
+
|
|
106
|
+
* 2019-09-24 [87d134d](../../commit/87d134d) - __(evseevleo)__ chore(transliteration): add examples to README
|
|
107
|
+
* 2019-09-23 [4effcb9](../../commit/4effcb9) - __(evseevleo)__ feat(translit): add keyboard layout switch&transliteration
|
|
108
|
+
https://jira.railsc.ru/browse/BPC-15151
|
|
109
|
+
|
|
110
|
+
* 2018-06-29 [f5213df](../../commit/f5213df) - __(bibendi)__ chore: User latest gem-publication
|
|
111
|
+
|
|
112
|
+
# v0.12.2
|
|
113
|
+
|
|
114
|
+
* 2018-06-29 [7dfb4d8](../../commit/7dfb4d8) - __(bibendi)__ chore: Fix mounting rubygems in drone.yml
|
|
115
|
+
* 2018-06-29 [238a46f](../../commit/238a46f) - __(bibendi)__ chore: Set readonly gem/credentials on drone.yml
|
|
116
|
+
* 2018-06-29 [38b862d](../../commit/38b862d) - __(bibendi)__ chore: Add test section into droen.yml
|
|
117
|
+
* 2018-06-29 [3004b9a](../../commit/3004b9a) - __(bibendi)__ chore: Add automatic publication
|
|
118
|
+
|
|
77
119
|
# v0.12.1
|
|
78
120
|
|
|
79
121
|
* 2018-06-19 [8f2da34](../../commit/8f2da34) - __(Simeon Movchan)__ fix: exclude \n and \t in nonprintable characters
|
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -22,6 +22,31 @@ Or install it yourself as:
|
|
|
22
22
|
|
|
23
23
|
TODO: Write usage instructions here
|
|
24
24
|
|
|
25
|
+
### Transliteration
|
|
26
|
+
|
|
27
|
+
Usage: ```StringTools.transliteration_variations(<string>)```.
|
|
28
|
+
Method returns an Array of Strings. Returned strings are: the given string, the string in the other keyboard layout, and the transliteration of whichever of the first two strings happens to be in Russian.
|
|
29
|
+
If there is a char in the string which isn't a part of the RU <-> EN keyboard mapping, or the string contains both Russian and English chars, only the given string wrapped in an Array is returned.
|
|
30
|
+
Examples:
|
|
31
|
+
```ruby
|
|
32
|
+
StringTools.transliteration_variations('"Мы почитаем всех нулями, А единицами — себя." - А. С. Пушкин')
|
|
33
|
+
=> ["\"Мы почитаем всех нулями, А единицами — себя.\" - А. С. Пушкин",
|
|
34
|
+
"@Vs gjxbnftv dct[ yekzvb? F tlbybwfvb — ct,z/@ - F/ C/ Geirby",
|
|
35
|
+
"\"My` pochitaem vsex nulyami, A ediniczami — sebya.\" - A. S. Pushkin"]
|
|
36
|
+
```
|
|
37
|
+
```ruby
|
|
38
|
+
StringTools.transliteration_variations('Ntrcn d ytdthyjq hfcrkflrt')
|
|
39
|
+
=> ["Ntrcn d ytdthyjq hfcrkflrt", "Текст в неверной раскладке", "Tekst v nevernoj raskladke"]
|
|
40
|
+
```
|
|
41
|
+
```ruby
|
|
42
|
+
StringTools.transliteration_variations('Еуче шт цкщтп лунищфкв дфнщгею')
|
|
43
|
+
=> ["Еуче шт цкщтп лунищфкв дфнщгею", "Text in wrong keyboard layout.", "Euche sht czkshhtp lunishhfkv dfnshhgeyu"]
|
|
44
|
+
```
|
|
45
|
+
```ruby
|
|
46
|
+
StringTools.transliteration_variations('ﻮﻴﻜﻴﺒﻳﺪﻳ')
|
|
47
|
+
=> ["ﻮﻴﻜﻴﺒﻳﺪﻳ"]
|
|
48
|
+
```
|
|
49
|
+
|
|
25
50
|
## Development
|
|
26
51
|
|
|
27
52
|
After checking out the repo, run `bundle install` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/Rakefile
CHANGED
data/bin/console
CHANGED
data/dip.yml
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
version: '1'
|
|
2
2
|
|
|
3
3
|
environment:
|
|
4
|
-
DOCKER_RUBY_VERSION: 2.
|
|
5
|
-
RUBY_IMAGE_TAG: 2.
|
|
4
|
+
DOCKER_RUBY_VERSION: 2.3
|
|
5
|
+
RUBY_IMAGE_TAG: 2.3-latest
|
|
6
6
|
COMPOSE_FILE_EXT: development
|
|
7
7
|
RAILS_ENV: test
|
|
8
8
|
|
|
@@ -33,9 +33,10 @@ interaction:
|
|
|
33
33
|
|
|
34
34
|
clean:
|
|
35
35
|
service: app
|
|
36
|
-
command: rm -
|
|
36
|
+
command: rm -rf Gemfile.lock gemfiles
|
|
37
37
|
|
|
38
38
|
provision:
|
|
39
39
|
- docker volume create --name bundler_data
|
|
40
|
+
- dip clean
|
|
40
41
|
- dip bundle install
|
|
41
42
|
- dip appraisal install
|
data/lib/string_tools.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
require 'string_tools/version'
|
|
3
4
|
require 'ru_propisju'
|
|
4
5
|
require 'sanitize'
|
|
@@ -9,6 +10,9 @@ module StringTools
|
|
|
9
10
|
autoload :HTML, 'string_tools/html'
|
|
10
11
|
|
|
11
12
|
module CharDet
|
|
13
|
+
CP1251_COMPATIBLE_ENCODINGS =
|
|
14
|
+
%w(windows-1253 windows-1254 windows-1255 windows-1256 windows-1258 EUC-TW ISO-8859-8).freeze
|
|
15
|
+
|
|
12
16
|
# Возвращает true если строка содержит допустимую
|
|
13
17
|
# последовательность байтов для кодировки utf8 и false в обратном случае
|
|
14
18
|
# см. http://en.wikipedia.org/wiki/UTF-8
|
|
@@ -29,18 +33,6 @@ module StringTools
|
|
|
29
33
|
def to_cp1251(str)
|
|
30
34
|
str.to_cp1251
|
|
31
35
|
end
|
|
32
|
-
|
|
33
|
-
def cp1251_compatible_encodings
|
|
34
|
-
[
|
|
35
|
-
'windows-1253',
|
|
36
|
-
'windows-1254',
|
|
37
|
-
'windows-1255',
|
|
38
|
-
'windows-1256',
|
|
39
|
-
'windows-1258',
|
|
40
|
-
'EUC-TW',
|
|
41
|
-
'ISO-8859-8'
|
|
42
|
-
]
|
|
43
|
-
end
|
|
44
36
|
end
|
|
45
37
|
extend CharDet
|
|
46
38
|
|
|
@@ -195,10 +187,10 @@ module StringTools
|
|
|
195
187
|
def call(env)
|
|
196
188
|
node = env[:node]
|
|
197
189
|
case node.name
|
|
198
|
-
when 'a'
|
|
199
|
-
normalize_link node, 'href'
|
|
200
|
-
when 'img'
|
|
201
|
-
normalize_link node, 'src'
|
|
190
|
+
when 'a'
|
|
191
|
+
normalize_link node, 'href'
|
|
192
|
+
when 'img'
|
|
193
|
+
normalize_link node, 'src'
|
|
202
194
|
end
|
|
203
195
|
end
|
|
204
196
|
|
|
@@ -238,7 +230,7 @@ module StringTools
|
|
|
238
230
|
# Сумма в рублях прописью. Кол-во копеек выводится всегда. Первая буква заглавная
|
|
239
231
|
def rublej_propisju(amount)
|
|
240
232
|
kop = (amount.divmod(1)[1]*100).round
|
|
241
|
-
result = RuPropisju.rublej(amount.to_i).capitalize
|
|
233
|
+
result = RuPropisju.rublej(amount.to_i).capitalize.dup
|
|
242
234
|
result << " %.2d " % kop
|
|
243
235
|
result << RuPropisju.choose_plural(kop, 'копейка', 'копейки', 'копеек')
|
|
244
236
|
end
|
|
@@ -246,14 +238,245 @@ module StringTools
|
|
|
246
238
|
extend SumInWords
|
|
247
239
|
|
|
248
240
|
module Uri
|
|
249
|
-
def add_params_to_url(url, params = nil)
|
|
241
|
+
def add_params_to_url(url, params = nil, options = {normalize: true})
|
|
250
242
|
uri = Addressable::URI.parse(url)
|
|
251
243
|
uri = Addressable::URI.parse("http://#{url}") unless uri.scheme
|
|
252
244
|
uri.query_values = (uri.query_values || {}).merge!(params.stringify_keys) if params.present?
|
|
253
|
-
|
|
245
|
+
|
|
246
|
+
uri.normalize! if options[:normalize]
|
|
247
|
+
|
|
248
|
+
uri.to_s
|
|
254
249
|
rescue Addressable::URI::InvalidURIError
|
|
255
250
|
nil
|
|
256
251
|
end
|
|
257
252
|
end
|
|
258
253
|
extend Uri
|
|
254
|
+
|
|
255
|
+
module Transliteration
|
|
256
|
+
LAYOUT_EN_TO_RU_MAP = {
|
|
257
|
+
'q' => 'й', 'Q' => 'Й',
|
|
258
|
+
'w' => 'ц', 'W' => 'Ц',
|
|
259
|
+
'e' => 'у', 'E' => 'У',
|
|
260
|
+
'r' => 'к', 'R' => 'К',
|
|
261
|
+
't' => 'е', 'T' => 'Е',
|
|
262
|
+
'y' => 'н', 'Y' => 'Н',
|
|
263
|
+
'u' => 'г', 'U' => 'Г',
|
|
264
|
+
'i' => 'ш', 'I' => 'Ш',
|
|
265
|
+
'o' => 'щ', 'O' => 'Щ',
|
|
266
|
+
'p' => 'з', 'P' => 'З',
|
|
267
|
+
'[' => 'х',
|
|
268
|
+
'{' => 'Х',
|
|
269
|
+
']' => 'ъ',
|
|
270
|
+
'}' => 'Ъ',
|
|
271
|
+
'|' => '/',
|
|
272
|
+
'`' => 'ё',
|
|
273
|
+
'~' => 'Ё',
|
|
274
|
+
'a' => 'ф', 'A' => 'Ф',
|
|
275
|
+
's' => 'ы', 'S' => 'Ы',
|
|
276
|
+
'd' => 'в', 'D' => 'В',
|
|
277
|
+
'f' => 'а', 'F' => 'А',
|
|
278
|
+
'g' => 'п', 'G' => 'П',
|
|
279
|
+
'h' => 'р', 'H' => 'Р',
|
|
280
|
+
'j' => 'о', 'J' => 'О',
|
|
281
|
+
'k' => 'л', 'K' => 'Л',
|
|
282
|
+
'l' => 'д', 'L' => 'Д',
|
|
283
|
+
';' => 'ж',
|
|
284
|
+
':' => 'Ж',
|
|
285
|
+
"'" => 'э',
|
|
286
|
+
'"' => 'Э',
|
|
287
|
+
'z' => 'я', 'Z' => 'Я',
|
|
288
|
+
'x' => 'ч', 'X' => 'Ч',
|
|
289
|
+
'c' => 'с', 'C' => 'С',
|
|
290
|
+
'v' => 'м', 'V' => 'М',
|
|
291
|
+
'b' => 'и', 'B' => 'И',
|
|
292
|
+
'n' => 'т', 'N' => 'Т',
|
|
293
|
+
'm' => 'ь', 'M' => 'Ь',
|
|
294
|
+
',' => 'б',
|
|
295
|
+
'<' => 'Б',
|
|
296
|
+
'.' => 'ю',
|
|
297
|
+
'>' => 'Ю',
|
|
298
|
+
'/' => '.',
|
|
299
|
+
'?' => ',',
|
|
300
|
+
'@' => '"',
|
|
301
|
+
'#' => '№',
|
|
302
|
+
'$' => ';',
|
|
303
|
+
'^' => ':',
|
|
304
|
+
'&' => '?'
|
|
305
|
+
}.freeze
|
|
306
|
+
LAYOUT_RU_TO_EN_MAP = {
|
|
307
|
+
'й' => 'q', 'Й' => 'Q',
|
|
308
|
+
'ц' => 'w', 'Ц' => 'W',
|
|
309
|
+
'у' => 'e', 'У' => 'E',
|
|
310
|
+
'к' => 'r', 'К' => 'R',
|
|
311
|
+
'е' => 't', 'Е' => 'T',
|
|
312
|
+
'н' => 'y', 'Н' => 'Y',
|
|
313
|
+
'г' => 'u', 'Г' => 'U',
|
|
314
|
+
'ш' => 'i', 'Ш' => 'I',
|
|
315
|
+
'щ' => 'o', 'Щ' => 'O',
|
|
316
|
+
'з' => 'p', 'З' => 'P',
|
|
317
|
+
'х' => '[',
|
|
318
|
+
'Х' => '{',
|
|
319
|
+
'ъ' => ']',
|
|
320
|
+
'Ъ' => '}',
|
|
321
|
+
'/' => '|',
|
|
322
|
+
'ё' => '`',
|
|
323
|
+
'Ё' => '~',
|
|
324
|
+
'ф' => 'a', 'Ф' => 'A',
|
|
325
|
+
'ы' => 's', 'Ы' => 'S',
|
|
326
|
+
'в' => 'd', 'В' => 'D',
|
|
327
|
+
'а' => 'f', 'А' => 'F',
|
|
328
|
+
'п' => 'g', 'П' => 'G',
|
|
329
|
+
'р' => 'h', 'Р' => 'H',
|
|
330
|
+
'о' => 'j', 'О' => 'J',
|
|
331
|
+
'л' => 'k', 'Л' => 'K',
|
|
332
|
+
'д' => 'l', 'Д' => 'L',
|
|
333
|
+
'ж' => ';',
|
|
334
|
+
'Ж' => ':',
|
|
335
|
+
'э' => "'",
|
|
336
|
+
'Э' => '"',
|
|
337
|
+
'я' => 'z', 'Я' => 'Z',
|
|
338
|
+
'ч' => 'x', 'Ч' => 'X',
|
|
339
|
+
'с' => 'c', 'С' => 'C',
|
|
340
|
+
'м' => 'v', 'М' => 'V',
|
|
341
|
+
'и' => 'b', 'И' => 'B',
|
|
342
|
+
'т' => 'n', 'Т' => 'N',
|
|
343
|
+
'ь' => 'm', 'Ь' => 'M',
|
|
344
|
+
'б' => ',',
|
|
345
|
+
'Б' => '<',
|
|
346
|
+
'ю' => '.',
|
|
347
|
+
'Ю' => '>',
|
|
348
|
+
'.' => '/',
|
|
349
|
+
',' => '?',
|
|
350
|
+
'"' => '@',
|
|
351
|
+
'№' => '#',
|
|
352
|
+
';' => '$',
|
|
353
|
+
':' => '^',
|
|
354
|
+
'?' => '&'
|
|
355
|
+
}.freeze
|
|
356
|
+
LAYOUT_PERSISTENT = {
|
|
357
|
+
'0' => '0',
|
|
358
|
+
'1' => '1',
|
|
359
|
+
'2' => '2',
|
|
360
|
+
'3' => '3',
|
|
361
|
+
'4' => '4',
|
|
362
|
+
'5' => '5',
|
|
363
|
+
'6' => '6',
|
|
364
|
+
'7' => '7',
|
|
365
|
+
'8' => '8',
|
|
366
|
+
'9' => '9',
|
|
367
|
+
'!' => '!',
|
|
368
|
+
'*' => '*',
|
|
369
|
+
'(' => '(',
|
|
370
|
+
')' => ')',
|
|
371
|
+
' ' => ' ',
|
|
372
|
+
'-' => '-',
|
|
373
|
+
'—' => '—',
|
|
374
|
+
'_' => '_',
|
|
375
|
+
'=' => '=',
|
|
376
|
+
'+' => '+'
|
|
377
|
+
}.freeze
|
|
378
|
+
TRANSLIT_RU_TO_EN_MAP = {
|
|
379
|
+
'щ' => 'shh', 'Щ' => 'Shh',
|
|
380
|
+
'ё' => 'yo', 'Ё' => 'Yo',
|
|
381
|
+
'ж' => 'zh', 'Ж' => 'Zh',
|
|
382
|
+
'ц' => 'cz', 'Ц' => 'Cz',
|
|
383
|
+
'ч' => 'ch', 'Ч' => 'Ch',
|
|
384
|
+
'ш' => 'sh', 'Ш' => 'Sh',
|
|
385
|
+
'ъ' => '``', 'Ъ' => '``',
|
|
386
|
+
'ы' => 'y`', 'Ы' => 'Y`',
|
|
387
|
+
'э' => 'e`', 'Э' => 'E`',
|
|
388
|
+
'ю' => 'yu', 'Ю' => 'Yu',
|
|
389
|
+
'я' => 'ya', 'Я' => 'Ya',
|
|
390
|
+
'а' => 'a', 'А' => 'A',
|
|
391
|
+
'б' => 'b', 'Б' => 'B',
|
|
392
|
+
'в' => 'v', 'В' => 'V',
|
|
393
|
+
'г' => 'g', 'Г' => 'G',
|
|
394
|
+
'д' => 'd', 'Д' => 'D',
|
|
395
|
+
'е' => 'e', 'Е' => 'E',
|
|
396
|
+
'з' => 'z', 'З' => 'Z',
|
|
397
|
+
'и' => 'i', 'И' => 'I',
|
|
398
|
+
'й' => 'j', 'Й' => 'J',
|
|
399
|
+
'к' => 'k', 'К' => 'K',
|
|
400
|
+
'л' => 'l', 'Л' => 'L',
|
|
401
|
+
'м' => 'm', 'М' => 'M',
|
|
402
|
+
'н' => 'n', 'Н' => 'N',
|
|
403
|
+
'о' => 'o', 'О' => 'O',
|
|
404
|
+
'п' => 'p', 'П' => 'P',
|
|
405
|
+
'р' => 'r', 'Р' => 'R',
|
|
406
|
+
'с' => 's', 'С' => 'S',
|
|
407
|
+
'т' => 't', 'Т' => 'T',
|
|
408
|
+
'у' => 'u', 'У' => 'U',
|
|
409
|
+
'ф' => 'f', 'Ф' => 'F',
|
|
410
|
+
'х' => 'x', 'Х' => 'X',
|
|
411
|
+
'ь' => '`', 'Ь' => '`'
|
|
412
|
+
}.freeze
|
|
413
|
+
|
|
414
|
+
# Public: варианты строки с учетом смены раскладки и/или транслитерации для Русского и Английского языков
|
|
415
|
+
# Смена раскладки выполняется в обе стороны, транслитерация - с Русского на Английский.
|
|
416
|
+
#
|
|
417
|
+
# str - String
|
|
418
|
+
#
|
|
419
|
+
# Examples
|
|
420
|
+
# transliteration_variations('Ruby')
|
|
421
|
+
# => ['Ruby', 'Кгин', 'Kgin']
|
|
422
|
+
# transliteration_variations('Слово')
|
|
423
|
+
# => ['Слово', 'Ckjdj', 'Slovo']
|
|
424
|
+
# transliteration_variations('КомпанияPro')
|
|
425
|
+
# => ['КомпанияPro']
|
|
426
|
+
# transliteration_variations('ويكيبيدي')
|
|
427
|
+
# => ['ويكيبيدي']
|
|
428
|
+
#
|
|
429
|
+
# returns Array of String
|
|
430
|
+
def transliteration_variations(str)
|
|
431
|
+
str_as_chars = str.chars
|
|
432
|
+
converted = convert_layout(str_as_chars)
|
|
433
|
+
|
|
434
|
+
layout_swap = converted[:chars].try(:join)
|
|
435
|
+
tranliterated = (converted[:was_ru] ? transliterate(str_as_chars) : transliterate(converted[:chars])).try(:join)
|
|
436
|
+
|
|
437
|
+
[str, layout_swap, tranliterated].tap(&:compact!)
|
|
438
|
+
end
|
|
439
|
+
|
|
440
|
+
private
|
|
441
|
+
|
|
442
|
+
# Internal: Смена раскладки массива символов, ru <-> en.
|
|
443
|
+
# Возвращает Hash с двумя ключами:
|
|
444
|
+
# :chars - Array, символы в другой раскладке(nil если не удалось сменить раскладку)
|
|
445
|
+
# :was_ru - Bool, принадлежали ли все символы русскому языку.
|
|
446
|
+
#
|
|
447
|
+
# splitted_string - Array of String
|
|
448
|
+
#
|
|
449
|
+
# Example:
|
|
450
|
+
# convert_layout(['a', 'b', 'c']) =>
|
|
451
|
+
# {chars: ['ф', 'и', 'с'], was_ru: false}
|
|
452
|
+
# convert_layout(['а', 'б', 'в']) =>
|
|
453
|
+
# {chars: ['f', ',', 'd'], was_ru: true}
|
|
454
|
+
# convert_layout(['ﻮ', 'ﻴ', 'ﻜ']) =>
|
|
455
|
+
# {chars: nil, was_ru: false}
|
|
456
|
+
#
|
|
457
|
+
# returns Hash
|
|
458
|
+
def convert_layout(splitted_string)
|
|
459
|
+
str_arr = splitted_string.map do |char|
|
|
460
|
+
LAYOUT_RU_TO_EN_MAP[char] || LAYOUT_PERSISTENT[char] || break
|
|
461
|
+
end
|
|
462
|
+
|
|
463
|
+
return {chars: str_arr, was_ru: true} if str_arr
|
|
464
|
+
|
|
465
|
+
{chars: splitted_string.map { |char| LAYOUT_EN_TO_RU_MAP[char] || LAYOUT_PERSISTENT[char] || break },
|
|
466
|
+
was_ru: false}
|
|
467
|
+
end
|
|
468
|
+
|
|
469
|
+
# Internal: Транслитерация массива символов, ru -> en
|
|
470
|
+
# Если символа нет в словаре, не изменяет его.
|
|
471
|
+
#
|
|
472
|
+
# splitted string - Array of String
|
|
473
|
+
#
|
|
474
|
+
# Returns Array
|
|
475
|
+
def transliterate(splitted_string)
|
|
476
|
+
return unless splitted_string
|
|
477
|
+
|
|
478
|
+
splitted_string.map { |char| TRANSLIT_RU_TO_EN_MAP[char] || char }
|
|
479
|
+
end
|
|
480
|
+
end
|
|
481
|
+
extend Transliteration
|
|
259
482
|
end
|
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
1
2
|
require 'rchardet19'
|
|
2
3
|
require 'addressable/uri'
|
|
3
4
|
require 'active_support/core_ext/module'
|
|
@@ -18,7 +19,7 @@ class String
|
|
|
18
19
|
# возвращает строку из которой удалены HTML-теги
|
|
19
20
|
# символы <>&" остаются без изменения
|
|
20
21
|
def strip_tags
|
|
21
|
-
|
|
22
|
+
Nokogiri::HTML5.parse(self).content
|
|
22
23
|
end
|
|
23
24
|
|
|
24
25
|
# '11,3'.to_f
|
|
@@ -154,10 +155,11 @@ class String
|
|
|
154
155
|
end
|
|
155
156
|
end
|
|
156
157
|
|
|
158
|
+
WIN_1251_ENCODING = 'windows-1251'
|
|
157
159
|
# shorthand
|
|
158
160
|
def detect_encoding
|
|
159
161
|
e = ::CharDet.detect(self)["encoding"]
|
|
160
|
-
e =
|
|
162
|
+
e = WIN_1251_ENCODING if StringTools::CharDet::CP1251_COMPATIBLE_ENCODINGS.include?(e)
|
|
161
163
|
e
|
|
162
164
|
end
|
|
163
165
|
|
data/lib/string_tools/html.rb
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
require 'nokogiri'
|
|
3
4
|
require 'addressable/uri'
|
|
4
5
|
require 'simpleidn'
|
|
@@ -44,7 +45,7 @@ module StringTools
|
|
|
44
45
|
doc = Nokogiri::HTML::DocumentFragment.parse(html)
|
|
45
46
|
scrubber = LinksRemoveScrubber.new(options)
|
|
46
47
|
|
|
47
|
-
doc.css('a'
|
|
48
|
+
doc.css('a').each { |node| scrubber.call node }
|
|
48
49
|
|
|
49
50
|
if scrubber.done_changes?
|
|
50
51
|
doc.children.map { |node| node.serialize HTML_SERIALIZE_OPTIONS }.join
|
|
@@ -78,7 +79,7 @@ module StringTools
|
|
|
78
79
|
end
|
|
79
80
|
|
|
80
81
|
def whitelisted?(domain)
|
|
81
|
-
host_parts = domain.split('.'
|
|
82
|
+
host_parts = domain.split('.')
|
|
82
83
|
host = host_parts[-1] # com, ru ...
|
|
83
84
|
(host_parts.length - 2).downto(0) do |i|
|
|
84
85
|
subdomain = host_parts[i]
|
data/lib/string_tools/string.rb
CHANGED
data/lib/string_tools/version.rb
CHANGED
data/string_tools.gemspec
CHANGED
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
# coding: utf-8
|
|
2
|
+
# frozen_string_literal: true
|
|
2
3
|
lib = File.expand_path('../lib', __FILE__)
|
|
3
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
5
|
require 'string_tools/version'
|
|
@@ -19,8 +20,8 @@ Gem::Specification.new do |spec|
|
|
|
19
20
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
|
20
21
|
spec.require_paths = ['lib']
|
|
21
22
|
|
|
22
|
-
spec.add_runtime_dependency 'actionpack', '>=
|
|
23
|
-
spec.add_runtime_dependency 'activesupport', '>=
|
|
23
|
+
spec.add_runtime_dependency 'actionpack', '>= 4.0.13'
|
|
24
|
+
spec.add_runtime_dependency 'activesupport', '>= 4.0.13'
|
|
24
25
|
spec.add_runtime_dependency 'rchardet19', '~> 1.3.5'
|
|
25
26
|
spec.add_runtime_dependency 'addressable', '>= 2.3.2'
|
|
26
27
|
spec.add_runtime_dependency 'ru_propisju', '>= 2.1.4'
|
|
@@ -33,4 +34,5 @@ Gem::Specification.new do |spec|
|
|
|
33
34
|
spec.add_development_dependency 'rspec', '>= 3.4'
|
|
34
35
|
spec.add_development_dependency 'appraisal', '>= 1.0.2'
|
|
35
36
|
spec.add_development_dependency 'simplecov', '>= 0.9'
|
|
37
|
+
spec.add_development_dependency 'pry-byebug'
|
|
36
38
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: string_tools
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.16.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Sergey D.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date:
|
|
11
|
+
date: 2021-07-22 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: actionpack
|
|
@@ -16,28 +16,28 @@ dependencies:
|
|
|
16
16
|
requirements:
|
|
17
17
|
- - ">="
|
|
18
18
|
- !ruby/object:Gem::Version
|
|
19
|
-
version:
|
|
19
|
+
version: 4.0.13
|
|
20
20
|
type: :runtime
|
|
21
21
|
prerelease: false
|
|
22
22
|
version_requirements: !ruby/object:Gem::Requirement
|
|
23
23
|
requirements:
|
|
24
24
|
- - ">="
|
|
25
25
|
- !ruby/object:Gem::Version
|
|
26
|
-
version:
|
|
26
|
+
version: 4.0.13
|
|
27
27
|
- !ruby/object:Gem::Dependency
|
|
28
28
|
name: activesupport
|
|
29
29
|
requirement: !ruby/object:Gem::Requirement
|
|
30
30
|
requirements:
|
|
31
31
|
- - ">="
|
|
32
32
|
- !ruby/object:Gem::Version
|
|
33
|
-
version:
|
|
33
|
+
version: 4.0.13
|
|
34
34
|
type: :runtime
|
|
35
35
|
prerelease: false
|
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
|
37
37
|
requirements:
|
|
38
38
|
- - ">="
|
|
39
39
|
- !ruby/object:Gem::Version
|
|
40
|
-
version:
|
|
40
|
+
version: 4.0.13
|
|
41
41
|
- !ruby/object:Gem::Dependency
|
|
42
42
|
name: rchardet19
|
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
|
@@ -192,6 +192,20 @@ dependencies:
|
|
|
192
192
|
- - ">="
|
|
193
193
|
- !ruby/object:Gem::Version
|
|
194
194
|
version: '0.9'
|
|
195
|
+
- !ruby/object:Gem::Dependency
|
|
196
|
+
name: pry-byebug
|
|
197
|
+
requirement: !ruby/object:Gem::Requirement
|
|
198
|
+
requirements:
|
|
199
|
+
- - ">="
|
|
200
|
+
- !ruby/object:Gem::Version
|
|
201
|
+
version: '0'
|
|
202
|
+
type: :development
|
|
203
|
+
prerelease: false
|
|
204
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
205
|
+
requirements:
|
|
206
|
+
- - ">="
|
|
207
|
+
- !ruby/object:Gem::Version
|
|
208
|
+
version: '0'
|
|
195
209
|
description: String Tools
|
|
196
210
|
email:
|
|
197
211
|
- sclinede@gmail.com
|