string_tools 0.12.2 → 0.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.drone.yml +1 -1
- data/CHANGELOG.md +14 -1
- data/README.md +25 -0
- data/lib/string_tools.rb +231 -12
- data/lib/string_tools/core_ext/string.rb +2 -1
- data/lib/string_tools/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aacc7b6fad14a8c6972725fe7fc740893b3a24f3
|
4
|
+
data.tar.gz: 6055322c6803fb896c76290fe61c20d1b3202458
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56d07342c5b225ce65eece9dae4b89fdee2a335df7f6663c1b6231bdbf24573777f1b3f69a86a43d96079a05cabafde6bb0ceff373c6326f23fd672bada2549b
|
7
|
+
data.tar.gz: 9141388d89966dff07e3f8645142faf00759e9f824fed8ed585d387d0b84f3abc59fb90aef244603de0fb937c9242a42d5f7941a3f516f0640d95e6fbc8123e2
|
data/.drone.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
|
-
# v0.
|
1
|
+
# v0.13.0
|
2
|
+
|
3
|
+
* 2019-10-08 [b182709](../../commit/b182709) - __(Ilya Zhidkov)__ Release 0.13.0
|
4
|
+
* 2019-09-24 [87d134d](../../commit/87d134d) - __(evseevleo)__ chore(transliteration): add examples to README
|
5
|
+
* 2019-09-23 [4effcb9](../../commit/4effcb9) - __(evseevleo)__ feat(translit): add keyboard layout switch&transliteration
|
6
|
+
https://jira.railsc.ru/browse/BPC-15151
|
2
7
|
|
8
|
+
* 2018-06-29 [f5213df](../../commit/f5213df) - __(bibendi)__ chore: User latest gem-publication
|
3
9
|
* 2018-06-29 [7dfb4d8](../../commit/7dfb4d8) - __(bibendi)__ chore: Fix mounting rubygems in drone.yml
|
4
10
|
* 2018-06-29 [238a46f](../../commit/238a46f) - __(bibendi)__ chore: Set readonly gem/credentials on drone.yml
|
5
11
|
* 2018-06-29 [38b862d](../../commit/38b862d) - __(bibendi)__ chore: Add test section into droen.yml
|
@@ -74,6 +80,13 @@ https://jira.railsc.ru/browse/PC4-16353
|
|
74
80
|
# v3.0.1
|
75
81
|
|
76
82
|
|
83
|
+
# v0.12.2
|
84
|
+
|
85
|
+
* 2018-06-29 [7dfb4d8](../../commit/7dfb4d8) - __(bibendi)__ chore: Fix mounting rubygems in drone.yml
|
86
|
+
* 2018-06-29 [238a46f](../../commit/238a46f) - __(bibendi)__ chore: Set readonly gem/credentials on drone.yml
|
87
|
+
* 2018-06-29 [38b862d](../../commit/38b862d) - __(bibendi)__ chore: Add test section into droen.yml
|
88
|
+
* 2018-06-29 [3004b9a](../../commit/3004b9a) - __(bibendi)__ chore: Add automatic publication
|
89
|
+
|
77
90
|
# v0.12.1
|
78
91
|
|
79
92
|
* 2018-06-19 [8f2da34](../../commit/8f2da34) - __(Simeon Movchan)__ fix: exclude \n and \t in nonprintable characters
|
data/README.md
CHANGED
@@ -22,6 +22,31 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
TODO: Write usage instructions here
|
24
24
|
|
25
|
+
### Transliteration
|
26
|
+
|
27
|
+
Usage: ```StringTools.transliteration_variations(<string>)```.
|
28
|
+
Method returns an Array of Strings. Returned strings are: the given string, the string in the other keyboard layout, and the transliteration of whichever of the first two strings happens to be in Russian.
|
29
|
+
If there is a char in the string which isn't a part of the RU <-> EN keyboard mapping, or the string contains both Russian and English chars, only the given string wrapped in an Array is returned.
|
30
|
+
Examples:
|
31
|
+
```ruby
|
32
|
+
StringTools.transliteration_variations('"Мы почитаем всех нулями, А единицами — себя." - А. С. Пушкин')
|
33
|
+
=> ["\"Мы почитаем всех нулями, А единицами — себя.\" - А. С. Пушкин",
|
34
|
+
"@Vs gjxbnftv dct[ yekzvb? F tlbybwfvb — ct,z/@ - F/ C/ Geirby",
|
35
|
+
"\"My` pochitaem vsex nulyami, A ediniczami — sebya.\" - A. S. Pushkin"]
|
36
|
+
```
|
37
|
+
```ruby
|
38
|
+
StringTools.transliteration_variations('Ntrcn d ytdthyjq hfcrkflrt')
|
39
|
+
=> ["Ntrcn d ytdthyjq hfcrkflrt", "Текст в неверной раскладке", "Tekst v nevernoj raskladke"]
|
40
|
+
```
|
41
|
+
```ruby
|
42
|
+
StringTools.transliteration_variations('Еуче шт цкщтп лунищфкв дфнщгею')
|
43
|
+
=> ["Еуче шт цкщтп лунищфкв дфнщгею", "Text in wrong keyboard layout.", "Euche sht czkshhtp lunishhfkv dfnshhgeyu"]
|
44
|
+
```
|
45
|
+
```ruby
|
46
|
+
StringTools.transliteration_variations('ﻮﻴﻜﻴﺒﻳﺪﻳ')
|
47
|
+
=> ["ﻮﻴﻜﻴﺒﻳﺪﻳ"]
|
48
|
+
```
|
49
|
+
|
25
50
|
## Development
|
26
51
|
|
27
52
|
After checking out the repo, run `bundle install` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/string_tools.rb
CHANGED
@@ -9,6 +9,9 @@ module StringTools
|
|
9
9
|
autoload :HTML, 'string_tools/html'
|
10
10
|
|
11
11
|
module CharDet
|
12
|
+
CP1251_COMPATIBLE_ENCODINGS =
|
13
|
+
%w(windows-1253 windows-1254 windows-1255 windows-1256 windows-1258 EUC-TW ISO-8859-8).freeze
|
14
|
+
|
12
15
|
# Возвращает true если строка содержит допустимую
|
13
16
|
# последовательность байтов для кодировки utf8 и false в обратном случае
|
14
17
|
# см. http://en.wikipedia.org/wiki/UTF-8
|
@@ -29,18 +32,6 @@ module StringTools
|
|
29
32
|
def to_cp1251(str)
|
30
33
|
str.to_cp1251
|
31
34
|
end
|
32
|
-
|
33
|
-
def cp1251_compatible_encodings
|
34
|
-
[
|
35
|
-
'windows-1253',
|
36
|
-
'windows-1254',
|
37
|
-
'windows-1255',
|
38
|
-
'windows-1256',
|
39
|
-
'windows-1258',
|
40
|
-
'EUC-TW',
|
41
|
-
'ISO-8859-8'
|
42
|
-
]
|
43
|
-
end
|
44
35
|
end
|
45
36
|
extend CharDet
|
46
37
|
|
@@ -256,4 +247,232 @@ module StringTools
|
|
256
247
|
end
|
257
248
|
end
|
258
249
|
extend Uri
|
250
|
+
|
251
|
+
module Transliteration
|
252
|
+
LAYOUT_EN_TO_RU_MAP = {
|
253
|
+
'q' => 'й', 'Q' => 'Й',
|
254
|
+
'w' => 'ц', 'W' => 'Ц',
|
255
|
+
'e' => 'у', 'E' => 'У',
|
256
|
+
'r' => 'к', 'R' => 'К',
|
257
|
+
't' => 'е', 'T' => 'Е',
|
258
|
+
'y' => 'н', 'Y' => 'Н',
|
259
|
+
'u' => 'г', 'U' => 'Г',
|
260
|
+
'i' => 'ш', 'I' => 'Ш',
|
261
|
+
'o' => 'щ', 'O' => 'Щ',
|
262
|
+
'p' => 'з', 'P' => 'З',
|
263
|
+
'[' => 'х',
|
264
|
+
'{' => 'Х',
|
265
|
+
']' => 'ъ',
|
266
|
+
'}' => 'Ъ',
|
267
|
+
'|' => '/',
|
268
|
+
'`' => 'ё',
|
269
|
+
'~' => 'Ё',
|
270
|
+
'a' => 'ф', 'A' => 'Ф',
|
271
|
+
's' => 'ы', 'S' => 'Ы',
|
272
|
+
'd' => 'в', 'D' => 'В',
|
273
|
+
'f' => 'а', 'F' => 'А',
|
274
|
+
'g' => 'п', 'G' => 'П',
|
275
|
+
'h' => 'р', 'H' => 'Р',
|
276
|
+
'j' => 'о', 'J' => 'О',
|
277
|
+
'k' => 'л', 'K' => 'Л',
|
278
|
+
'l' => 'д', 'L' => 'Д',
|
279
|
+
';' => 'ж',
|
280
|
+
':' => 'Ж',
|
281
|
+
"'" => 'э',
|
282
|
+
'"' => 'Э',
|
283
|
+
'z' => 'я', 'Z' => 'Я',
|
284
|
+
'x' => 'ч', 'X' => 'Ч',
|
285
|
+
'c' => 'с', 'C' => 'С',
|
286
|
+
'v' => 'м', 'V' => 'М',
|
287
|
+
'b' => 'и', 'B' => 'И',
|
288
|
+
'n' => 'т', 'N' => 'Т',
|
289
|
+
'm' => 'ь', 'M' => 'Ь',
|
290
|
+
',' => 'б',
|
291
|
+
'<' => 'Б',
|
292
|
+
'.' => 'ю',
|
293
|
+
'>' => 'Ю',
|
294
|
+
'/' => '.',
|
295
|
+
'?' => ',',
|
296
|
+
'@' => '"',
|
297
|
+
'#' => '№',
|
298
|
+
'$' => ';',
|
299
|
+
'^' => ':',
|
300
|
+
'&' => '?'
|
301
|
+
}.freeze
|
302
|
+
LAYOUT_RU_TO_EN_MAP = {
|
303
|
+
'й' => 'q', 'Й' => 'Q',
|
304
|
+
'ц' => 'w', 'Ц' => 'W',
|
305
|
+
'у' => 'e', 'У' => 'E',
|
306
|
+
'к' => 'r', 'К' => 'R',
|
307
|
+
'е' => 't', 'Е' => 'T',
|
308
|
+
'н' => 'y', 'Н' => 'Y',
|
309
|
+
'г' => 'u', 'Г' => 'U',
|
310
|
+
'ш' => 'i', 'Ш' => 'I',
|
311
|
+
'щ' => 'o', 'Щ' => 'O',
|
312
|
+
'з' => 'p', 'З' => 'P',
|
313
|
+
'х' => '[',
|
314
|
+
'Х' => '{',
|
315
|
+
'ъ' => ']',
|
316
|
+
'Ъ' => '}',
|
317
|
+
'/' => '|',
|
318
|
+
'ё' => '`',
|
319
|
+
'Ё' => '~',
|
320
|
+
'ф' => 'a', 'Ф' => 'A',
|
321
|
+
'ы' => 's', 'Ы' => 'S',
|
322
|
+
'в' => 'd', 'В' => 'D',
|
323
|
+
'а' => 'f', 'А' => 'F',
|
324
|
+
'п' => 'g', 'П' => 'G',
|
325
|
+
'р' => 'h', 'Р' => 'H',
|
326
|
+
'о' => 'j', 'О' => 'J',
|
327
|
+
'л' => 'k', 'Л' => 'K',
|
328
|
+
'д' => 'l', 'Д' => 'L',
|
329
|
+
'ж' => ';',
|
330
|
+
'Ж' => ':',
|
331
|
+
'э' => "'",
|
332
|
+
'Э' => '"',
|
333
|
+
'я' => 'z', 'Я' => 'Z',
|
334
|
+
'ч' => 'x', 'Ч' => 'X',
|
335
|
+
'с' => 'c', 'С' => 'C',
|
336
|
+
'м' => 'v', 'М' => 'V',
|
337
|
+
'и' => 'b', 'И' => 'B',
|
338
|
+
'т' => 'n', 'Т' => 'N',
|
339
|
+
'ь' => 'm', 'Ь' => 'M',
|
340
|
+
'б' => ',',
|
341
|
+
'Б' => '<',
|
342
|
+
'ю' => '.',
|
343
|
+
'Ю' => '>',
|
344
|
+
'.' => '/',
|
345
|
+
',' => '?',
|
346
|
+
'"' => '@',
|
347
|
+
'№' => '#',
|
348
|
+
';' => '$',
|
349
|
+
':' => '^',
|
350
|
+
'?' => '&'
|
351
|
+
}.freeze
|
352
|
+
LAYOUT_PERSISTENT = {
|
353
|
+
'0' => '0',
|
354
|
+
'1' => '1',
|
355
|
+
'2' => '2',
|
356
|
+
'3' => '3',
|
357
|
+
'4' => '4',
|
358
|
+
'5' => '5',
|
359
|
+
'6' => '6',
|
360
|
+
'7' => '7',
|
361
|
+
'8' => '8',
|
362
|
+
'9' => '9',
|
363
|
+
'!' => '!',
|
364
|
+
'*' => '*',
|
365
|
+
'(' => '(',
|
366
|
+
')' => ')',
|
367
|
+
' ' => ' ',
|
368
|
+
'-' => '-',
|
369
|
+
'—' => '—',
|
370
|
+
'_' => '_',
|
371
|
+
'=' => '=',
|
372
|
+
'+' => '+'
|
373
|
+
}.freeze
|
374
|
+
TRANSLIT_RU_TO_EN_MAP = {
|
375
|
+
'щ' => 'shh', 'Щ' => 'Shh',
|
376
|
+
'ё' => 'yo', 'Ё' => 'Yo',
|
377
|
+
'ж' => 'zh', 'Ж' => 'Zh',
|
378
|
+
'ц' => 'cz', 'Ц' => 'Cz',
|
379
|
+
'ч' => 'ch', 'Ч' => 'Ch',
|
380
|
+
'ш' => 'sh', 'Ш' => 'Sh',
|
381
|
+
'ъ' => '``', 'Ъ' => '``',
|
382
|
+
'ы' => 'y`', 'Ы' => 'Y`',
|
383
|
+
'э' => 'e`', 'Э' => 'E`',
|
384
|
+
'ю' => 'yu', 'Ю' => 'Yu',
|
385
|
+
'я' => 'ya', 'Я' => 'Ya',
|
386
|
+
'а' => 'a', 'А' => 'A',
|
387
|
+
'б' => 'b', 'Б' => 'B',
|
388
|
+
'в' => 'v', 'В' => 'V',
|
389
|
+
'г' => 'g', 'Г' => 'G',
|
390
|
+
'д' => 'd', 'Д' => 'D',
|
391
|
+
'е' => 'e', 'Е' => 'E',
|
392
|
+
'з' => 'z', 'З' => 'Z',
|
393
|
+
'и' => 'i', 'И' => 'I',
|
394
|
+
'й' => 'j', 'Й' => 'J',
|
395
|
+
'к' => 'k', 'К' => 'K',
|
396
|
+
'л' => 'l', 'Л' => 'L',
|
397
|
+
'м' => 'm', 'М' => 'M',
|
398
|
+
'н' => 'n', 'Н' => 'N',
|
399
|
+
'о' => 'o', 'О' => 'O',
|
400
|
+
'п' => 'p', 'П' => 'P',
|
401
|
+
'р' => 'r', 'Р' => 'R',
|
402
|
+
'с' => 's', 'С' => 'S',
|
403
|
+
'т' => 't', 'Т' => 'T',
|
404
|
+
'у' => 'u', 'У' => 'U',
|
405
|
+
'ф' => 'f', 'Ф' => 'F',
|
406
|
+
'х' => 'x', 'Х' => 'X',
|
407
|
+
'ь' => '`', 'Ь' => '`'
|
408
|
+
}.freeze
|
409
|
+
|
410
|
+
# Public: варианты строки с учетом смены раскладки и/или транслитерации для Русского и Английского языков
|
411
|
+
# Смена раскладки выполняется в обе стороны, транслитерация - с Русского на Английский.
|
412
|
+
#
|
413
|
+
# str - String
|
414
|
+
#
|
415
|
+
# Examples
|
416
|
+
# transliteration_variations('Ruby')
|
417
|
+
# => ['Ruby', 'Кгин', 'Kgin']
|
418
|
+
# transliteration_variations('Слово')
|
419
|
+
# => ['Слово', 'Ckjdj', 'Slovo']
|
420
|
+
# transliteration_variations('КомпанияPro')
|
421
|
+
# => ['КомпанияPro']
|
422
|
+
# transliteration_variations('ويكيبيدي')
|
423
|
+
# => ['ويكيبيدي']
|
424
|
+
#
|
425
|
+
# returns Array of String
|
426
|
+
def transliteration_variations(str)
|
427
|
+
str_as_chars = str.chars
|
428
|
+
converted = convert_layout(str_as_chars)
|
429
|
+
|
430
|
+
layout_swap = converted[:chars].try(:join)
|
431
|
+
tranliterated = (converted[:was_ru] ? transliterate(str_as_chars) : transliterate(converted[:chars])).try(:join)
|
432
|
+
|
433
|
+
[str, layout_swap, tranliterated].tap(&:compact!)
|
434
|
+
end
|
435
|
+
|
436
|
+
private
|
437
|
+
|
438
|
+
# Internal: Смена раскладки массива символов, ru <-> en.
|
439
|
+
# Возвращает Hash с двумя ключами:
|
440
|
+
# :chars - Array, символы в другой раскладке(nil если не удалось сменить раскладку)
|
441
|
+
# :was_ru - Bool, принадлежали ли все символы русскому языку.
|
442
|
+
#
|
443
|
+
# splitted_string - Array of String
|
444
|
+
#
|
445
|
+
# Example:
|
446
|
+
# convert_layout(['a', 'b', 'c']) =>
|
447
|
+
# {chars: ['ф', 'и', 'с'], was_ru: false}
|
448
|
+
# convert_layout(['а', 'б', 'в']) =>
|
449
|
+
# {chars: ['f', ',', 'd'], was_ru: true}
|
450
|
+
# convert_layout(['ﻮ', 'ﻴ', 'ﻜ']) =>
|
451
|
+
# {chars: nil, was_ru: false}
|
452
|
+
#
|
453
|
+
# returns Hash
|
454
|
+
def convert_layout(splitted_string)
|
455
|
+
str_arr = splitted_string.map do |char|
|
456
|
+
LAYOUT_RU_TO_EN_MAP[char] || LAYOUT_PERSISTENT[char] || break
|
457
|
+
end
|
458
|
+
|
459
|
+
return {chars: str_arr, was_ru: true} if str_arr
|
460
|
+
|
461
|
+
{chars: splitted_string.map { |char| LAYOUT_EN_TO_RU_MAP[char] || LAYOUT_PERSISTENT[char] || break },
|
462
|
+
was_ru: false}
|
463
|
+
end
|
464
|
+
|
465
|
+
# Internal: Транслитерация массива символов, ru -> en
|
466
|
+
# Если символа нет в словаре, не изменяет его.
|
467
|
+
#
|
468
|
+
# splitted_string - Array of String
|
469
|
+
#
|
470
|
+
# Returns Array
|
471
|
+
def transliterate(splitted_string)
|
472
|
+
return unless splitted_string
|
473
|
+
|
474
|
+
splitted_string.map { |char| TRANSLIT_RU_TO_EN_MAP[char] || char }
|
475
|
+
end
|
476
|
+
end
|
477
|
+
extend Transliteration
|
259
478
|
end
|
@@ -154,10 +154,11 @@ class String
|
|
154
154
|
end
|
155
155
|
end
|
156
156
|
|
157
|
+
WIN_1251_ENCODING = 'windows-1251'.freeze
|
157
158
|
# shorthand
|
158
159
|
def detect_encoding
|
159
160
|
e = ::CharDet.detect(self)["encoding"]
|
160
|
-
e =
|
161
|
+
e = WIN_1251_ENCODING if StringTools::CharDet::CP1251_COMPATIBLE_ENCODINGS.include?(e)
|
161
162
|
e
|
162
163
|
end
|
163
164
|
|
data/lib/string_tools/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergey D.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: actionpack
|