string_tools 0.12.2 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.drone.yml +1 -1
- data/CHANGELOG.md +14 -1
- data/README.md +25 -0
- data/lib/string_tools.rb +231 -12
- data/lib/string_tools/core_ext/string.rb +2 -1
- data/lib/string_tools/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: aacc7b6fad14a8c6972725fe7fc740893b3a24f3
|
4
|
+
data.tar.gz: 6055322c6803fb896c76290fe61c20d1b3202458
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 56d07342c5b225ce65eece9dae4b89fdee2a335df7f6663c1b6231bdbf24573777f1b3f69a86a43d96079a05cabafde6bb0ceff373c6326f23fd672bada2549b
|
7
|
+
data.tar.gz: 9141388d89966dff07e3f8645142faf00759e9f824fed8ed585d387d0b84f3abc59fb90aef244603de0fb937c9242a42d5f7941a3f516f0640d95e6fbc8123e2
|
data/.drone.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
|
-
# v0.
|
1
|
+
# v0.13.0
|
2
|
+
|
3
|
+
* 2019-10-08 [b182709](../../commit/b182709) - __(Ilya Zhidkov)__ Release 0.13.0
|
4
|
+
* 2019-09-24 [87d134d](../../commit/87d134d) - __(evseevleo)__ chore(transliteration): add examples to README
|
5
|
+
* 2019-09-23 [4effcb9](../../commit/4effcb9) - __(evseevleo)__ feat(translit): add keyboard layout switch&transliteration
|
6
|
+
https://jira.railsc.ru/browse/BPC-15151
|
2
7
|
|
8
|
+
* 2018-06-29 [f5213df](../../commit/f5213df) - __(bibendi)__ chore: User latest gem-publication
|
3
9
|
* 2018-06-29 [7dfb4d8](../../commit/7dfb4d8) - __(bibendi)__ chore: Fix mounting rubygems in drone.yml
|
4
10
|
* 2018-06-29 [238a46f](../../commit/238a46f) - __(bibendi)__ chore: Set readonly gem/credentials on drone.yml
|
5
11
|
* 2018-06-29 [38b862d](../../commit/38b862d) - __(bibendi)__ chore: Add test section into droen.yml
|
@@ -74,6 +80,13 @@ https://jira.railsc.ru/browse/PC4-16353
|
|
74
80
|
# v3.0.1
|
75
81
|
|
76
82
|
|
83
|
+
# v0.12.2
|
84
|
+
|
85
|
+
* 2018-06-29 [7dfb4d8](../../commit/7dfb4d8) - __(bibendi)__ chore: Fix mounting rubygems in drone.yml
|
86
|
+
* 2018-06-29 [238a46f](../../commit/238a46f) - __(bibendi)__ chore: Set readonly gem/credentials on drone.yml
|
87
|
+
* 2018-06-29 [38b862d](../../commit/38b862d) - __(bibendi)__ chore: Add test section into droen.yml
|
88
|
+
* 2018-06-29 [3004b9a](../../commit/3004b9a) - __(bibendi)__ chore: Add automatic publication
|
89
|
+
|
77
90
|
# v0.12.1
|
78
91
|
|
79
92
|
* 2018-06-19 [8f2da34](../../commit/8f2da34) - __(Simeon Movchan)__ fix: exclude \n and \t in nonprintable characters
|
data/README.md
CHANGED
@@ -22,6 +22,31 @@ Or install it yourself as:
|
|
22
22
|
|
23
23
|
TODO: Write usage instructions here
|
24
24
|
|
25
|
+
### Transliteration
|
26
|
+
|
27
|
+
Usage: ```StringTools.transliteration_variations(<string>)```.
|
28
|
+
The method returns an Array of Strings: the given string, the same string in the other keyboard layout, and the transliteration of whichever of the first two strings happens to be in Russian.
|
29
|
+
If the string contains a character that isn't part of the RU <-> EN keyboard mapping, or if it contains both Russian and English characters, only the given string wrapped in an Array is returned.
|
30
|
+
Examples:
|
31
|
+
```ruby
|
32
|
+
StringTools.transliteration_variations('"Мы почитаем всех нулями, А единицами — себя." - А. С. Пушкин')
|
33
|
+
=> ["\"Мы почитаем всех нулями, А единицами — себя.\" - А. С. Пушкин",
|
34
|
+
"@Vs gjxbnftv dct[ yekzvb? F tlbybwfvb — ct,z/@ - F/ C/ Geirby",
|
35
|
+
"\"My` pochitaem vsex nulyami, A ediniczami — sebya.\" - A. S. Pushkin"]
|
36
|
+
```
|
37
|
+
```ruby
|
38
|
+
StringTools.transliteration_variations('Ntrcn d ytdthyjq hfcrkflrt')
|
39
|
+
=> ["Ntrcn d ytdthyjq hfcrkflrt", "Текст в неверной раскладке", "Tekst v nevernoj raskladke"]
|
40
|
+
```
|
41
|
+
```ruby
|
42
|
+
StringTools.transliteration_variations('Еуче шт цкщтп лунищфкв дфнщгею')
|
43
|
+
=> ["Еуче шт цкщтп лунищфкв дфнщгею", "Text in wrong keyboard layout.", "Euche sht czkshhtp lunishhfkv dfnshhgeyu"]
|
44
|
+
```
|
45
|
+
```ruby
|
46
|
+
StringTools.transliteration_variations('ﻮﻴﻜﻴﺒﻳﺪﻳ')
|
47
|
+
=> ["ﻮﻴﻜﻴﺒﻳﺪﻳ"]
|
48
|
+
```
|
49
|
+
|
25
50
|
## Development
|
26
51
|
|
27
52
|
After checking out the repo, run `bundle install` to install dependencies. Then, run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/string_tools.rb
CHANGED
@@ -9,6 +9,9 @@ module StringTools
|
|
9
9
|
autoload :HTML, 'string_tools/html'
|
10
10
|
|
11
11
|
module CharDet
|
12
|
+
CP1251_COMPATIBLE_ENCODINGS =
|
13
|
+
%w(windows-1253 windows-1254 windows-1255 windows-1256 windows-1258 EUC-TW ISO-8859-8).freeze
|
14
|
+
|
12
15
|
# Возвращает true если строка содержит допустимую
|
13
16
|
# последовательность байтов для кодировки utf8 и false в обратном случае
|
14
17
|
# см. http://en.wikipedia.org/wiki/UTF-8
|
@@ -29,18 +32,6 @@ module StringTools
|
|
29
32
|
def to_cp1251(str)
|
30
33
|
str.to_cp1251
|
31
34
|
end
|
32
|
-
|
33
|
-
def cp1251_compatible_encodings
|
34
|
-
[
|
35
|
-
'windows-1253',
|
36
|
-
'windows-1254',
|
37
|
-
'windows-1255',
|
38
|
-
'windows-1256',
|
39
|
-
'windows-1258',
|
40
|
-
'EUC-TW',
|
41
|
-
'ISO-8859-8'
|
42
|
-
]
|
43
|
-
end
|
44
35
|
end
|
45
36
|
extend CharDet
|
46
37
|
|
@@ -256,4 +247,232 @@ module StringTools
|
|
256
247
|
end
|
257
248
|
end
|
258
249
|
extend Uri
|
250
|
+
|
251
|
+
module Transliteration
|
252
|
+
LAYOUT_EN_TO_RU_MAP = {
|
253
|
+
'q' => 'й', 'Q' => 'Й',
|
254
|
+
'w' => 'ц', 'W' => 'Ц',
|
255
|
+
'e' => 'у', 'E' => 'У',
|
256
|
+
'r' => 'к', 'R' => 'К',
|
257
|
+
't' => 'е', 'T' => 'Е',
|
258
|
+
'y' => 'н', 'Y' => 'Н',
|
259
|
+
'u' => 'г', 'U' => 'Г',
|
260
|
+
'i' => 'ш', 'I' => 'Ш',
|
261
|
+
'o' => 'щ', 'O' => 'Щ',
|
262
|
+
'p' => 'з', 'P' => 'З',
|
263
|
+
'[' => 'х',
|
264
|
+
'{' => 'Х',
|
265
|
+
']' => 'ъ',
|
266
|
+
'}' => 'Ъ',
|
267
|
+
'|' => '/',
|
268
|
+
'`' => 'ё',
|
269
|
+
'~' => 'Ё',
|
270
|
+
'a' => 'ф', 'A' => 'Ф',
|
271
|
+
's' => 'ы', 'S' => 'Ы',
|
272
|
+
'd' => 'в', 'D' => 'В',
|
273
|
+
'f' => 'а', 'F' => 'А',
|
274
|
+
'g' => 'п', 'G' => 'П',
|
275
|
+
'h' => 'р', 'H' => 'Р',
|
276
|
+
'j' => 'о', 'J' => 'О',
|
277
|
+
'k' => 'л', 'K' => 'Л',
|
278
|
+
'l' => 'д', 'L' => 'Д',
|
279
|
+
';' => 'ж',
|
280
|
+
':' => 'Ж',
|
281
|
+
"'" => 'э',
|
282
|
+
'"' => 'Э',
|
283
|
+
'z' => 'я', 'Z' => 'Я',
|
284
|
+
'x' => 'ч', 'X' => 'Ч',
|
285
|
+
'c' => 'с', 'C' => 'С',
|
286
|
+
'v' => 'м', 'V' => 'М',
|
287
|
+
'b' => 'и', 'B' => 'И',
|
288
|
+
'n' => 'т', 'N' => 'Т',
|
289
|
+
'm' => 'ь', 'M' => 'Ь',
|
290
|
+
',' => 'б',
|
291
|
+
'<' => 'Б',
|
292
|
+
'.' => 'ю',
|
293
|
+
'>' => 'Ю',
|
294
|
+
'/' => '.',
|
295
|
+
'?' => ',',
|
296
|
+
'@' => '"',
|
297
|
+
'#' => '№',
|
298
|
+
'$' => ';',
|
299
|
+
'^' => ':',
|
300
|
+
'&' => '?'
|
301
|
+
}.freeze
|
302
|
+
LAYOUT_RU_TO_EN_MAP = {
|
303
|
+
'й' => 'q', 'Й' => 'Q',
|
304
|
+
'ц' => 'w', 'Ц' => 'W',
|
305
|
+
'у' => 'e', 'У' => 'E',
|
306
|
+
'к' => 'r', 'К' => 'R',
|
307
|
+
'е' => 't', 'Е' => 'T',
|
308
|
+
'н' => 'y', 'Н' => 'Y',
|
309
|
+
'г' => 'u', 'Г' => 'U',
|
310
|
+
'ш' => 'i', 'Ш' => 'I',
|
311
|
+
'щ' => 'o', 'Щ' => 'O',
|
312
|
+
'з' => 'p', 'З' => 'P',
|
313
|
+
'х' => '[',
|
314
|
+
'Х' => '{',
|
315
|
+
'ъ' => ']',
|
316
|
+
'Ъ' => '}',
|
317
|
+
'/' => '|',
|
318
|
+
'ё' => '`',
|
319
|
+
'Ё' => '~',
|
320
|
+
'ф' => 'a', 'Ф' => 'A',
|
321
|
+
'ы' => 's', 'Ы' => 'S',
|
322
|
+
'в' => 'd', 'В' => 'D',
|
323
|
+
'а' => 'f', 'А' => 'F',
|
324
|
+
'п' => 'g', 'П' => 'G',
|
325
|
+
'р' => 'h', 'Р' => 'H',
|
326
|
+
'о' => 'j', 'О' => 'J',
|
327
|
+
'л' => 'k', 'Л' => 'K',
|
328
|
+
'д' => 'l', 'Д' => 'L',
|
329
|
+
'ж' => ';',
|
330
|
+
'Ж' => ':',
|
331
|
+
'э' => "'",
|
332
|
+
'Э' => '"',
|
333
|
+
'я' => 'z', 'Я' => 'Z',
|
334
|
+
'ч' => 'x', 'Ч' => 'X',
|
335
|
+
'с' => 'c', 'С' => 'C',
|
336
|
+
'м' => 'v', 'М' => 'V',
|
337
|
+
'и' => 'b', 'И' => 'B',
|
338
|
+
'т' => 'n', 'Т' => 'N',
|
339
|
+
'ь' => 'm', 'Ь' => 'M',
|
340
|
+
'б' => ',',
|
341
|
+
'Б' => '<',
|
342
|
+
'ю' => '.',
|
343
|
+
'Ю' => '>',
|
344
|
+
'.' => '/',
|
345
|
+
',' => '?',
|
346
|
+
'"' => '@',
|
347
|
+
'№' => '#',
|
348
|
+
';' => '$',
|
349
|
+
':' => '^',
|
350
|
+
'?' => '&'
|
351
|
+
}.freeze
|
352
|
+
LAYOUT_PERSISTENT = {
|
353
|
+
'0' => '0',
|
354
|
+
'1' => '1',
|
355
|
+
'2' => '2',
|
356
|
+
'3' => '3',
|
357
|
+
'4' => '4',
|
358
|
+
'5' => '5',
|
359
|
+
'6' => '6',
|
360
|
+
'7' => '7',
|
361
|
+
'8' => '8',
|
362
|
+
'9' => '9',
|
363
|
+
'!' => '!',
|
364
|
+
'*' => '*',
|
365
|
+
'(' => '(',
|
366
|
+
')' => ')',
|
367
|
+
' ' => ' ',
|
368
|
+
'-' => '-',
|
369
|
+
'—' => '—',
|
370
|
+
'_' => '_',
|
371
|
+
'=' => '=',
|
372
|
+
'+' => '+'
|
373
|
+
}.freeze
|
374
|
+
TRANSLIT_RU_TO_EN_MAP = {
|
375
|
+
'щ' => 'shh', 'Щ' => 'Shh',
|
376
|
+
'ё' => 'yo', 'Ё' => 'Yo',
|
377
|
+
'ж' => 'zh', 'Ж' => 'Zh',
|
378
|
+
'ц' => 'cz', 'Ц' => 'Cz',
|
379
|
+
'ч' => 'ch', 'Ч' => 'Ch',
|
380
|
+
'ш' => 'sh', 'Ш' => 'Sh',
|
381
|
+
'ъ' => '``', 'Ъ' => '``',
|
382
|
+
'ы' => 'y`', 'Ы' => 'Y`',
|
383
|
+
'э' => 'e`', 'Э' => 'E`',
|
384
|
+
'ю' => 'yu', 'Ю' => 'Yu',
|
385
|
+
'я' => 'ya', 'Я' => 'Ya',
|
386
|
+
'а' => 'a', 'А' => 'A',
|
387
|
+
'б' => 'b', 'Б' => 'B',
|
388
|
+
'в' => 'v', 'В' => 'V',
|
389
|
+
'г' => 'g', 'Г' => 'G',
|
390
|
+
'д' => 'd', 'Д' => 'D',
|
391
|
+
'е' => 'e', 'Е' => 'E',
|
392
|
+
'з' => 'z', 'З' => 'Z',
|
393
|
+
'и' => 'i', 'И' => 'I',
|
394
|
+
'й' => 'j', 'Й' => 'J',
|
395
|
+
'к' => 'k', 'К' => 'K',
|
396
|
+
'л' => 'l', 'Л' => 'L',
|
397
|
+
'м' => 'm', 'М' => 'M',
|
398
|
+
'н' => 'n', 'Н' => 'N',
|
399
|
+
'о' => 'o', 'О' => 'O',
|
400
|
+
'п' => 'p', 'П' => 'P',
|
401
|
+
'р' => 'r', 'Р' => 'R',
|
402
|
+
'с' => 's', 'С' => 'S',
|
403
|
+
'т' => 't', 'Т' => 'T',
|
404
|
+
'у' => 'u', 'У' => 'U',
|
405
|
+
'ф' => 'f', 'Ф' => 'F',
|
406
|
+
'х' => 'x', 'Х' => 'X',
|
407
|
+
'ь' => '`', 'Ь' => '`'
|
408
|
+
}.freeze
|
409
|
+
|
410
|
+
# Public: варианты строки с учетом смены раскладки и/или транслитерации для Русского и Английского языков
|
411
|
+
# Смена раскладки выполняется в обе стороны, транслитерация - с Русского на Английский.
|
412
|
+
#
|
413
|
+
# str - String
|
414
|
+
#
|
415
|
+
# Examples
|
416
|
+
# transliteration_variations('Ruby')
|
417
|
+
# => ['Ruby', 'Кгин', 'Kgin']
|
418
|
+
# transliteration_variations('Слово')
|
419
|
+
# => ['Слово', 'Ckjdj', 'Slovo']
|
420
|
+
# transliteration_variations('КомпанияPro')
|
421
|
+
# => ['КомпанияPro']
|
422
|
+
# transliteration_variations('ويكيبيدي')
|
423
|
+
# => ['ويكيبيدي']
|
424
|
+
#
|
425
|
+
# returns Array of String
|
426
|
+
def transliteration_variations(str)
|
427
|
+
str_as_chars = str.chars
|
428
|
+
converted = convert_layout(str_as_chars)
|
429
|
+
|
430
|
+
layout_swap = converted[:chars].try(:join)
|
431
|
+
tranliterated = (converted[:was_ru] ? transliterate(str_as_chars) : transliterate(converted[:chars])).try(:join)
|
432
|
+
|
433
|
+
[str, layout_swap, tranliterated].tap(&:compact!)
|
434
|
+
end
|
435
|
+
|
436
|
+
private
|
437
|
+
|
438
|
+
# Internal: Смена раскладки массива символов, ru <-> en.
|
439
|
+
# Возвращает Hash с двумя ключами:
|
440
|
+
# :chars - Array, символы в другой раскладке(nil если не удалось сменить раскладку)
|
441
|
+
# :was_ru - Bool, принадлежали ли все символы русскому языку.
|
442
|
+
#
|
443
|
+
# splitted_string - Array of String
|
444
|
+
#
|
445
|
+
# Example:
|
446
|
+
# convert_layout(['a', 'b', 'c']) =>
|
447
|
+
# {chars: ['ф', 'и', 'с'], was_ru: false}
|
448
|
+
# convert_layout(['а', 'б', 'в']) =>
|
449
|
+
# {chars: ['f', ',', 'd'], was_ru: true}
|
450
|
+
# convert_layout(['ﻮ', 'ﻴ', 'ﻜ']) =>
|
451
|
+
# {chars: nil, was_ru: false}
|
452
|
+
#
|
453
|
+
# returns Hash
|
454
|
+
def convert_layout(splitted_string)
|
455
|
+
str_arr = splitted_string.map do |char|
|
456
|
+
LAYOUT_RU_TO_EN_MAP[char] || LAYOUT_PERSISTENT[char] || break
|
457
|
+
end
|
458
|
+
|
459
|
+
return {chars: str_arr, was_ru: true} if str_arr
|
460
|
+
|
461
|
+
{chars: splitted_string.map { |char| LAYOUT_EN_TO_RU_MAP[char] || LAYOUT_PERSISTENT[char] || break },
|
462
|
+
was_ru: false}
|
463
|
+
end
|
464
|
+
|
465
|
+
# Internal: Транслитерация массива символов, ru -> en
|
466
|
+
# Если символа нет в словаре, не изменяет его.
|
467
|
+
#
|
468
|
+
# splitted string - Array of String
|
469
|
+
#
|
470
|
+
# Returns Array
|
471
|
+
def transliterate(splitted_string)
|
472
|
+
return unless splitted_string
|
473
|
+
|
474
|
+
splitted_string.map { |char| TRANSLIT_RU_TO_EN_MAP[char] || char }
|
475
|
+
end
|
476
|
+
end
|
477
|
+
extend Transliteration
|
259
478
|
end
|
@@ -154,10 +154,11 @@ class String
|
|
154
154
|
end
|
155
155
|
end
|
156
156
|
|
157
|
+
WIN_1251_ENCODING = 'windows-1251'.freeze
|
157
158
|
# shorthand
|
158
159
|
def detect_encoding
|
159
160
|
e = ::CharDet.detect(self)["encoding"]
|
160
|
-
e =
|
161
|
+
e = WIN_1251_ENCODING if StringTools::CharDet::CP1251_COMPATIBLE_ENCODINGS.include?(e)
|
161
162
|
e
|
162
163
|
end
|
163
164
|
|
data/lib/string_tools/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: string_tools
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sergey D.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2019-10-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: actionpack
|