unicode-confusable 1.12.0 → 1.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +1 -1
- data/MIT-LICENSE.txt +1 -1
- data/README.md +7 -13
- data/data/confusable.marshal.gz +0 -0
- data/lib/unicode/confusable/constants.rb +2 -2
- data/spec/unicode_confusable_spec.rb +2 -2
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b0fb80c9f71641f4246d0b7fe969d6495c09678168204cf88dbf660a08e83e96
|
4
|
+
data.tar.gz: 71b09a54d8a909ab9a4718b0d3a561572cf7325e8f91febafadcfdc4d68b6b42
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6b2d5fd1fd5fa69645693e266c4180edc4c6b042289f28f69a424f116d2f3ca4dd17560d824bd164942079c56891c016497b0eac9b5d3bd513e27b9ec03595cb
|
7
|
+
data.tar.gz: f4614d86cb2a393e8e83ddf83f3fd4d65d35ddfacc53dc192439eef1d79c692b0e025853010f6878adce8e8c7d17bbb86f1355f1de6cd939ad09ad9bdb18a3b3
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/MIT-LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
Checks whether two strings are visually confusable, as described in [Unicode® Technical Standard #39](https://www.unicode.org/reports/tr39/#Confusable_Detection): both strings are transformed into a skeleton format before they are compared. The skeleton is generated by normalizing the string ([NFD](http://unicode.org/reports/tr15/#Norm_Forms)), removing ignorable characters, replacing [confusable characters](https://unicode.org/Public/security/16.0.0/confusables.txt), and normalizing the string again.
|
4
4
|
|
5
|
-
Unicode version: **
|
5
|
+
Unicode version: **17.0.0** (September 2025)
|
6
6
|
|
7
7
|
\* The Unicode normalization [depends on your Ruby version](https://idiosyncratic-ruby.com/73-unicode-version-mapping.html)
|
8
8
|
|
@@ -35,37 +35,31 @@ Unicode::Confusable.skeleton "ℜ𝘂ᖯʏ" # => "Ruby"
|
|
35
35
|
|
36
36
|
### List
|
37
37
|
|
38
|
-
List all
|
38
|
+
List all characters that map to the confusable exemplar given:
|
39
39
|
|
40
40
|
```ruby
|
41
41
|
Unicode::Confusable.list("o", false)
|
42
|
-
# => ["ం", "ಂ", "ം", "ං", "०", "੦", "૦", "
|
42
|
+
# => ["ం", "ಂ", "ം", "ං", "०", "০", "੦", "૦", "୦", "௦", "౦", "൦", "๐", "໐", "၀", "០", "𑓐", "٥", "۵", "o", "ℴ", "𝐨", "𝑜", "𝒐", "𝓸", "𝔬", "𝕠", "𝖔", "𝗈", "𝗼", "𝘰", "𝙤", "𝚘", "ᴏ", "ᴑ", "ꬽ", "ο", "𝛐", "𝜊", "𝝄", "𝝾", "𝞸", "σ", "𝛔", "𝜎", "𝝈", "𝞂", "𝞼", "ⲟ", "ϭ", "о", "ჿ", "օ", "ס", "ه", "𞸤", "𞹤", "𞺄", "ﻫ", "ﻬ", "ﻪ", "ﻩ", "ھ", "ﮬ", "ﮭ", "ﮫ", "ﮪ", "ہ", "ﮨ", "ﮩ", "ﮧ", "ﮦ", "ە", "ഠ", "ဝ", "𐓪", "𑣈", "𑣗", "𐐬"]
|
43
43
|
```
|
44
44
|
|
45
45
|
If you omit the second parameter, the list also includes confusables of which the given character is only a part:
|
46
46
|
|
47
47
|
```ruby
|
48
48
|
Unicode::Confusable.list("o")
|
49
|
-
# => ["⒪", "ꜵ", "℅", "ᴔ", "ꭁ", "ꭂ", "ﷲ", "№", "ం", "ಂ", "ം", "ං", "०", "੦", "૦", "
|
49
|
+
# => ["⒪", "ꜵ", "℅", "ᴔ", "ꭁ", "ꭂ", "ﷲ", "№", "ం", "ಂ", "ം", "ං", "०", "০", "੦", "૦", "୦", "௦", "౦", "൦", "๐", "໐", "၀", "០", "𑓐", "٥", "۵", "o", "ℴ", "𝐨", "𝑜", "𝒐", "𝓸", "𝔬", "𝕠", "𝖔", "𝗈", "𝗼", "𝘰", "𝙤", "𝚘", "ᴏ", "ᴑ", "ꬽ", "ο", "𝛐", "𝜊", "𝝄", "𝝾", "𝞸", "σ", "𝛔", "𝜎", "𝝈", "𝞂", "𝞼", "ⲟ", "ϭ", "о", "ჿ", "օ", "ס", "ه", "𞸤", "𞹤", "𞺄", "ﻫ", "ﻬ", "ﻪ", "ﻩ", "ھ", "ﮬ", "ﮭ", "ﮫ", "ﮪ", "ہ", "ﮨ", "ﮩ", "ﮧ", "ﮦ", "ە", "ഠ", "ဝ", "𐓪", "𑣈", "𑣗", "𐐬", "ۿ", "ø", "ꬾ", "ɵ", "ꝋ", "ⲑ", "ө", "ѳ", "ꮎ", "ꮻ", "ꭴ", "ﳙ", "ơ", "œ", "ɶ", "∞", "ꝏ", "ꚙ", "ﳗ", "ﱑ", "ﳘ", "ﱒ", "ﶓ", "ﶔ", "ﱓ", "ﱔ", "ൟ", "თ", "တ", "ꭣ", "ﲠ", "ﳢ", "ﲥ", "ﳤ", "ﷻ", "ﴱ", "ﳨ", "ﴲ", "ﳪ", "ﷺ", "ﷷ", "ﳍ", "ﳖ", "ﳯ", "ﳞ", "ﳱ", "ﳦ", "ﲛ", "ﳠ", "ﯭ", "ﯬ"]
|
50
50
|
```
|
51
51
|
|
52
52
|
## No Bidi-Confusable Check
|
53
53
|
|
54
54
|
Testing for bidirectional confusables is currently not supported.
|
55
55
|
|
56
|
-
##
|
56
|
+
## Single-script / Mixed-script / Whole-script
|
57
57
|
|
58
|
-
TR 39 also describes mechanisms for
|
59
|
-
|
60
|
-
- Single-script confusable
|
61
|
-
- Mixed-script confusable
|
62
|
-
- Whole-script confusable
|
63
|
-
|
64
|
-
This is currently not supported by this gem.
|
58
|
+
TR 39 also describes mechanisms for further categorization of confusables. This is currently not part of this gem; however, the [unicode-scripts gem](https://github.com/janlelis/unicode-scripts) does include mixed-script detection, which you can use for this purpose.
|
65
59
|
|
66
60
|
See [unicode-x](https://github.com/janlelis/unicode-x) for more Unicode related micro libraries.
|
67
61
|
|
68
62
|
## MIT License
|
69
63
|
|
70
|
-
- Copyright (C) 2016-
|
64
|
+
- Copyright (C) 2016-2025 Jan Lelis <https://janlelis.com>. Released under the MIT license.
|
71
65
|
- Unicode data: https://www.unicode.org/copyright.html#Exhibit1
|
data/data/confusable.marshal.gz
CHANGED
Binary file
|
@@ -2,8 +2,8 @@
|
|
2
2
|
|
3
3
|
module Unicode
|
4
4
|
module Confusable
|
5
|
-
VERSION = "1.
|
6
|
-
UNICODE_VERSION = "
|
5
|
+
VERSION = "1.13.0"
|
6
|
+
UNICODE_VERSION = "17.0.0"
|
7
7
|
DATA_DIRECTORY = File.expand_path(File.dirname(__FILE__) + "/../../../data/").freeze
|
8
8
|
INDEX_FILENAME = (DATA_DIRECTORY + "/confusable.marshal.gz").freeze
|
9
9
|
end
|
@@ -28,13 +28,13 @@ describe Unicode::Confusable do
|
|
28
28
|
|
29
29
|
describe ".list(char)" do
|
30
30
|
it "will return list of confusables for a character, also confusables where given character is part of" do
|
31
|
-
assert_equal ["⒪", "ꜵ", "℅", "ᴔ", "ꭁ", "ꭂ", "ﷲ", "№", "ం", "ಂ", "ം", "ං", "०", "੦", "૦", "
|
31
|
+
assert_equal ["⒪", "ꜵ", "℅", "ᴔ", "ꭁ", "ꭂ", "ﷲ", "№", "ం", "ಂ", "ം", "ං", "०", "০", "੦", "૦", "୦", "௦", "౦", "൦", "๐", "໐", "၀", "០", "𑓐", "٥", "۵", "o", "ℴ", "𝐨", "𝑜", "𝒐", "𝓸", "𝔬", "𝕠", "𝖔", "𝗈", "𝗼", "𝘰", "𝙤", "𝚘", "ᴏ", "ᴑ", "ꬽ", "ο", "𝛐", "𝜊", "𝝄", "𝝾", "𝞸", "σ", "𝛔", "𝜎", "𝝈", "𝞂", "𝞼", "ⲟ", "ϭ", "о", "ჿ", "օ", "ס", "ه", "𞸤", "𞹤", "𞺄", "ﻫ", "ﻬ", "ﻪ", "ﻩ", "ھ", "ﮬ", "ﮭ", "ﮫ", "ﮪ", "ہ", "ﮨ", "ﮩ", "ﮧ", "ﮦ", "ە", "ഠ", "ဝ", "𐓪", "𑣈", "𑣗", "𐐬", "ۿ", "ø", "ꬾ", "ɵ", "ꝋ", "ⲑ", "ө", "ѳ", "ꮎ", "ꮻ", "ꭴ", "ﳙ", "ơ", "œ", "ɶ", "∞", "ꝏ", "ꚙ", "ﳗ", "ﱑ", "ﳘ", "ﱒ", "ﶓ", "ﶔ", "ﱓ", "ﱔ", "ൟ", "თ", "တ", "ꭣ", "ﲠ", "ﳢ", "ﲥ", "ﳤ", "ﷻ", "ﴱ", "ﳨ", "ﴲ", "ﳪ", "ﷺ", "ﷷ", "ﳍ", "ﳖ", "ﳯ", "ﳞ", "ﳱ", "ﳦ", "ﲛ", "ﳠ", "ﯭ", "ﯬ"], Unicode::Confusable.list("o")
|
32
32
|
end
|
33
33
|
end
|
34
34
|
|
35
35
|
describe ".list(char, false)" do
|
36
36
|
it "will return list of confusables for a character, only direct confusables" do
|
37
|
-
assert_equal ["ం", "ಂ", "ം", "ං", "०", "੦", "૦", "
|
37
|
+
assert_equal ["ం", "ಂ", "ം", "ං", "०", "০", "੦", "૦", "୦", "௦", "౦", "൦", "๐", "໐", "၀", "០", "𑓐", "٥", "۵", "o", "ℴ", "𝐨", "𝑜", "𝒐", "𝓸", "𝔬", "𝕠", "𝖔", "𝗈", "𝗼", "𝘰", "𝙤", "𝚘", "ᴏ", "ᴑ", "ꬽ", "ο", "𝛐", "𝜊", "𝝄", "𝝾", "𝞸", "σ", "𝛔", "𝜎", "𝝈", "𝞂", "𝞼", "ⲟ", "ϭ", "о", "ჿ", "օ", "ס", "ه", "𞸤", "𞹤", "𞺄", "ﻫ", "ﻬ", "ﻪ", "ﻩ", "ھ", "ﮬ", "ﮭ", "ﮫ", "ﮪ", "ہ", "ﮨ", "ﮩ", "ﮧ", "ﮦ", "ە", "ഠ", "ဝ", "𐓪", "𑣈", "𑣗", "𐐬"], Unicode::Confusable.list("o", false)
|
38
38
|
end
|
39
39
|
end
|
40
40
|
end
|
metadata
CHANGED
@@ -1,16 +1,16 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unicode-confusable
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Lelis
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-09-09 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
|
-
description: "[Unicode
|
13
|
+
description: "[Unicode 17.0.0] Compares two strings if they are visually confusable
|
14
14
|
as described in Unicode® Technical Standard #39: Both strings get transformed into
|
15
15
|
a skeleton format before comparing them. The skeleton is generated by normalizing
|
16
16
|
the string, replacing confusable characters, and then normalizing the string again."
|