east_asian_width_simple 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/lib/east_asian_width_simple.rb +59 -0
- metadata +78 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 9bdea9f77fd2c6429ec5e7cd8e541717c1f06ab33f1c3b270cc2bb97c6723d7a
|
4
|
+
data.tar.gz: 27375d92f4329323c09fa6dba5cae653757dd2e32102c7f03dbb1f691dde8be9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 1a71a14256e90f3d87cd0d9533cef3552aad0a9ac78a60a4f066f063598148a261f9b549bb76e11e5d4d28575853802a293d1b1ee1b11881a0f684ea67b166cb
|
7
|
+
data.tar.gz: 719022070f2f7afa493ddd58cb4cd9e8e8f662eb6ddfd3334e50335c9bc809744ad8cc92983ac7954a4024737713d2d870754e4638a6e6e9eca630fa28efebe1
|
@@ -0,0 +1,59 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# Calculates the display width of strings by looking up the East Asian Width
# property of each code point in a table built from an EastAsianWidth.txt
# data file.
class EastAsianWidthSimple
  # Base class for the errors raised by this class.
  Error = Class.new(StandardError)
  # Raised by #lookup when a code point has no entry in the lookup table.
  MissingCodePointError = Class.new(Error)

  # Data lines begin with a hexadecimal code point; any other line
  # (comments, blank lines) is skipped while parsing.
  HEX_DIGIT_REGEXP = /\h/
  # Display width per property value; nil marks a property with no fixed
  # width (#lookup_width warns and falls back to 1 for those).
  PROPERTY_TO_WIDTH_MAP = { A: nil, F: 2, H: 1, N: nil, Na: 1, W: 2 }.freeze

  # Builds the lookup table from the given data.
  #
  # Each data line is `code_point;property` or `first..last;property`,
  # optionally followed by a `#` comment. Whitespace around the `;`
  # separator is accepted, so both the compact layout (`0020;Na`) and the
  # padded layout (`0020 ; Na`) are parsed.
  #
  # @param east_asian_width_txt_io [#each_line] source yielding the lines of
  #   an EastAsianWidth.txt file (for example a File or StringIO)
  def initialize(east_asian_width_txt_io)
    # 2**21 slots cover every Unicode code point (the maximum is 0x10FFFF).
    @lookup_table = Array.new(2**21)
    east_asian_width_txt_io.each_line do |line|
      next unless line.start_with?(HEX_DIGIT_REGEXP)

      # Drop the trailing comment first, then split the fields on ';' and
      # strip them so padding around the separator is not significant.
      code_point, property = line.split('#', 2).first.split(';').map(&:strip)
      if code_point.include?('..')
        first, last = code_point.split('..')
        @lookup_table.fill(property.to_sym, first.to_i(16)..last.to_i(16))
      else
        @lookup_table[code_point.to_i(16)] = property.to_sym
      end
    end
  end

  # Returns the total display width of +string+.
  #
  # The string is transcoded to UTF-8 first when it is in another encoding,
  # then the widths of its code points are summed.
  #
  # @param string [String]
  # @return [Integer] sum of #lookup_width over every code point
  # @raise [MissingCodePointError] if a code point is not in the table
  def string_width(string)
    string = string.encode(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
    string.codepoints.sum { |codepoint| lookup_width(codepoint) }
  end

  # Returns the display width of a single code point.
  #
  # When the property has no width in PROPERTY_TO_WIDTH_MAP, a warning is
  # emitted via Kernel#warn and 1 is returned.
  #
  # @param codepoint [Integer]
  # @return [Integer]
  # @raise [MissingCodePointError] if the code point is not in the table
  def lookup_width(codepoint)
    property = lookup(codepoint)
    width = PROPERTY_TO_WIDTH_MAP[property]
    if width.nil?
      warn <<~WARNING_MESSAGE
        The code point 0x#{codepoint.to_s(16)} has the property "#{property}" \
        whose width is unknown.
      WARNING_MESSAGE
      return 1
    end
    width
  end

  # Returns the East Asian Width property recorded for a code point.
  #
  # @param codepoint [Integer]
  # @return [Symbol] the property read from the data file (e.g. :W, :Na)
  # @raise [MissingCodePointError] if the table has no entry for it
  def lookup(codepoint)
    ret = @lookup_table[codepoint]
    if ret.nil?
      raise(
        MissingCodePointError,
        "Cannot find the code point 0x#{codepoint.to_s(16)} " \
        'in the lookup table.'
      )
    end

    ret
  end

  # Short representation made of the class name and object id only, so the
  # contents of the lookup table are not included in the output.
  #
  # @return [String]
  def inspect
    "#<#{self.class}:#{object_id}>"
  end
end
|
metadata
ADDED
@@ -0,0 +1,78 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: east_asian_width_simple
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Weihang Jian
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2022-06-11 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: rake
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '13.0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '13.0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: minitest
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '5.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '5.0'
|
41
|
+
description: 'east_asian_width_simple is a Ruby gem that calculates the visual width
|
42
|
+
of strings by parsing and looking up East Asian Width Property Data File, aka EastAsianWidth.txt,
|
43
|
+
which is defined in Unicode Standard Annex #11: East Asian Width. It aims to be
|
44
|
+
time-performant and easy to use.
|
45
|
+
|
46
|
+
'
|
47
|
+
email:
|
48
|
+
- tonytonyjan@gmail.com
|
49
|
+
executables: []
|
50
|
+
extensions: []
|
51
|
+
extra_rdoc_files: []
|
52
|
+
files:
|
53
|
+
- lib/east_asian_width_simple.rb
|
54
|
+
homepage: https://github.com/tonytonyjan/east_asian_width_simple
|
55
|
+
licenses:
|
56
|
+
- MIT
|
57
|
+
metadata: {}
|
58
|
+
post_install_message:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
61
|
+
- lib
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
|
+
- - ">="
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
72
|
+
requirements: []
|
73
|
+
rubygems_version: 3.3.11
|
74
|
+
signing_key:
|
75
|
+
specification_version: 4
|
76
|
+
summary: 'Calculate string visual width by looking up EastAsianWidth.txt from UAX
|
77
|
+
#11: East Asian Width'
|
78
|
+
test_files: []
|