east_asian_width_simple 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/lib/east_asian_width_simple.rb +59 -0
- metadata +78 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 9bdea9f77fd2c6429ec5e7cd8e541717c1f06ab33f1c3b270cc2bb97c6723d7a
|
|
4
|
+
data.tar.gz: 27375d92f4329323c09fa6dba5cae653757dd2e32102c7f03dbb1f691dde8be9
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 1a71a14256e90f3d87cd0d9533cef3552aad0a9ac78a60a4f066f063598148a261f9b549bb76e11e5d4d28575853802a293d1b1ee1b11881a0f684ea67b166cb
|
|
7
|
+
data.tar.gz: 719022070f2f7afa493ddd58cb4cd9e8e8f662eb6ddfd3334e50335c9bc809744ad8cc92983ac7954a4024737713d2d870754e4638a6e6e9eca630fa28efebe1
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Computes the display width of strings from the East_Asian_Width property
# of each code point, using a lookup table built from the contents of an
# EastAsianWidth.txt data file (Unicode Standard Annex #11).
class EastAsianWidthSimple
  # Base class for every error raised by this class.
  Error = Class.new(StandardError)
  # Raised by #lookup when a code point has no entry in the lookup table.
  MissingCodePointError = Class.new(Error)

  # Data lines start with a hexadecimal code point; anything else
  # (comments, blank lines) is ignored by the parser.
  HEX_DIGIT_REGEXP = /\h/
  # Matches one data line: a single code point or a `first..last` range,
  # a semicolon and the property value. Whitespace around the semicolon is
  # optional so both `0020;Na # ...` and `0020 ; Na # ...` layouts parse.
  DATA_LINE_REGEXP = /\A(?<first>\h+)(?:\.\.(?<last>\h+))?\s*;\s*(?<property>\w+)/.freeze
  # Column width for each property value; nil means the width is unknown
  # (see #lookup_width for how unknown widths are handled).
  PROPERTY_TO_WIDTH_MAP = { A: nil, F: 2, H: 1, N: nil, Na: 1, W: 2 }.freeze

  # Builds the lookup table by reading the given IO line by line.
  #
  # @param east_asian_width_txt_io [#each_line] the contents of
  #   EastAsianWidth.txt (a File, StringIO, ...)
  # @raise [Error] if a line starts with a hex digit but is not a valid
  #   `code_point;property` or `first..last;property` entry
  def initialize(east_asian_width_txt_io)
    # One slot per code point, indexed directly by the code point value.
    @lookup_table = Array.new(2**21)
    east_asian_width_txt_io.each_line do |line|
      next unless line.start_with?(HEX_DIGIT_REGEXP)

      entry = DATA_LINE_REGEXP.match(line)
      raise Error, "Malformed EastAsianWidth.txt line: #{line.chomp.inspect}" if entry.nil?

      first = entry[:first].to_i(16)
      # A single code point is handled as a one-element range.
      last = (entry[:last] || entry[:first]).to_i(16)
      @lookup_table.fill(entry[:property].to_sym, first..last)
    end
  end

  # Sums the widths of all code points in the string.
  #
  # @param string [String] transcoded to UTF-8 first when it uses another
  #   encoding
  # @return [Integer] the total width
  # @raise [MissingCodePointError] if a code point is not in the table
  def string_width(string)
    string = string.encode(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
    string.codepoints.sum { |codepoint| lookup_width(codepoint) }
  end

  # Returns the width of a single code point.
  #
  # When the property has no known width in PROPERTY_TO_WIDTH_MAP, a
  # warning is emitted via Kernel#warn and 1 is returned.
  #
  # @param codepoint [Integer]
  # @return [Integer]
  # @raise [MissingCodePointError] if the code point is not in the table
  def lookup_width(codepoint)
    property = lookup(codepoint)
    width = PROPERTY_TO_WIDTH_MAP[property]
    if width.nil?
      warn <<~WARNING_MESSAGE
        The code point 0x#{codepoint.to_s(16)} has the property "#{property}" \
        whose width is unknown.
      WARNING_MESSAGE
      return 1
    end
    width
  end

  # Returns the East_Asian_Width property recorded for a code point.
  #
  # @param codepoint [Integer]
  # @return [Symbol] the property value as read from the data file
  # @raise [MissingCodePointError] if the table has no entry for it
  def lookup(codepoint)
    ret = @lookup_table[codepoint]
    if ret.nil?
      raise(
        MissingCodePointError,
        "Cannot find the code point 0x#{codepoint.to_s(16)} " \
        'in the lookup table.'
      )
    end

    ret
  end

  # Short representation made of the class name and object id only; it
  # omits instance variables, so the lookup table is never printed.
  #
  # @return [String]
  def inspect
    "#<#{self.class}:#{object_id}>"
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: east_asian_width_simple
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 1.0.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Weihang Jian
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: bin
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2022-06-11 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: rake
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '13.0'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '13.0'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: minitest
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '5.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '5.0'
|
|
41
|
+
description: 'east_asian_width_simple is a Ruby gem that calculates the visual width
|
|
42
|
+
of strings by parsing and looking up East Asian Width Property Data File, aka EastAsianWidth.txt,
|
|
43
|
+
which is defined in Unicode Standard Annex #11: East Asian Width. It aims to be
|
|
44
|
+
time-performant and easy to use.
|
|
45
|
+
|
|
46
|
+
'
|
|
47
|
+
email:
|
|
48
|
+
- tonytonyjan@gmail.com
|
|
49
|
+
executables: []
|
|
50
|
+
extensions: []
|
|
51
|
+
extra_rdoc_files: []
|
|
52
|
+
files:
|
|
53
|
+
- lib/east_asian_width_simple.rb
|
|
54
|
+
homepage: https://github.com/tonytonyjan/east_asian_width_simple
|
|
55
|
+
licenses:
|
|
56
|
+
- MIT
|
|
57
|
+
metadata: {}
|
|
58
|
+
post_install_message:
|
|
59
|
+
rdoc_options: []
|
|
60
|
+
require_paths:
|
|
61
|
+
- lib
|
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
63
|
+
requirements:
|
|
64
|
+
- - ">="
|
|
65
|
+
- !ruby/object:Gem::Version
|
|
66
|
+
version: '0'
|
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
68
|
+
requirements:
|
|
69
|
+
- - ">="
|
|
70
|
+
- !ruby/object:Gem::Version
|
|
71
|
+
version: '0'
|
|
72
|
+
requirements: []
|
|
73
|
+
rubygems_version: 3.3.11
|
|
74
|
+
signing_key:
|
|
75
|
+
specification_version: 4
|
|
76
|
+
summary: 'Calculate string visual width by looking up EastAsianWidth.txt from UAX
|
|
77
|
+
#11: East Asian Width'
|
|
78
|
+
test_files: []
|