east_asian_width_simple 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/east_asian_width_simple.rb +59 -0
  3. metadata +78 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 9bdea9f77fd2c6429ec5e7cd8e541717c1f06ab33f1c3b270cc2bb97c6723d7a
4
+ data.tar.gz: 27375d92f4329323c09fa6dba5cae653757dd2e32102c7f03dbb1f691dde8be9
5
+ SHA512:
6
+ metadata.gz: 1a71a14256e90f3d87cd0d9533cef3552aad0a9ac78a60a4f066f063598148a261f9b549bb76e11e5d4d28575853802a293d1b1ee1b11881a0f684ea67b166cb
7
+ data.tar.gz: 719022070f2f7afa493ddd58cb4cd9e8e8f662eb6ddfd3334e50335c9bc809744ad8cc92983ac7954a4024737713d2d870754e4638a6e6e9eca630fa28efebe1
@@ -0,0 +1,59 @@
1
+ # frozen_string_literal: true
2
+
3
# Computes the display width of strings from the East Asian Width property
# of each code point, using a table parsed from an EastAsianWidth.txt-style
# data source.
#
# @example
#   eaw = EastAsianWidthSimple.new(File.open('EastAsianWidth.txt'))
#   eaw.string_width('abc') # => 3
class EastAsianWidthSimple
  # Base class for every error raised by this class.
  Error = Class.new(StandardError)
  # Raised by #lookup when a code point has no entry in the parsed table.
  MissingCodePointError = Class.new(Error)

  # Data lines begin with a hexadecimal code point; anything else (comments,
  # blank lines) is skipped while parsing.
  HEX_DIGIT_REGEXP = /\h/
  # Width assigned to each property value. A nil width means "unknown":
  # #lookup_width emits a warning and falls back to 1 for those properties.
  PROPERTY_TO_WIDTH_MAP = { A: nil, F: 2, H: 1, N: nil, Na: 1, W: 2 }.freeze

  # Builds the code point => property lookup table.
  #
  # Accepts both the compact field layout (`0020;Na # ...`) and the layout
  # with whitespace around the separator (`0020           ; Na # ...`).
  #
  # @param east_asian_width_txt_io [#each_line] source of EastAsianWidth.txt
  #   content (an IO, StringIO, String, ...)
  # @raise [Error] if a data line has no property field
  def initialize(east_asian_width_txt_io)
    # One slot per code point; 2**21 slots cover everything up to 0x10FFFF.
    @lookup_table = Array.new(2**21)
    east_asian_width_txt_io.each_line do |line|
      next unless line.start_with?(HEX_DIGIT_REGEXP)

      # Drop the trailing "# ..." comment, then take the first non-blank
      # token of each ';'-separated field so that optional whitespace around
      # the separator does not matter.
      data = line.split('#', 2).first
      code_point, property = data.split(';').map { |field| field[/\S+/] }
      if property.nil?
        raise Error, "Malformed line in East Asian Width data: #{line.inspect}"
      end

      if code_point.include?('..')
        first, last = code_point.split('..')
        @lookup_table.fill(property.to_sym, first.to_i(16)..last.to_i(16))
      else
        @lookup_table[code_point.to_i(16)] = property.to_sym
      end
    end
  end

  # Sums the widths of all code points in +string+.
  #
  # @param string [String] text in any encoding convertible to UTF-8
  # @return [Integer] total display width
  # @raise [MissingCodePointError] if a code point is absent from the table
  def string_width(string)
    string = string.encode(Encoding::UTF_8) unless string.encoding == Encoding::UTF_8
    string.codepoints.sum { |codepoint| lookup_width(codepoint) }
  end

  # Returns the width of a single code point.
  #
  # Properties whose width is nil in PROPERTY_TO_WIDTH_MAP (or that are not
  # in the map at all) produce a warning and count as width 1.
  #
  # @param codepoint [Integer]
  # @return [Integer]
  # @raise [MissingCodePointError] if the code point is absent from the table
  def lookup_width(codepoint)
    property = lookup(codepoint)
    width = PROPERTY_TO_WIDTH_MAP[property]
    return width unless width.nil?

    warn <<~WARNING_MESSAGE
      The code point 0x#{codepoint.to_s(16)} has the property "#{property}" \
      whose width is unknown.
    WARNING_MESSAGE
    1
  end

  # Returns the East Asian Width property recorded for +codepoint+.
  #
  # @param codepoint [Integer]
  # @return [Symbol] the property as it appeared in the data (e.g. :Na, :W)
  # @raise [MissingCodePointError] if the table has no entry for it
  def lookup(codepoint)
    ret = @lookup_table[codepoint]
    if ret.nil?
      raise(
        MissingCodePointError,
        "Cannot find the code point 0x#{codepoint.to_s(16)} " \
        'in the lookup table.'
      )
    end

    ret
  end

  # Short representation; the default would dump the whole lookup table.
  #
  # @return [String]
  def inspect
    "#<#{self.class}:#{object_id}>"
  end
end
metadata ADDED
@@ -0,0 +1,78 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: east_asian_width_simple
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Weihang Jian
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-06-11 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: rake
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '13.0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '13.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ description: 'east_asian_width_simple is a Ruby gem that calculates the visual width
42
+ of strings by parsing and looking up East Asian Width Property Data File, aka EastAsianWidth.txt,
43
+ which is defined in Unicode Standard Annex #11: East Asian Width. It aims to be
44
+ time-performant and easy to use.
45
+
46
+ '
47
+ email:
48
+ - tonytonyjan@gmail.com
49
+ executables: []
50
+ extensions: []
51
+ extra_rdoc_files: []
52
+ files:
53
+ - lib/east_asian_width_simple.rb
54
+ homepage: https://github.com/tonytonyjan/east_asian_width_simple
55
+ licenses:
56
+ - MIT
57
+ metadata: {}
58
+ post_install_message:
59
+ rdoc_options: []
60
+ require_paths:
61
+ - lib
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
+ - - ">="
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
72
+ requirements: []
73
+ rubygems_version: 3.3.11
74
+ signing_key:
75
+ specification_version: 4
76
+ summary: 'Calculate string visual width by looking up EastAsianWidth.txt from UAX
77
+ #11: East Asian Width'
78
+ test_files: []