coaster 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/coaster/core_ext/string.rb +254 -0
- data/lib/coaster/core_ext.rb +1 -0
- data/lib/coaster/version.rb +1 -1
- data/test/test_string.rb +128 -0
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8110bd462989d43e85e644085dd7dc030b567965396590104214cc68601de3bf
|
4
|
+
data.tar.gz: 0d4c15ba71199d0843f959c8761be6a1ed65b59660d3600c15c18136e0bab995
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 75d8e95a1f78b729222f80d06becde21150714ee7a8f6c3946d5b53e9236fb53b58caad0584f11d8ad36695921ebd66227132661dab2d60aa71618f57b0197e2
|
7
|
+
data.tar.gz: 701b12b20bbe4ff77b339fb4f0e6812f5662ba0d1bdbaf37265cb14574adfb055da8caa6b5a5686eda61bb08c2009c018891932cf5be6e767105a6ad6fe266ba
|
@@ -0,0 +1,254 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
# :nodoc:
|
4
|
+
class String
|
5
|
+
attr_accessor :lang
|
6
|
+
|
7
|
+
# 작품명에 쓰는 인용부호
|
8
|
+
def quote
|
9
|
+
case Hop.current_language.code
|
10
|
+
when "ja"
|
11
|
+
"『#{self}』"
|
12
|
+
else
|
13
|
+
"\"#{self}\""
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
def contain_not_latin?; match?(/[^\p{Latin}\p{Common}]+/) end
|
18
|
+
def contain_hangul?; match?(/\p{Hangul}/) end
|
19
|
+
def contain_kana?; match?(/\p{Katakana}|\p{Hiragana}/) end
|
20
|
+
def contain_hira?; match?(/\p{Hiragana}/) end
|
21
|
+
def contain_kata?; match?(/\p{Katakana}/) end
|
22
|
+
def contain_han?; match?(/\p{Han}/) end
|
23
|
+
def contain_cjk?; match?(/\p{Han}|\p{Katakana}|\p{Hiragana}|\p{Hangul}/) end
|
24
|
+
def any_whitespace?; match?(/\s/) end
|
25
|
+
alias_method :hira?, :contain_hira?
|
26
|
+
alias_method :kata?, :contain_kata?
|
27
|
+
alias_method :hangul?, :contain_hangul?
|
28
|
+
|
29
|
+
def number?
|
30
|
+
/\A[-+]?\d+\z/ === self
|
31
|
+
end
|
32
|
+
|
33
|
+
def number_or_blank?
|
34
|
+
number? || blank?
|
35
|
+
end
|
36
|
+
|
37
|
+
# ko, ja, en 에 대해서만 사용해야함
|
38
|
+
def to_639_1
|
39
|
+
if contain_hangul? then "ko"
|
40
|
+
elsif contain_kana? || contain_han? then "ja"
|
41
|
+
else "en"
|
42
|
+
end
|
43
|
+
end
|
44
|
+
|
45
|
+
def to_639_i6
|
46
|
+
number? ? "i6" : to_639_1
|
47
|
+
end
|
48
|
+
|
49
|
+
def to_utf8
|
50
|
+
return self if encoding.name == "UTF-8"
|
51
|
+
begin
|
52
|
+
encode("UTF-8")
|
53
|
+
rescue EncodingError
|
54
|
+
encode("UTF-8", invalid: :replace, undef: :replace, replace: "?")
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def markdown_escape(only_bold_mark = false)
|
59
|
+
if only_bold_mark
|
60
|
+
gsub(/([*])/) { "\\#{$1}" }
|
61
|
+
else
|
62
|
+
gsub(/([\\\`\*\_{}\[\]()\#\+\-\.\!])/) { "\\#{$1}" }
|
63
|
+
end
|
64
|
+
end
|
65
|
+
|
66
|
+
# @note 반각 문자 -> 전각 문자
|
67
|
+
# @param [TrueClass, FalseClass] alpha
|
68
|
+
# @param [TrueClass, FalseClass] number
|
69
|
+
# @param [TrueClass, FalseClass] symbol
|
70
|
+
# @return [String]
|
71
|
+
def to_full_characters(alpha: true, number: true, symbol: false)
|
72
|
+
result = String.new
|
73
|
+
return result if self.blank?
|
74
|
+
|
75
|
+
(0...self.size).each do |i|
|
76
|
+
half_ord = self[i].ord
|
77
|
+
full_ord = half_ord + 0xfee0
|
78
|
+
char_ord = case half_ord
|
79
|
+
when 0x20 then 0x3000
|
80
|
+
when ('0'.ord)..('9'.ord) then number ? full_ord : half_ord
|
81
|
+
when ('A'.ord)..('Z'.ord) then alpha ? full_ord : half_ord
|
82
|
+
when ('a'.ord)..('z'.ord) then alpha ? full_ord : half_ord
|
83
|
+
when ('!'.ord)..('~'.ord) then symbol ? full_ord : half_ord
|
84
|
+
else half_ord
|
85
|
+
end
|
86
|
+
result << char_ord.chr('UTF-8')
|
87
|
+
end
|
88
|
+
result
|
89
|
+
end
|
90
|
+
|
91
|
+
# @note 전각 문자 -> 반각 문자
|
92
|
+
# @param [TrueClass, FalseClass] alpha
|
93
|
+
# @param [TrueClass, FalseClass] number
|
94
|
+
# @param [TrueClass, FalseClass] symbol
|
95
|
+
# @return [String]
|
96
|
+
def to_half_characters(alpha: true, number: true, symbol: false)
|
97
|
+
result = String.new
|
98
|
+
return result if self.blank?
|
99
|
+
|
100
|
+
(0...self.size).each do |i|
|
101
|
+
full_ord = self[i].ord
|
102
|
+
half_ord = full_ord - 0xfee0
|
103
|
+
char_ord = case full_ord
|
104
|
+
when 0x3000 then 0x20
|
105
|
+
when ('0'.ord + 0xfee0)..('9'.ord + 0xfee0) then number ? half_ord : full_ord
|
106
|
+
when ('A'.ord + 0xfee0)..('Z'.ord + 0xfee0) then alpha ? half_ord : full_ord
|
107
|
+
when ('a'.ord + 0xfee0)..('z'.ord + 0xfee0) then alpha ? half_ord : full_ord
|
108
|
+
when ('!'.ord + 0xfee0)..('~'.ord + 0xfee0) then symbol ? half_ord : full_ord
|
109
|
+
else full_ord
|
110
|
+
end
|
111
|
+
result << char_ord.chr('UTF-8')
|
112
|
+
end
|
113
|
+
result
|
114
|
+
end
|
115
|
+
|
116
|
+
def regularize_linebreak
|
117
|
+
gsub("\r\n", "\n").gsub("\r", "\n")
|
118
|
+
end
|
119
|
+
|
120
|
+
def sanitize_unprintable_character
|
121
|
+
# Sanitize Control Character Except \n\t
|
122
|
+
gsub(/(?![\n\t])\p{Cntrl}/, '')
|
123
|
+
end
|
124
|
+
|
125
|
+
def strip_tags
|
126
|
+
plain = self.regularize_linebreak.split("\n").map(&:strip)
|
127
|
+
.select(&:present?).join("\n")
|
128
|
+
plain.gsub!(/<(br(|\/| \/)|p(\/| \/))>+(?!\r|\n)/i, "\n")
|
129
|
+
plain.gsub!(/(<br>|<strong>|<i>|<\/?b>|<\/?p>|<\/?li>)/i, '')
|
130
|
+
plain.gsub!(/<?/i, '<')
|
131
|
+
plain.gsub!(/>?/i, '>')
|
132
|
+
plain.gsub!(/&#(5529[6-9]|55[3-9][0-9][0-9]|56[0-9][0-9][0-9]|57[0-2][0-9][0-9]|573[0-3][0-9]|5734[0-3]);/, '') # remove unsupported HTML entities. � ~ �
|
133
|
+
plain.scan(/\<+[\A(\p{Hangul}|\s|\w),!@&*?~]+\>+/).each do |text|
|
134
|
+
plain.gsub!(text, CGI.escapeHTML(text))
|
135
|
+
end
|
136
|
+
plain.strip!
|
137
|
+
CGI.unescapeHTML(plain.gsub(/<.+?>/, ""))
|
138
|
+
end
|
139
|
+
|
140
|
+
EMOJI_REGEX = /\u{1F3F4}\u{E0067}\u{E0062}(?:\u{E0077}\u{E006C}\u{E0073}|\u{E0073}\u{E0063}\u{E0074}|\u{E0065}\u{E006E}\u{E0067})\u{E007F}|(?:\u{1F9D1}\u{1F3FF}\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D)?|\u200D(?:\u{1F48B}\u200D)?)\u{1F9D1}|\u{1F469}\u{1F3FF}\u200D\u{1F91D}\u200D[\u{1F468}\u{1F469}]|\u{1FAF1}\u{1F3FF}\u200D\u{1FAF2})[\u{1F3FB}-\u{1F3FE}]|(?:\u{1F9D1}\u{1F3FE}\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D)?|\u200D(?:\u{1F48B}\u200D)?)\u{1F9D1}|\u{1F469}\u{1F3FE}\u200D\u{1F91D}\u200D[\u{1F468}\u{1F469}]|\u{1FAF1}\u{1F3FE}\u200D\u{1FAF2})[\u{1F3FB}-\u{1F3FD}\u{1F3FF}]|(?:\u{1F9D1}\u{1F3FD}\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D)?|\u200D(?:\u{1F48B}\u200D)?)\u{1F9D1}|\u{1F469}\u{1F3FD}\u200D\u{1F91D}\u200D[\u{1F468}\u{1F469}]|\u{1FAF1}\u{1F3FD}\u200D\u{1FAF2})[\u{1F3FB}\u{1F3FC}\u{1F3FE}\u{1F3FF}]|(?:\u{1F9D1}\u{1F3FC}\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D)?|\u200D(?:\u{1F48B}\u200D)?)\u{1F9D1}|\u{1F469}\u{1F3FC}\u200D\u{1F91D}\u200D[\u{1F468}\u{1F469}]|\u{1FAF1}\u{1F3FC}\u200D\u{1FAF2})[\u{1F3FB}\u{1F3FD}-\u{1F3FF}]|(?:\u{1F9D1}\u{1F3FB}\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D)?|\u200D(?:\u{1F48B}\u200D)?)\u{1F9D1}|\u{1F469}\u{1F3FB}\u200D\u{1F91D}\u200D[\u{1F468}\u{1F469}]|\u{1FAF1}\u{1F3FB}\u200D\u{1FAF2})[\u{1F3FC}-\u{1F3FF}]|\u{1F468}(?:\u{1F3FB}(?:\u200D(?:\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D\u{1F468}[\u{1F3FB}-\u{1F3FF}]|\u{1F468}[\u{1F3FB}-\u{1F3FF}])|\u200D(?:\u{1F48B}\u200D\u{1F468}[\u{1F3FB}-\u{1F3FF}]|\u{1F468}[\u{1F3FB}-\u{1F3FF}]))|\u{1F91D}\u200D\u{1F468}[\u{1F3FC}-\u{1F3FF}]|[\u2695\u2696\u2708]\uFE0F|[\u2695\u2696\u2708]|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]))?|[\u{1F3FC}-\u{1F3FF}]\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D\u{1F468}[\u{1F3FB}-\u{1F3FF}]|\u{1F468}[\u{1F3FB}-\u{1F3FF}])|\u200D(?:\u{1F48B}\u200D\u{1F468}[\u{1F3FB}-\u{1F3FF}]|\u{1F468}[\u{1F3FB}-\u{1F3FF}]))|\u200D(?:\u27
64(?:\uFE0F\u200D(?:\u{1F48B}\u200D)?|\u200D(?:\u{1F48B}\u200D)?)\u{1F468}|[\u{1F468}\u{1F469}]\u200D(?:\u{1F466}\u200D\u{1F466}|\u{1F467}\u200D[\u{1F466}\u{1F467}])|\u{1F466}\u200D\u{1F466}|\u{1F467}\u200D[\u{1F466}\u{1F467}]|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F3FF}\u200D(?:\u{1F91D}\u200D\u{1F468}[\u{1F3FB}-\u{1F3FE}]|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F3FE}\u200D(?:\u{1F91D}\u200D\u{1F468}[\u{1F3FB}-\u{1F3FD}\u{1F3FF}]|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F3FD}\u200D(?:\u{1F91D}\u200D\u{1F468}[\u{1F3FB}\u{1F3FC}\u{1F3FE}\u{1F3FF}]|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F3FC}\u200D(?:\u{1F91D}\u200D\u{1F468}[\u{1F3FB}\u{1F3FD}-\u{1F3FF}]|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|(?:\u{1F3FF}\u200D[\u2695\u2696\u2708]|\u{1F3FE}\u200D[\u2695\u2696\u2708]|\u{1F3FD}\u200D[\u2695\u2696\u2708]|\u{1F3FC}\u200D[\u2695\u2696\u2708]|\u200D[\u2695\u2696\u2708])\uFE0F|\u200D(?:[\u{1F468}\u{1F469}]\u200D[\u{1F466}\u{1F467}]|[\u{1F466}\u{1F467}])|\u{1F3FF}\u200D[\u2695\u2696\u2708]|\u{1F3FE}\u200D[\u2695\u2696\u2708]|\u{1F3FD}\u200D[\u2695\u2696\u2708]|\u{1F3FC}\u200D[\u2695\u2696\u2708]|\u{1F3FF}|\u{1F3FE}|\u{1F3FD}|\u{1F3FC}|\u200D[\u2695\u2696\u2708])?|(?:\u{1F469}(?:\u{1F3FB}\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D[\u{1F468}\u{1F469}]|[\u{1F468}\u{1F469}])|\u200D(?:\u{1F48B}\u200D[\u{1F468}\u{1F469}]|[\u{1F468}\u{1F469
}]))|[\u{1F3FC}-\u{1F3FF}]\u200D\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D[\u{1F468}\u{1F469}]|[\u{1F468}\u{1F469}])|\u200D(?:\u{1F48B}\u200D[\u{1F468}\u{1F469}]|[\u{1F468}\u{1F469}])))|\u{1F9D1}[\u{1F3FB}-\u{1F3FF}]\u200D\u{1F91D}\u200D\u{1F9D1})[\u{1F3FB}-\u{1F3FF}]|\u{1F469}\u200D\u{1F469}\u200D(?:\u{1F466}\u200D\u{1F466}|\u{1F467}\u200D[\u{1F466}\u{1F467}])|\u{1F469}(?:\u200D(?:\u2764(?:\uFE0F\u200D(?:\u{1F48B}\u200D[\u{1F468}\u{1F469}]|[\u{1F468}\u{1F469}])|\u200D(?:\u{1F48B}\u200D[\u{1F468}\u{1F469}]|[\u{1F468}\u{1F469}]))|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F3FF}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]|\u{1F3FE}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]|\u{1F3FD}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]|\u{1F3FC}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]|\u{1F3FB}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F9D1}(?:\u200D(?:\u{1F91D}\u200D\u{1F9D1}|[\u{1F33E}\u{1F373}\u{1F37C}\u{1F384}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F3FF}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F384}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{
1F9BC}\u{1F9BD}]|\u{1F3FE}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F384}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]|\u{1F3FD}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F384}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]|\u{1F3FC}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F384}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}]|\u{1F3FB}\u200D[\u{1F33E}\u{1F373}\u{1F37C}\u{1F384}\u{1F393}\u{1F3A4}\u{1F3A8}\u{1F3EB}\u{1F3ED}\u{1F4BB}\u{1F4BC}\u{1F527}\u{1F52C}\u{1F680}\u{1F692}\u{1F9AF}-\u{1F9B3}\u{1F9BC}\u{1F9BD}])|\u{1F469}\u200D\u{1F466}\u200D\u{1F466}|\u{1F469}\u200D\u{1F469}\u200D[\u{1F466}\u{1F467}]|\u{1F469}\u200D\u{1F467}\u200D[\u{1F466}\u{1F467}]|(?:\u{1F441}\uFE0F?\u200D\u{1F5E8}|\u{1F9D1}(?:\u{1F3FF}\u200D[\u2695\u2696\u2708]|\u{1F3FE}\u200D[\u2695\u2696\u2708]|\u{1F3FD}\u200D[\u2695\u2696\u2708]|\u{1F3FC}\u200D[\u2695\u2696\u2708]|\u{1F3FB}\u200D[\u2695\u2696\u2708]|\u200D[\u2695\u2696\u2708])|\u{1F469}(?:\u{1F3FF}\u200D[\u2695\u2696\u2708]|\u{1F3FE}\u200D[\u2695\u2696\u2708]|\u{1F3FD}\u200D[\u2695\u2696\u2708]|\u{1F3FC}\u200D[\u2695\u2696\u2708]|\u{1F3FB}\u200D[\u2695\u2696\u2708]|\u200D[\u2695\u2696\u2708])|\u{1F636}\u200D\u{1F32B}|\u{1F3F3}\uFE0F?\u200D\u26A7|\u{1F43B}\u200D\u2744|(?:[\u{1F3C3}\u{1F3C4}\u{1F3CA}\u{1F46E}\u{1F470}\u{1F471}\u{1F473}\u{1F477}\u{1F481}\u{1F482}\u{1F486}\u{1F487}\u{1F645}-\u{1F647}\u{1F64B}\u{1F64D}\u{1F64E}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F926}\u{1F935}\u{1F937}-\u{1F939}\u{1F93D}\u{1F93E}\u{1F9B8}\u{1F9B9}\u{1F9CD}-\u{1F9CF}\u{1F9D4}\u{1F9D6}-\u{1F9DD}][\u{1F3FB}-\u{1F3FF}]|[\u{1F46F}\u{1F9DE}\u{1F9DF}])\u200D[\u2640\u2642]|[\u26F9\u{1F3CB}\u{1F3CC}\u{1F575}](?:[\uFE0F\u{1F3FB}-\u{1F3FF}]\u200D[\u2640\u2642]|\u200D[\u2640\u2642])|\u{1F3F4}\u200D\u2620|[\u{1F3C3}\u{1F3C4}
\u{1F3CA}\u{1F46E}\u{1F470}\u{1F471}\u{1F473}\u{1F477}\u{1F481}\u{1F482}\u{1F486}\u{1F487}\u{1F645}-\u{1F647}\u{1F64B}\u{1F64D}\u{1F64E}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F926}\u{1F935}\u{1F937}-\u{1F939}\u{1F93C}-\u{1F93E}\u{1F9B8}\u{1F9B9}\u{1F9CD}-\u{1F9CF}\u{1F9D4}\u{1F9D6}-\u{1F9DD}]\u200D[\u2640\u2642]|[\u00A9\u00AE\u203C\u2049\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23ED-\u23EF\u23F1\u23F2\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB\u25FC\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u265F\u2660\u2663\u2665\u2666\u2668\u267B\u267E\u267F\u2692\u2694-\u2697\u2699\u269B\u269C\u26A0\u26A7\u26AA\u26B0\u26B1\u26BD\u26BE\u26C4\u26C8\u26CF\u26D1\u26D3\u26E9\u26F0-\u26F5\u26F7\u26F8\u26FA\u2702\u2708\u2709\u270F\u2712\u2714\u2716\u271D\u2721\u2733\u2734\u2744\u2747\u2763\u27A1\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B55\u3030\u303D\u3297\u3299\u{1F004}\u{1F170}\u{1F171}\u{1F17E}\u{1F17F}\u{1F202}\u{1F237}\u{1F321}\u{1F324}-\u{1F32C}\u{1F336}\u{1F37D}\u{1F396}\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}\u{1F39F}\u{1F3CD}\u{1F3CE}\u{1F3D4}-\u{1F3DF}\u{1F3F5}\u{1F3F7}\u{1F43F}\u{1F4FD}\u{1F549}\u{1F54A}\u{1F56F}\u{1F570}\u{1F573}\u{1F576}-\u{1F579}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F5A5}\u{1F5A8}\u{1F5B1}\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5E8}\u{1F5EF}\u{1F5F3}\u{1F5FA}\u{1F6CB}\u{1F6CD}-\u{1F6CF}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6F0}\u{1F6F3}])\uFE0F|\u{1F441}\uFE0F?\u200D\u{1F5E8}|\u{1F9D1}(?:\u{1F3FF}\u200D[\u2695\u2696\u2708]|\u{1F3FE}\u200D[\u2695\u2696\u2708]|\u{1F3FD}\u200D[\u2695\u2696\u2708]|\u{1F3FC}\u200D[\u2695\u2696\u2708]|\u{1F3FB}\u200D[\u2695\u2696\u2708]|\u200D[\u2695\u2696\u2708])|\u{1F469}(?:\u{1F3FF}\u200D[\u2695\u2696\u2708]|\u{1F3FE}\u200D[\u2695\u2696\u2708]|\u{1F3FD}\u200D[\u2695\u2696\u2708]|\u{1F3FC}\u200D[\u2695\u2696\u2708]|\u{1F3FB}\u200D[\u2695\u2696\u2708]|\u200D[\u2695\u2696\u2708])|\
u{1F3F3}\uFE0F?\u200D\u{1F308}|\u{1F469}\u200D\u{1F467}|\u{1F469}\u200D\u{1F466}|\u{1F636}\u200D\u{1F32B}|\u{1F3F3}\uFE0F?\u200D\u26A7|\u{1F635}\u200D\u{1F4AB}|\u{1F62E}\u200D\u{1F4A8}|\u{1F415}\u200D\u{1F9BA}|\u{1FAF1}(?:\u{1F3FF}|\u{1F3FE}|\u{1F3FD}|\u{1F3FC}|\u{1F3FB})?|\u{1F9D1}(?:\u{1F3FF}|\u{1F3FE}|\u{1F3FD}|\u{1F3FC}|\u{1F3FB})?|\u{1F469}(?:\u{1F3FF}|\u{1F3FE}|\u{1F3FD}|\u{1F3FC}|\u{1F3FB})?|\u{1F43B}\u200D\u2744|(?:[\u{1F3C3}\u{1F3C4}\u{1F3CA}\u{1F46E}\u{1F470}\u{1F471}\u{1F473}\u{1F477}\u{1F481}\u{1F482}\u{1F486}\u{1F487}\u{1F645}-\u{1F647}\u{1F64B}\u{1F64D}\u{1F64E}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F926}\u{1F935}\u{1F937}-\u{1F939}\u{1F93D}\u{1F93E}\u{1F9B8}\u{1F9B9}\u{1F9CD}-\u{1F9CF}\u{1F9D4}\u{1F9D6}-\u{1F9DD}][\u{1F3FB}-\u{1F3FF}]|[\u{1F46F}\u{1F9DE}\u{1F9DF}])\u200D[\u2640\u2642]|[\u26F9\u{1F3CB}\u{1F3CC}\u{1F575}](?:[\uFE0F\u{1F3FB}-\u{1F3FF}]\u200D[\u2640\u2642]|\u200D[\u2640\u2642])|\u{1F3F4}\u200D\u2620|\u{1F1FD}\u{1F1F0}|\u{1F1F6}\u{1F1E6}|\u{1F1F4}\u{1F1F2}|\u{1F408}\u200D\u2B1B|\u2764(?:\uFE0F\u200D[\u{1F525}\u{1FA79}]|\u200D[\u{1F525}\u{1FA79}])|\u{1F441}\uFE0F?|\u{1F3F3}\uFE0F?|[\u{1F3C3}\u{1F3C4}\u{1F3CA}\u{1F46E}\u{1F470}\u{1F471}\u{1F473}\u{1F477}\u{1F481}\u{1F482}\u{1F486}\u{1F487}\u{1F645}-\u{1F647}\u{1F64B}\u{1F64D}\u{1F64E}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F926}\u{1F935}\u{1F937}-\u{1F939}\u{1F93C}-\u{1F93E}\u{1F9B8}\u{1F9B9}\u{1F9CD}-\u{1F9CF}\u{1F9D4}\u{1F9D6}-\u{1F9DD}]\u200D[\u2640\u2642]|\u{1F1FF}[\u{1F1E6}\u{1F1F2}\u{1F1FC}]|\u{1F1FE}[\u{1F1EA}\u{1F1F9}]|\u{1F1FC}[\u{1F1EB}\u{1F1F8}]|\u{1F1FB}[\u{1F1E6}\u{1F1E8}\u{1F1EA}\u{1F1EC}\u{1F1EE}\u{1F1F3}\u{1F1FA}]|\u{1F1FA}[\u{1F1E6}\u{1F1EC}\u{1F1F2}\u{1F1F3}\u{1F1F8}\u{1F1FE}\u{1F1FF}]|\u{1F1F9}[\u{1F1E6}\u{1F1E8}\u{1F1E9}\u{1F1EB}-\u{1F1ED}\u{1F1EF}-\u{1F1F4}\u{1F1F7}\u{1F1F9}\u{1F1FB}\u{1F1FC}\u{1F1FF}]|\u{1F1F8}[\u{1F1E6}-\u{1F1EA}\u{1F1EC}-\u{1F1F4}\u{1F1F7}-\u{1F1F9}\u{1F1FB}\u{1F1FD}-\u{1F1FF}]|\u{1F1F7}[\u{1F1EA}\u{1F1F4}\u{1F1F8}\u{1F1FA}\u{1F1FC}]|\u{1F1F5}[\u{1F1E6}\u{1F1EA}-\u{
1F1ED}\u{1F1F0}-\u{1F1F3}\u{1F1F7}-\u{1F1F9}\u{1F1FC}\u{1F1FE}]|\u{1F1F3}[\u{1F1E6}\u{1F1E8}\u{1F1EA}-\u{1F1EC}\u{1F1EE}\u{1F1F1}\u{1F1F4}\u{1F1F5}\u{1F1F7}\u{1F1FA}\u{1F1FF}]|\u{1F1F2}[\u{1F1E6}\u{1F1E8}-\u{1F1ED}\u{1F1F0}-\u{1F1FF}]|\u{1F1F1}[\u{1F1E6}-\u{1F1E8}\u{1F1EE}\u{1F1F0}\u{1F1F7}-\u{1F1FB}\u{1F1FE}]|\u{1F1F0}[\u{1F1EA}\u{1F1EC}-\u{1F1EE}\u{1F1F2}\u{1F1F3}\u{1F1F5}\u{1F1F7}\u{1F1FC}\u{1F1FE}\u{1F1FF}]|\u{1F1EF}[\u{1F1EA}\u{1F1F2}\u{1F1F4}\u{1F1F5}]|\u{1F1EE}[\u{1F1E8}-\u{1F1EA}\u{1F1F1}-\u{1F1F4}\u{1F1F6}-\u{1F1F9}]|\u{1F1ED}[\u{1F1F0}\u{1F1F2}\u{1F1F3}\u{1F1F7}\u{1F1F9}\u{1F1FA}]|\u{1F1EC}[\u{1F1E6}\u{1F1E7}\u{1F1E9}-\u{1F1EE}\u{1F1F1}-\u{1F1F3}\u{1F1F5}-\u{1F1FA}\u{1F1FC}\u{1F1FE}]|\u{1F1EB}[\u{1F1EE}-\u{1F1F0}\u{1F1F2}\u{1F1F4}\u{1F1F7}]|\u{1F1EA}[\u{1F1E6}\u{1F1E8}\u{1F1EA}\u{1F1EC}\u{1F1ED}\u{1F1F7}-\u{1F1FA}]|\u{1F1E9}[\u{1F1EA}\u{1F1EC}\u{1F1EF}\u{1F1F0}\u{1F1F2}\u{1F1F4}\u{1F1FF}]|\u{1F1E8}[\u{1F1E6}\u{1F1E8}\u{1F1E9}\u{1F1EB}-\u{1F1EE}\u{1F1F0}-\u{1F1F5}\u{1F1F7}\u{1F1FA}-\u{1F1FF}]|\u{1F1E7}[\u{1F1E6}\u{1F1E7}\u{1F1E9}-\u{1F1EF}\u{1F1F1}-\u{1F1F4}\u{1F1F6}-\u{1F1F9}\u{1F1FB}\u{1F1FC}\u{1F1FE}\u{1F1FF}]|\u{1F1E6}[\u{1F1E8}-\u{1F1EC}\u{1F1EE}\u{1F1F1}\u{1F1F2}\u{1F1F4}\u{1F1F6}-\u{1F1FA}\u{1F1FC}\u{1F1FD}\u{1F1FF}]|[#\*0-9]\uFE0F?\u20E3|\u{1F93C}[\u{1F3FB}-\u{1F3FF}]|\u2764\uFE0F?|[\u{1F3C3}\u{1F3C4}\u{1F3CA}\u{1F46E}\u{1F470}\u{1F471}\u{1F473}\u{1F477}\u{1F481}\u{1F482}\u{1F486}\u{1F487}\u{1F645}-\u{1F647}\u{1F64B}\u{1F64D}\u{1F64E}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F926}\u{1F935}\u{1F937}-\u{1F939}\u{1F93D}\u{1F93E}\u{1F9B8}\u{1F9B9}\u{1F9CD}-\u{1F9CF}\u{1F9D4}\u{1F9D6}-\u{1F9DD}][\u{1F3FB}-\u{1F3FF}]|[\u26F9\u{1F3CB}\u{1F3CC}\u{1F575}][\uFE0F\u{1F3FB}-\u{1F3FF}]?|\u{1F3F4}|[\u270A\u270B\u{1F385}\u{1F3C2}\u{1F3C7}\u{1F442}\u{1F443}\u{1F446}-\u{1F450}\u{1F466}\u{1F467}\u{1F46B}-\u{1F46D}\u{1F472}\u{1F474}-\u{1F476}\u{1F478}\u{1F47C}\u{1F483}\u{1F485}\u{1F48F}\u{1F491}\u{1F4AA}\u{1F57A}\u{1F595}\u{1F596}\u{1F64C}\u{1F64F}\u{1F6C0}\u{1F6CC}\u{1F90C}\u{
1F90F}\u{1F918}-\u{1F91F}\u{1F930}-\u{1F934}\u{1F936}\u{1F977}\u{1F9B5}\u{1F9B6}\u{1F9BB}\u{1F9D2}\u{1F9D3}\u{1F9D5}\u{1FAC3}-\u{1FAC5}\u{1FAF0}\u{1FAF2}-\u{1FAF6}][\u{1F3FB}-\u{1F3FF}]|[\u261D\u270C\u270D\u{1F574}\u{1F590}][\uFE0F\u{1F3FB}-\u{1F3FF}]|[\u261D\u270A-\u270D\u{1F385}\u{1F3C2}\u{1F3C7}\u{1F408}\u{1F415}\u{1F43B}\u{1F442}\u{1F443}\u{1F446}-\u{1F450}\u{1F466}\u{1F467}\u{1F46B}-\u{1F46D}\u{1F472}\u{1F474}-\u{1F476}\u{1F478}\u{1F47C}\u{1F483}\u{1F485}\u{1F48F}\u{1F491}\u{1F4AA}\u{1F574}\u{1F57A}\u{1F590}\u{1F595}\u{1F596}\u{1F62E}\u{1F635}\u{1F636}\u{1F64C}\u{1F64F}\u{1F6C0}\u{1F6CC}\u{1F90C}\u{1F90F}\u{1F918}-\u{1F91F}\u{1F930}-\u{1F934}\u{1F936}\u{1F93C}\u{1F977}\u{1F9B5}\u{1F9B6}\u{1F9BB}\u{1F9D2}\u{1F9D3}\u{1F9D5}\u{1FAC3}-\u{1FAC5}\u{1FAF0}\u{1FAF2}-\u{1FAF6}]|[\u{1F3C3}\u{1F3C4}\u{1F3CA}\u{1F46E}\u{1F470}\u{1F471}\u{1F473}\u{1F477}\u{1F481}\u{1F482}\u{1F486}\u{1F487}\u{1F645}-\u{1F647}\u{1F64B}\u{1F64D}\u{1F64E}\u{1F6A3}\u{1F6B4}-\u{1F6B6}\u{1F926}\u{1F935}\u{1F937}-\u{1F939}\u{1F93D}\u{1F93E}\u{1F9B8}\u{1F9B9}\u{1F9CD}-\u{1F9CF}\u{1F9D4}\u{1F9D6}-\u{1F9DD}]|[\u{1F46F}\u{1F9DE}\u{1F9DF}]|[\u00A9\u00AE\u203C\u2049\u2122\u2139\u2194-\u2199\u21A9\u21AA\u231A\u231B\u2328\u23CF\u23ED-\u23EF\u23F1\u23F2\u23F8-\u23FA\u24C2\u25AA\u25AB\u25B6\u25C0\u25FB\u25FC\u25FE\u2600-\u2604\u260E\u2611\u2614\u2615\u2618\u2620\u2622\u2623\u2626\u262A\u262E\u262F\u2638-\u263A\u2640\u2642\u2648-\u2653\u265F\u2660\u2663\u2665\u2666\u2668\u267B\u267E\u267F\u2692\u2694-\u2697\u2699\u269B\u269C\u26A0\u26A7\u26AA\u26B0\u26B1\u26BD\u26BE\u26C4\u26C8\u26CF\u26D1\u26D3\u26E9\u26F0-\u26F5\u26F7\u26F8\u26FA\u2702\u2708\u2709\u270F\u2712\u2714\u2716\u271D\u2721\u2733\u2734\u2744\u2747\u2763\u27A1\u2934\u2935\u2B05-\u2B07\u2B1B\u2B1C\u2B55\u3030\u303D\u3297\u3299\u{1F004}\u{1F170}\u{1F171}\u{1F17E}\u{1F17F}\u{1F202}\u{1F237}\u{1F321}\u{1F324}-\u{1F32C}\u{1F336}\u{1F37D}\u{1F396}\u{1F397}\u{1F399}-\u{1F39B}\u{1F39E}\u{1F39F}\u{1F3CD}\u{1F3CE}\u{1F3D4}-\u{1F3DF}\u{1F3F5}\u{1F3F7}\u{1F43F}
\u{1F4FD}\u{1F549}\u{1F54A}\u{1F56F}\u{1F570}\u{1F573}\u{1F576}-\u{1F579}\u{1F587}\u{1F58A}-\u{1F58D}\u{1F5A5}\u{1F5A8}\u{1F5B1}\u{1F5B2}\u{1F5BC}\u{1F5C2}-\u{1F5C4}\u{1F5D1}-\u{1F5D3}\u{1F5DC}-\u{1F5DE}\u{1F5E1}\u{1F5E3}\u{1F5E8}\u{1F5EF}\u{1F5F3}\u{1F5FA}\u{1F6CB}\u{1F6CD}-\u{1F6CF}\u{1F6E0}-\u{1F6E5}\u{1F6E9}\u{1F6F0}\u{1F6F3}]|[\u23E9-\u23EC\u23F0\u23F3\u25FD\u2693\u26A1\u26AB\u26C5\u26CE\u26D4\u26EA\u26FD\u2705\u2728\u274C\u274E\u2753-\u2755\u2757\u2795-\u2797\u27B0\u27BF\u2B50\u{1F0CF}\u{1F18E}\u{1F191}-\u{1F19A}\u{1F201}\u{1F21A}\u{1F22F}\u{1F232}-\u{1F236}\u{1F238}-\u{1F23A}\u{1F250}\u{1F251}\u{1F300}-\u{1F320}\u{1F32D}-\u{1F335}\u{1F337}-\u{1F37C}\u{1F37E}-\u{1F384}\u{1F386}-\u{1F393}\u{1F3A0}-\u{1F3C1}\u{1F3C5}\u{1F3C6}\u{1F3C8}\u{1F3C9}\u{1F3CF}-\u{1F3D3}\u{1F3E0}-\u{1F3F0}\u{1F3F8}-\u{1F407}\u{1F409}-\u{1F414}\u{1F416}-\u{1F43A}\u{1F43C}-\u{1F43E}\u{1F440}\u{1F444}\u{1F445}\u{1F451}-\u{1F465}\u{1F46A}\u{1F479}-\u{1F47B}\u{1F47D}-\u{1F480}\u{1F484}\u{1F488}-\u{1F48E}\u{1F490}\u{1F492}-\u{1F4A9}\u{1F4AB}-\u{1F4FC}\u{1F4FF}-\u{1F53D}\u{1F54B}-\u{1F54E}\u{1F550}-\u{1F567}\u{1F5A4}\u{1F5FB}-\u{1F62D}\u{1F62F}-\u{1F634}\u{1F637}-\u{1F644}\u{1F648}-\u{1F64A}\u{1F680}-\u{1F6A2}\u{1F6A4}-\u{1F6B3}\u{1F6B7}-\u{1F6BF}\u{1F6C1}-\u{1F6C5}\u{1F6D0}-\u{1F6D2}\u{1F6D5}-\u{1F6D7}\u{1F6DD}-\u{1F6DF}\u{1F6EB}\u{1F6EC}\u{1F6F4}-\u{1F6FC}\u{1F7E0}-\u{1F7EB}\u{1F7F0}\u{1F90D}\u{1F90E}\u{1F910}-\u{1F917}\u{1F920}-\u{1F925}\u{1F927}-\u{1F92F}\u{1F93A}\u{1F93F}-\u{1F945}\u{1F947}-\u{1F976}\u{1F978}-\u{1F9B4}\u{1F9B7}\u{1F9BA}\u{1F9BC}-\u{1F9CC}\u{1F9D0}\u{1F9E0}-\u{1F9FF}\u{1FA70}-\u{1FA74}\u{1FA78}-\u{1FA7C}\u{1FA80}-\u{1FA86}\u{1FA90}-\u{1FAAC}\u{1FAB0}-\u{1FABA}\u{1FAC0}-\u{1FAC2}\u{1FAD0}-\u{1FAD9}\u{1FAE0}-\u{1FAE7}]/
|
141
|
+
def contain_non_euc_kr_character?(ignore_emoji: true)
|
142
|
+
str = ignore_emoji ? gsub(EMOJI_REGEX, "_") : self
|
143
|
+
new_str = str.encode("EUC-KR", invalid: :replace, undef: :replace, replace: ".")
|
144
|
+
.encode('UTF-8', invalid: :replace, undef: :replace, replace: ".")
|
145
|
+
str != new_str
|
146
|
+
end
|
147
|
+
|
148
|
+
def length_with_emoji
|
149
|
+
each_grapheme_cluster.size
|
150
|
+
end
|
151
|
+
|
152
|
+
def full_width_strip
|
153
|
+
# override to strip fullwidth spaces
|
154
|
+
gsub(/(\A[\x00\t\n\v\f\r ]+)|([\x00\t\n\v\f\r ]+\z)/, '')
|
155
|
+
end
|
156
|
+
|
157
|
+
def full_width_strip!
|
158
|
+
gsub!(/(\A[\x00\t\n\v\f\r ]+)|([\x00\t\n\v\f\r ]+\z)/, '')
|
159
|
+
end
|
160
|
+
|
161
|
+
# https://github.com/jhawthorn/fullwidth/blob/master/lib/fullwidth/string_ext.rb
|
162
|
+
HALFWIDTH_KANA = '。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚'
|
163
|
+
FULLWIDTH_KANA = '。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゛゜'
|
164
|
+
def to_fullwidth
|
165
|
+
tr(' !-~' + HALFWIDTH_KANA, "\u3000" + (0xFF01...0xFF5f).to_a.pack('U*') + FULLWIDTH_KANA)
|
166
|
+
end
|
167
|
+
|
168
|
+
HIRA_KATA_DIFF = ('ァ'.ord - 'ぁ'.ord).freeze
|
169
|
+
|
170
|
+
MIN_HIRA_NUMBER = 'ぁ'.ord.freeze
|
171
|
+
MAX_HIRA_NUMBER = 'ゖ'.ord.freeze
|
172
|
+
HIRA_SUFFIX_SET = ['ゝ', 'ゞ'].map { |c| c.ord }.to_set.freeze
|
173
|
+
|
174
|
+
MIN_KATA_NUMBER = (MIN_HIRA_NUMBER + HIRA_KATA_DIFF).freeze
|
175
|
+
MAX_KATA_NUMBER = (MAX_HIRA_NUMBER + HIRA_KATA_DIFF).freeze
|
176
|
+
KATA_SUFFIX_SET = HIRA_SUFFIX_SET.map { |num| num + HIRA_KATA_DIFF }.to_set.freeze
|
177
|
+
|
178
|
+
def to_hiragana!
|
179
|
+
(0...self.size).each do |i|
|
180
|
+
num = self[i].ord
|
181
|
+
self[i] = (num - HIRA_KATA_DIFF).chr if (MIN_KATA_NUMBER <= num && num <= MAX_KATA_NUMBER) || KATA_SUFFIX_SET.include?(num)
|
182
|
+
end
|
183
|
+
self
|
184
|
+
end
|
185
|
+
def to_hiragana
|
186
|
+
self.dup.to_hiragana!
|
187
|
+
end
|
188
|
+
|
189
|
+
def to_katakana!
|
190
|
+
(0...self.size).each do |i|
|
191
|
+
num = self[i].ord
|
192
|
+
self[i] = (num + HIRA_KATA_DIFF).chr if (MIN_HIRA_NUMBER <= num && num <= MAX_HIRA_NUMBER) || HIRA_SUFFIX_SET.include?(num)
|
193
|
+
end
|
194
|
+
self
|
195
|
+
end
|
196
|
+
def to_katakana
|
197
|
+
self.dup.to_katakana!
|
198
|
+
end
|
199
|
+
|
200
|
+
# 문자열에서 모든 공백 제거
|
201
|
+
def strip_spaces
|
202
|
+
gsub(/[[:space:]]/, "")
|
203
|
+
end
|
204
|
+
|
205
|
+
# 라인브레이크 제거
|
206
|
+
def strip_linebreaks
|
207
|
+
gsub(/\r\n/, " ").gsub(/[\r\n]/, " ")
|
208
|
+
end
|
209
|
+
|
210
|
+
def levenshtein(t)
|
211
|
+
String.levenshtein(self, t)
|
212
|
+
end
|
213
|
+
|
214
|
+
def match_all(needles)
|
215
|
+
String.match_all(self, needles)
|
216
|
+
end
|
217
|
+
|
218
|
+
class << self
|
219
|
+
# 두 문자열의 distance 계산
|
220
|
+
def levenshtein(s, t)
|
221
|
+
m = s.length
|
222
|
+
n = t.length
|
223
|
+
return m if n == 0
|
224
|
+
return n if m == 0
|
225
|
+
d = Array.new(m+1) {Array.new(n+1)}
|
226
|
+
|
227
|
+
(0..m).each {|i| d[i][0] = i}
|
228
|
+
(0..n).each {|j| d[0][j] = j}
|
229
|
+
(1..n).each do |j|
|
230
|
+
(1..m).each do |i|
|
231
|
+
d[i][j] = if s[i-1] == t[j-1] # adjust index into string
|
232
|
+
d[i-1][j-1] # no operation required
|
233
|
+
else
|
234
|
+
[ d[i-1][j]+1, # deletion
|
235
|
+
d[i][j-1]+1, # insertion
|
236
|
+
d[i-1][j-1]+1, # substitution
|
237
|
+
].min
|
238
|
+
end
|
239
|
+
end
|
240
|
+
end
|
241
|
+
d[m][n]
|
242
|
+
end
|
243
|
+
|
244
|
+
# 문자열을 haystack (전체 문자열)로 하여, 부분 문자열 list (needles)와 매칭되는 모든 문자열 반환
|
245
|
+
# ex. haystack = "abcabc", needles = ["a", "ab", "bc", "abc"]
|
246
|
+
# => ["a", "ab", "abc", "bc", "a", "ab", "abc", "bc"]
|
247
|
+
def match_all(haystack, needles)
|
248
|
+
return [] if needles.blank?
|
249
|
+
needles = Array.wrap(needles).flatten
|
250
|
+
matcher = AhoCorasickMatcher.new(needles)
|
251
|
+
matcher.match(haystack)
|
252
|
+
end
|
253
|
+
end
|
254
|
+
end
|
data/lib/coaster/core_ext.rb
CHANGED
data/lib/coaster/version.rb
CHANGED
data/test/test_string.rb
ADDED
@@ -0,0 +1,128 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
require 'minitest/autorun'
|
3
|
+
|
4
|
+
module Coaster
|
5
|
+
class TestString < Minitest::Test
|
6
|
+
def test_string
|
7
|
+
# hiragana -> katakana (to_katakana)
|
8
|
+
hiragana_seq = 'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞ'
|
9
|
+
katakana_seq = 'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヽヾ'
|
10
|
+
not_japanese_seq='안녕하세요_hello~world!_一二三'
|
11
|
+
mixed_str = hiragana_seq + not_japanese_seq
|
12
|
+
# 히라가나 -> 카타카나 1:1 변환 가능한가?
|
13
|
+
assert_equal hiragana_seq.to_katakana, katakana_seq
|
14
|
+
# 히라가나 -> 카타카나 -> 히라가나 변환시, 원래 string 유지되는가?
|
15
|
+
assert_equal hiragana_seq.to_katakana.to_hiragana, hiragana_seq
|
16
|
+
# 일본어가 아닌 string은 카타카나 변환 시도시 원본이 유지되는가?
|
17
|
+
assert_equal not_japanese_seq.to_katakana, not_japanese_seq
|
18
|
+
# 일본어와 아닌것이 섞여있는 문장에서, 히라가나'만' 카타카나로 변환되는가?
|
19
|
+
assert_equal mixed_str.to_katakana, katakana_seq + not_japanese_seq
|
20
|
+
# 일본어와 아닌것이 섞여있는 문장에서, 히라가나 -> 카타카나 -> 히라가나 변환시, 원래 string 유지되는가?
|
21
|
+
assert_equal mixed_str.to_katakana.to_hiragana, mixed_str
|
22
|
+
# 공백문자열에 다른게 추가되지는 않는가?
|
23
|
+
assert_equal ''.to_katakana, ''
|
24
|
+
|
25
|
+
# katakana -> hiragana (to_hiragana)
|
26
|
+
katakana_seq = 'ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヰヱヲンヴヵヶヽヾ'
|
27
|
+
hiragana_seq = 'ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖゝゞ'
|
28
|
+
not_japanese_seq='안녕하세요_hello~world!_一二三'
|
29
|
+
mixed_str = katakana_seq + not_japanese_seq
|
30
|
+
# 카타카나 -> 히라가나 1:1 변환 가능한가?
|
31
|
+
assert_equal katakana_seq.to_hiragana, hiragana_seq
|
32
|
+
# 카타카나 -> 히라가나 -> 카타카나 변환시, 원래 string 유지되는가?
|
33
|
+
assert_equal katakana_seq.to_hiragana.to_katakana, katakana_seq
|
34
|
+
# 일본어가 아닌 string은 히라가나 변환 시도시 원본이 유지되는가?
|
35
|
+
assert_equal not_japanese_seq.to_hiragana, not_japanese_seq
|
36
|
+
# 일본어와 아닌것이 섞여있는 문장에서, 카타카나'만' 히라가나로 변환되는가?
|
37
|
+
assert_equal mixed_str.to_hiragana, hiragana_seq + not_japanese_seq
|
38
|
+
# 일본어와 아닌것이 섞여있는 문장에서, 카타카나 -> 히라가나 -> 카타카나 변환시, 원래 string 유지되는가?
|
39
|
+
assert_equal mixed_str.to_hiragana.to_katakana, mixed_str
|
40
|
+
# 공백문자열에 다른게 추가되지는 않는가?
|
41
|
+
assert_equal ''.to_hiragana, ''
|
42
|
+
|
43
|
+
# half -> full width (to_full_characters)
|
44
|
+
half_seq = ' 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
|
45
|
+
full_seq = ' 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
|
46
|
+
not_target_half_seq = ((33..255).map { |c| c.chr('UTF-8') }.join.chars - half_seq.chars).join + '일이삼いちにさんイチニサン一二三'
|
47
|
+
mixed_str = half_seq + not_target_half_seq
|
48
|
+
# half -> full 1:1 잘 변환되는가?
|
49
|
+
assert_equal half_seq.to_full_characters, full_seq
|
50
|
+
# half -> full -> half 변환 시, 원래 string 유지되는가?
|
51
|
+
assert_equal half_seq.to_full_characters.to_half_characters, half_seq
|
52
|
+
# not_target string은 to full 변환 시도시 원본이 유지되는가?
|
53
|
+
assert_equal not_target_half_seq.to_full_characters, not_target_half_seq
|
54
|
+
# target / not_target이 섞여있는 문장에서, target'만' full로 변환되는가?
|
55
|
+
assert_equal mixed_str.to_full_characters, full_seq + not_target_half_seq
|
56
|
+
# target / not_target이 섞여있는 문장에서, half -> full -> half 변환시, 원래 string 유지되는가?
|
57
|
+
assert_equal mixed_str.to_full_characters.to_half_characters, mixed_str
|
58
|
+
# 공백문자열에 다른게 추가되지는 않는가?
|
59
|
+
assert_equal ''.to_full_characters, ''
|
60
|
+
|
61
|
+
# half -> full width (to_full_characters with symbol)
|
62
|
+
half_seq = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
|
63
|
+
full_seq = ' !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
|
64
|
+
not_target_half_seq = ((33..255).map { |c| c.chr('UTF-8') }.join.chars - half_seq.chars).join + '일이삼いちにさんイチニサン一二三'
|
65
|
+
mixed_str = half_seq + not_target_half_seq
|
66
|
+
# half -> full 1:1 잘 변환되는가?
|
67
|
+
assert_equal half_seq.to_full_characters(symbol: true), full_seq
|
68
|
+
# half -> full -> half 변환 시, 원래 string 유지되는가?
|
69
|
+
assert_equal half_seq.to_full_characters(symbol: true).to_half_characters(symbol: true), half_seq
|
70
|
+
# not_target string은 to full 변환 시도시 원본이 유지되는가?
|
71
|
+
assert_equal not_target_half_seq.to_full_characters(symbol: true), not_target_half_seq
|
72
|
+
# target / not_target이 섞여있는 문장에서, target'만' full로 변환되는가?
|
73
|
+
assert_equal mixed_str.to_full_characters(symbol: true), full_seq + not_target_half_seq
|
74
|
+
# target / not_target이 섞여있는 문장에서, half -> full -> half 변환시, 원래 string 유지되는가?
|
75
|
+
assert_equal mixed_str.to_full_characters(symbol: true).to_half_characters(symbol: true), mixed_str
|
76
|
+
# 공백문자열에 다른게 추가되지는 않는가?
|
77
|
+
assert_equal ''.to_full_characters, ''
|
78
|
+
|
79
|
+
# full -> half width (to_half_characters)
|
80
|
+
half_seq = ' 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
|
81
|
+
full_seq = ' 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz'
|
82
|
+
not_target_full_seq = ((33..255).map { |c| (c + 0xfee0).chr('UTF-8') }.join.chars - full_seq.chars).join + '일이삼いちにさんイチニサン一二三'
|
83
|
+
mixed_str = full_seq + not_target_full_seq
|
84
|
+
# full -> half 1:1 잘 변환되는가?
|
85
|
+
assert_equal full_seq.to_half_characters, half_seq
|
86
|
+
# full -> half -> full 변환 시, 원래 string 유지되는가?
|
87
|
+
assert_equal full_seq.to_half_characters.to_full_characters, full_seq
|
88
|
+
# not_target string은 to half 변환 시도시 원본이 유지되는가?
|
89
|
+
assert_equal not_target_full_seq.to_half_characters, not_target_full_seq
|
90
|
+
# target / not_target이 섞여있는 문장에서, target'만' half로 변환되는가?
|
91
|
+
assert_equal mixed_str.to_half_characters, half_seq + not_target_full_seq
|
92
|
+
# target / not_target이 섞여있는 문장에서, full -> half -> full 변환시, 원래 string 유지되는가?
|
93
|
+
assert_equal mixed_str.to_half_characters.to_full_characters, mixed_str
|
94
|
+
# 공백문자열에 다른게 추가되지는 않는가?
|
95
|
+
assert_equal ''.to_half_characters, ''
|
96
|
+
|
97
|
+
# full -> half width (to_half_characters with symbol)
|
98
|
+
half_seq = ' !"#$%&\'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
|
99
|
+
full_seq = ' !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'
|
100
|
+
not_target_full_seq = ((33..255).map { |c| (c + 0xfee0).chr('UTF-8') }.join.chars - full_seq.chars).join + '일이삼いちにさんイチニサン一二三'
|
101
|
+
mixed_str = full_seq + not_target_full_seq
|
102
|
+
# full -> half 1:1 잘 변환되는가?
|
103
|
+
expect(full_seq.to_half_characters(symbol: true)).to eq half_seq
|
104
|
+
assert_equal full_seq.to_half_characters(symbol: true), half_seq
|
105
|
+
# full -> half -> full 변환 시, 원래 string 유지되는가?
|
106
|
+
expect(full_seq.to_half_characters(symbol: true).to_full_characters(symbol: true)).to eq full_seq
|
107
|
+
assert_equal full_seq.to_half_characters(symbol: true).to_full_characters(symbol: true), full_seq
|
108
|
+
# not_target string은 to half 변환 시도시 원본이 유지되는가?
|
109
|
+
expect(not_target_full_seq.to_half_characters(symbol: true)).to eq not_target_full_seq
|
110
|
+
assert_equal not_target_full_seq.to_half_characters(symbol: true), not_target_full_seq
|
111
|
+
# target / not_target이 섞여있는 문장에서, target'만' half로 변환되는가?
|
112
|
+
expect(mixed_str.to_half_characters(symbol: true)).to eq half_seq + not_target_full_seq
|
113
|
+
assert_equal mixed_str.to_half_characters(symbol: true), half_seq + not_target_full_seq
|
114
|
+
# target / not_target이 섞여있는 문장에서, full -> half -> full 변환시, 원래 string 유지되는가?
|
115
|
+
expect(mixed_str.to_half_characters(symbol: true).to_full_characters(symbol: true)).to eq mixed_str
|
116
|
+
assert_equal mixed_str.to_half_characters(symbol: true).to_full_characters(symbol: true), mixed_str
|
117
|
+
# 공백문자열에 다른게 추가되지는 않는가?
|
118
|
+
expect(''.to_half_characters(symbol: true)).to eq ''
|
119
|
+
assert_equal ''.to_half_characters(symbol: true), ''
|
120
|
+
|
121
|
+
# strips tags correctly
|
122
|
+
# 알라딘 "독거소녀 삐삐" 중 contents
|
123
|
+
old_str = "<p>목차<BR>\r\n\n<BR>\r\n\n<B>1부 괜찮아 사람이 되어도</B><BR>\r\n\n<BR>\r\n\n거절학개론 - 이 필수 교양서의 목차를 지운다 19<BR>\r\n\n소프트아이스크림 20<BR>\r\n\n말과 투구와 노새와 랩 22<BR>\r\n\n헝거 문Hunger Moon 24<BR>\r\n\n술병은 비고 스파이는 떠나요 25<BR>\r\n\n눈사람 소년 28<BR>\r\n\n유리로 망치를 깨서 탈출할까요 30<BR>\r\n\n목단꽃 무늬 접시 32<BR>\r\n\n포도 잎 일곱 장 33<BR>\r\n\n프랑스 자수가 놓인 식탁보 34<BR>\r\n\n흰 시간 검은 시간 36<BR>\r\n\n맑음 37<BR>\r\n\n해피 어스 데이 투 유Happy Earth Day to You 40<BR>\r\n\n아득한 아카펠라 42<BR>\r\n\n일기예보 45 <BR>\r\n\n<BR>\r\n\n<B>2부 농담의 힘을 믿고 끝까지</B><BR>\r\n\n<BR>\r\n\n지그시 51<BR>\r\n\n드디어 52<BR>\r\n\n아마도 53<BR>\r\n\n공중사원 56<BR>\r\n\n6월 60<BR>\r\n\n피노키오 62<BR>\r\n\n고군분투 63<BR>\r\n\n한 번도 본 적 없는 목소리가 손을 흔들면 64<BR>\r\n\n볕멍 67<BR>\r\n\n판탈롱 68<BR>\r\n\n만화소녀시대 70<BR>\r\n\n아침은 맑음, 오후는 모르겠어요 73<BR>\r\n\n뜨거운 취미 76<BR>\r\n\n눈물광대 80<BR>\r\n\n막막광대 81<BR>\r\n\n회의광대 84<BR>\r\n\n위임광대 86<BR>\r\n\n<BR>\r\n\n<B>3부 환하고 말랑말랑한</B><BR>\r\n\n<BR>\r\n\n소녀들이 소풍을 가요 91<BR>\r\n\n무적 92<BR>\r\n\n올리브 vs 올리브 96<BR>\r\n\n사슴뿔선인장 97<BR>\r\n\n달려라 하니 100<BR>\r\n\n버뮤다 제라늄 102<BR>\r\n\n독거소녀 삐삐 104<BR>\r\n\n초승달편의점 105<BR>\r\n\n반상회 508 108<BR>\r\n\n반상회 401 110<BR>\r\n\n후크선장 112<BR>\r\n\n사포 115<BR>\r\n\n사십 계단에서 훔쳐 온 사과 116<BR>\r\n\n검은 모자를 쓴 책상 118<BR>\r\n\n십자뜨기 119<BR>\r\n\n목련 부메랑 120<BR>\r\n\n숨도둑 122<BR>\r\n\n눈의 결정을 뜨개질하는 소녀들 124<BR>\r\n\n<BR>\r\n\n<B>4부 놀이의 각도</B><BR>\r\n\n<BR>\r\n\n혼자 살아요 129<BR>\r\n\n지구력 130<BR>\r\n\n무희들 132<BR>\r\n\n신문지 놀이 134<BR>\r\n\n해바라기 137<BR>\r\n\n묘지지도 138<BR>\r\n\n뽁뽁이Bubble Wrap 140<BR>\r\n\n우수의 이차방정식 143<BR>\r\n\n셀카Selfie 144<BR>\r\n\n마리오네뜨의 동선 146<BR>\r\n\n빵과 칼의 거리 148<BR>\r\n\n단골이 되기에 너무 늦은 술집은 없다 150<BR>\r\n\n도마뱀이 나타난 저녁 153<BR>\r\n\n무 154<BR>\r\n\n전사의 시 157<BR>\r\n\n<BR>\r\n\n<B>부록 </B><BR>\r\n\n<BR>\r\n\n울음의 이정표 159<BR>\r\n\n숨죽여 우는 사람 160<BR>\r\n\n프롤로고스 161<BR>\r\n\n<BR>\r\n\n해설 _ 발랄과 우울, 그리고 그 사이 - 최정란 시집 ��독거소녀 삐삐�� 읽기 163<BR>\r\n\n오민석(문학평론가·단국대 교수)</p>"
|
124
|
+
new_str = old_str.strip_tags
|
125
|
+
assert_equal new_str.is_utf8?, true
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: coaster
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.4.1
|
4
|
+
version: 1.4.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- buzz jung
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-09-
|
11
|
+
date: 2023-09-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: oj
|
@@ -200,6 +200,7 @@ files:
|
|
200
200
|
- lib/coaster/core_ext/standard_error.rb
|
201
201
|
- lib/coaster/core_ext/standard_error/raven.rb
|
202
202
|
- lib/coaster/core_ext/standard_error/sentry.rb
|
203
|
+
- lib/coaster/core_ext/string.rb
|
203
204
|
- lib/coaster/git.rb
|
204
205
|
- lib/coaster/git/options.rb
|
205
206
|
- lib/coaster/git/repository.rb
|
@@ -221,6 +222,7 @@ files:
|
|
221
222
|
- test/test_object_translation.rb
|
222
223
|
- test/test_serialized_property.rb
|
223
224
|
- test/test_standard_error.rb
|
225
|
+
- test/test_string.rb
|
224
226
|
- test/test_util.rb
|
225
227
|
homepage: http://github.com/frograms/coaster
|
226
228
|
licenses:
|
@@ -260,4 +262,5 @@ test_files:
|
|
260
262
|
- test/test_object_translation.rb
|
261
263
|
- test/test_serialized_property.rb
|
262
264
|
- test/test_standard_error.rb
|
265
|
+
- test/test_string.rb
|
263
266
|
- test/test_util.rb
|