unicode-namecode 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/bin/unicode-namecode +243 -0
- data/data/NameAliases.txt +575 -0
- data/data/UnicodeData.txt +40116 -0
- data/data/emoji-test.txt +5331 -0
- data/data/unicode_trie.cache +0 -0
- data/lib/unicode_namecode/aliases.rb +58 -0
- data/lib/unicode_namecode/data_loader.rb +97 -0
- data/lib/unicode_namecode/emoji.rb +68 -0
- data/lib/unicode_namecode/fuzzy.rb +60 -0
- data/lib/unicode_namecode/trie.rb +69 -0
- data/lib/unicode_namecode.rb +104 -0
- metadata +71 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: d2923b19be80b90be3c1c1f6cc29ccf38e0e0a25a62d5c77110f0a6b9190037c
|
4
|
+
data.tar.gz: bb16f78d180efa841e301aa0d219512ac1e3ceacc8b5c68e18619b90695c7d70
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 5f2e59d80d111539c5f60d095d0fda8f541e0187a65d3c9a606c053f896201f42dc94c608d97aab704ed155f83be52ba3ad6e00a757eefcb3ba75e7327097f0a
|
7
|
+
data.tar.gz: d1f6651e53d1f9530ff1c85d22b3a58e1bd290031f9dedfaa989fc0ef853061d2058c1488d907a5d26d363bc14de913544a6f0e7b0495b065d34118b078ec217
|
@@ -0,0 +1,243 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
# Command-line interface for UnicodeNamecode
|
5
|
+
require_relative '../lib/unicode_namecode'
|
6
|
+
|
7
|
+
# Parse the command line and dispatch to the matching operation.
#
# The first argument selects the command (matched case-insensitively); the
# remaining arguments are joined with single spaces to form the query. When
# the first argument is not a known command, the whole argument list is
# treated as a search query.
#
# Prints usage and exits with status 1 when no arguments were given.
def parse_args
  if ARGV.empty?
    show_usage
    exit 1
  end

  command = ARGV.first.downcase
  query = ARGV.drop(1).join(' ')

  case command
  when 'search', 's'  then perform_search(query)
  when 'prefix', 'p'  then perform_prefix_search(query)
  when 'fuzzy', 'f'   then perform_fuzzy_search(query)
  when 'emoji', 'e'   then perform_emoji_lookup(query)
  when 'reverse', 'r' then perform_reverse_lookup(query)
  when 'alias', 'a'   then perform_alias_lookup(query)
  when 'help', 'h', '--help', '-h' then show_usage
  else
    # No command given: search for the arguments exactly as typed. Joining
    # ARGV directly avoids the trailing space (and forced lower-casing) that
    # gluing the downcased first word onto an empty query would produce.
    perform_search(ARGV.join(' '))
  end
end
|
37
|
+
|
38
|
+
# Search for a Unicode name: exact lookup first, then fuzzy suggestions.
#
# When UnicodeNamecode.codepoint resolves the query, prints the codepoint in
# hex and decimal and notes whether the query is an alias. Otherwise prints
# up to five fuzzy matches with their similarity as a percentage.
#
# @param query [String] the name to look up
def perform_search(query)
  return puts 'Error: No search term provided' if query.empty?

  codepoint = UnicodeNamecode.codepoint(query)
  if codepoint
    result_type = UnicodeNamecode.is_alias?(query) ? 'alias' : 'official name'
    puts "✓ Found: #{query.upcase} → U+#{codepoint.to_s(16).upcase} (#{codepoint}) [#{result_type}]"
    return
  end

  puts "No exact match found for '#{query}'. Trying fuzzy search..."
  fuzzy_results = UnicodeNamecode.fuzzy_search(query, 5)

  unless fuzzy_results.any?
    puts 'No similar matches found.'
    return
  end

  puts 'Similar matches:'
  fuzzy_results.each do |result|
    match_codepoint = UnicodeNamecode.codepoint(result[:name])
    similarity = (result[:similarity] * 100).round(1)
    # The lookup is falsy on a miss (see the exact-match branch above); still
    # show the suggestion instead of crashing on nil.to_s(16).
    target = match_codepoint ? " → U+#{match_codepoint.to_s(16).upcase}" : ''
    puts " #{result[:name]}#{target} (#{similarity}% match)"
  end
end
|
67
|
+
|
68
|
+
# List Unicode names that begin with the given prefix.
#
# Asks UnicodeNamecode.prefix_search for at most 20 entries and prints each
# entry's :name alongside its :codepoint in upper-case hex.
#
# @param prefix [String] leading text of the names to list
def perform_prefix_search(prefix)
  if prefix.empty?
    puts "Error: No prefix provided"
    return
  end

  matches = UnicodeNamecode.prefix_search(prefix, 20)

  unless matches.any?
    puts "No names found starting with '#{prefix}'."
    return
  end

  puts "Names starting with '#{prefix.upcase}':"
  matches.each do |match|
    hex = match[:codepoint].to_s(16).upcase
    puts " #{match[:name]} → U+#{hex}"
  end
end
|
83
|
+
|
84
|
+
# Print Unicode names similar to the query (useful for typos).
#
# Requests up to ten results from UnicodeNamecode.fuzzy_search and prints each
# result's :name, its codepoint in upper-case hex, and its :similarity as a
# percentage rounded to one decimal place.
#
# @param query [String] the (possibly misspelled) name to match
def perform_fuzzy_search(query)
  return puts 'Error: No search term provided' if query.empty?

  results = UnicodeNamecode.fuzzy_search(query, 10)

  unless results.any?
    puts 'No fuzzy matches found.'
    return
  end

  puts "Fuzzy matches for '#{query}':"
  results.each do |result|
    codepoint = UnicodeNamecode.codepoint(result[:name])
    similarity = (result[:similarity] * 100).round(1)
    # If the name cannot be resolved back to a codepoint, print the match
    # without one rather than raising on nil.to_s(16).
    target = codepoint ? " → U+#{codepoint.to_s(16).upcase}" : ''
    puts " #{result[:name]}#{target} (#{similarity}% match)"
  end
end
|
101
|
+
|
102
|
+
# Look up emoji information in either direction.
#
# A query that is entirely a hex number with a 0x prefix, or entirely decimal
# digits, is read as a codepoint and mapped to its emoji and name. Any other
# query is treated as the emoji itself and mapped to its codepoint(s) and name.
#
# @param query [String] an emoji, or a codepoint such as "0x1F60A" or "128522"
def perform_emoji_lookup(query)
  return puts 'Error: No emoji or codepoint provided' if query.empty?

  # \A and \z anchor the whole string. With ^ and $ a multi-line argument such
  # as "junk\n65" would pass as a number and then be parsed by to_i as 0.
  codepoint =
    case query
    when /\A0x\h+\z/i then query.to_i(16)
    when /\A\d+\z/ then query.to_i
    end

  if codepoint
    hex = codepoint.to_s(16).upcase
    emoji = UnicodeNamecode.emoji_for_codepoint(codepoint)
    if emoji
      name = UnicodeNamecode.name_for_codepoint(codepoint)
      puts "U+#{hex} → #{emoji} (#{name})"
    else
      puts "No emoji found for codepoint U+#{hex}"
    end
    return
  end

  sequence = UnicodeNamecode.codepoint_for_emoji(query)
  unless sequence
    puts "No emoji information found for '#{query}'"
    return
  end

  name = UnicodeNamecode.name_for_emoji(query)
  # The lookup may yield one codepoint or an array of them; Array() lets both
  # shapes share the same formatting.
  hex_list = Array(sequence).map { |cp| cp.to_s(16).upcase }.join(' U+')
  puts "#{query} → U+#{hex_list} (#{name})"
end
|
142
|
+
|
143
|
+
# Reverse lookup: print the name, emoji and aliases for a codepoint.
#
# The query must be entirely a hex number with a 0x prefix or entirely decimal
# digits; anything else prints an error. A missing name prints as "Unknown"
# and a missing emoji as "None". The alias list is printed only when
# non-empty.
#
# @param query [String] a codepoint such as "0x2603" or "9731"
def perform_reverse_lookup(query)
  return puts 'Error: No codepoint provided' if query.empty?

  # \A and \z anchor the whole string. With ^ and $ a multi-line argument such
  # as "junk\n65" would pass validation and then be parsed by to_i as 0.
  codepoint =
    case query
    when /\A0x\h+\z/i then query.to_i(16)
    when /\A\d+\z/ then query.to_i
    end

  unless codepoint
    puts 'Error: Please provide a valid codepoint (decimal or hex with 0x)'
    return
  end

  name = UnicodeNamecode.name_for_codepoint(codepoint)
  emoji = UnicodeNamecode.emoji_for_codepoint(codepoint)
  aliases = UnicodeNamecode.aliases_for_codepoint(codepoint)

  puts "U+#{codepoint.to_s(16).upcase}:"
  puts " Name: #{name || 'Unknown'}"
  puts " Emoji: #{emoji || 'None'}"
  return unless aliases.any?

  puts ' Aliases:'
  aliases.each do |alias_info|
    puts " #{alias_info[:name]} (#{alias_info[:type]})"
  end
end
|
172
|
+
|
173
|
+
# Look up alias information by alias name or by codepoint.
#
# A query that is entirely a hex number with a 0x prefix, or entirely decimal
# digits, is read as a codepoint and its aliases are listed. Any other query
# is resolved as an alias name via UnicodeNamecode.codepoint_for_alias. A
# missing character name is printed as "Unknown".
#
# @param query [String] an alias name, or a codepoint such as "0x0000"
def perform_alias_lookup(query)
  return puts 'Error: No alias or codepoint provided' if query.empty?

  # \A and \z anchor the whole string. With ^ and $ a multi-line argument such
  # as "junk\n65" would pass as a number and then be parsed by to_i as 0.
  numeric_codepoint =
    case query
    when /\A0x\h+\z/i then query.to_i(16)
    when /\A\d+\z/ then query.to_i
    end

  unless numeric_codepoint
    # Not numeric: resolve the query as an alias name.
    alias_codepoint = UnicodeNamecode.codepoint_for_alias(query)
    if alias_codepoint
      name = UnicodeNamecode.name_for_codepoint(alias_codepoint) || 'Unknown'
      puts "#{query.upcase} → U+#{alias_codepoint.to_s(16).upcase} (#{name})"
    else
      puts "No alias found for '#{query}'"
    end
    return
  end

  aliases = UnicodeNamecode.aliases_for_codepoint(numeric_codepoint)
  name = UnicodeNamecode.name_for_codepoint(numeric_codepoint) || 'Unknown'

  puts "U+#{numeric_codepoint.to_s(16).upcase} (#{name}):"
  if aliases.any?
    puts ' Aliases:'
    aliases.each do |alias_info|
      puts " #{alias_info[:name]} (#{alias_info[:type]})"
    end
  else
    puts ' No aliases found'
  end
end
|
208
|
+
|
209
|
+
# Print usage information and examples to standard output.
#
# The command table lists every spelling the dispatcher accepts, including
# the --help/-h flags, and marks the commands that also take a codepoint.
def show_usage
  puts <<~USAGE
    UnicodeNamecode - Unicode character lookup tool

    Usage: unicode-namecode <command> [query]

    Commands:
      search, s <name>              Search for Unicode name (exact + alias + fuzzy)
      prefix, p <prefix>            Find names starting with prefix
      fuzzy, f <name>               Find similar names (for typos)
      emoji, e <emoji|codepoint>    Look up emoji information
      reverse, r <codepoint>        Reverse lookup: codepoint → name/emoji/aliases
      alias, a <alias|codepoint>    Look up alias information
      help, h, --help, -h           Show this help

    Codepoints are given in decimal (9731) or in hex with a 0x prefix (0x2603).

    Examples:
      unicode-namecode search SNOWMAN
      unicode-namecode search NULL
      unicode-namecode prefix SNOW
      unicode-namecode fuzzy SNOWMN
      unicode-namecode emoji 😊
      unicode-namecode emoji 0x1F60A
      unicode-namecode reverse 0x2603
      unicode-namecode alias NULL
      unicode-namecode alias 0x0000

    If no command is specified, treats all arguments as a search query.
  USAGE
end
|
239
|
+
|
240
|
+
# Main execution.
#
# Run the CLI when this file is the program being executed. RubyGems and
# Bundler start gem executables through a generated wrapper script that
# `load`s this file, so $PROGRAM_NAME is the wrapper's path rather than
# __FILE__. The wrapper carries the same base name as this script, so
# comparing base names keeps the installed command working while still
# letting the file be loaded under a different program name (e.g. by a test
# runner) without side effects.
if __FILE__ == $PROGRAM_NAME || File.basename(__FILE__) == File.basename($PROGRAM_NAME)
  parse_args
end
|