ecdict 1.2 → 1.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/ecdict +110 -79
- data/lib/ecdict.rb +17 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ad90acc56dee6571bea87e3b2449f76d7cea6a8c6c36b359c6d639586e61ece3
|
4
|
+
data.tar.gz: e4e5da0487ef5534b973d5f9a9bef11d5bf1adf31a63feb9296dfbebae74830a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64681ec2360f1324a4d1899399a571356ad1e74dcd6958a7355f58c5326f6a88332fa0a7a38cdf29be2c6285fb203e3b6000a7464093f406dce964ed958e0d50
|
7
|
+
data.tar.gz: ddd148dad372d8bddfcf07a6ab409df8cb852db99a0ca23de9270d89356abb280dec59a915c5e3644476e7191304eb36efb3529a1588b2e8a709ea3267a2b9e1
|
data/bin/ecdict
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# File : ecdict.rb
|
4
4
|
# Authors : ccmywish <ccmywish@qq.com>
|
5
5
|
# Created on : <2020-10-18>
|
6
|
-
# Last modified : <2022-
|
6
|
+
# Last modified : <2022-04-30>
|
7
7
|
#
|
8
8
|
# ecdict:
|
9
9
|
#
|
@@ -26,26 +26,25 @@
|
|
26
26
|
# ------------------------------------------------------
|
27
27
|
|
28
28
|
require 'sqlite3'
|
29
|
+
require 'ecdict'
|
29
30
|
|
30
|
-
|
31
|
-
|
31
|
+
ECDict::STORAGE = File.expand_path("~/.local/share/ecdict")
|
32
|
+
ECDict::RAW_DATA = File.join(ECDict::STORAGE, 'ecdict-csv.7z')
|
32
33
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
ECDICT_GEM_VERSION = "1.2"
|
34
|
+
ECDict::DB = File.join(ECDict::STORAGE, 'ecdict.db')
|
35
|
+
ECDict::CSV = File.join(ECDict::STORAGE, 'ecdict.csv')
|
37
36
|
|
38
37
|
# 这个版本是我从上游下载的CSV后自己压缩的
|
39
38
|
# 这个文件的实际创建日期(上游作者创建)是2017/06/04
|
40
39
|
# 至2022/03/22,未见更新,估计以后上游维护这个词典的也不会经常更新
|
41
40
|
# 而340万的单词量已经足够,因此我们可以足够长的时间一直停留在此词典版本
|
42
|
-
|
43
|
-
|
41
|
+
ECDict::DICT_DOWNLOAD_LINK = "https://gitee.com/ccmywish/ecdict-data"
|
42
|
+
ECDict::DICT_SHA256 = "86782a0e5d05486b482be5a2e5fa99b1a9b2ae51240c853ecfe181886133818a"
|
44
43
|
|
45
44
|
|
46
45
|
require 'fileutils'
|
47
|
-
FileUtils.mkdir_p(
|
48
|
-
$DB = SQLite3::Database.new
|
46
|
+
FileUtils.mkdir_p(ECDict::STORAGE)
|
47
|
+
$DB = SQLite3::Database.new ECDict::DB
|
49
48
|
|
50
49
|
def close_db
|
51
50
|
$DB.close if $DB
|
@@ -84,9 +83,9 @@ def cyan(str) "\e[36m#{str}\e[0m" end
|
|
84
83
|
|
85
84
|
def check_download_integrity
|
86
85
|
require 'digest'
|
87
|
-
return false if !File.exists?(
|
88
|
-
sha256 = Digest::SHA256.file(
|
89
|
-
if
|
86
|
+
return false if !File.exists?(ECDict::RAW_DATA)
|
87
|
+
sha256 = Digest::SHA256.file(ECDict::RAW_DATA).hexdigest
|
88
|
+
if ECDict::DICT_SHA256 == sha256
|
90
89
|
true
|
91
90
|
else
|
92
91
|
false
|
@@ -96,40 +95,40 @@ end
|
|
96
95
|
|
97
96
|
def download_ecdict_raw_data
|
98
97
|
|
99
|
-
if File.exists?(
|
100
|
-
puts "=> 已存在原始数据文件#{
|
98
|
+
if File.exists?(ECDict::RAW_DATA) && check_download_integrity
|
99
|
+
puts "=> 已存在原始数据文件#{ECDict::RAW_DATA}, 无需再次下载"
|
101
100
|
else
|
102
101
|
|
103
|
-
if File.exists?(
|
102
|
+
if File.exists?(ECDict::RAW_DATA)
|
104
103
|
puts "=> 删除已存在但不完整的原始数据文件"
|
105
|
-
FileUtils.rm
|
104
|
+
FileUtils.rm ECDict::RAW_DATA
|
106
105
|
end
|
107
106
|
|
108
107
|
# 若已经有拉取的仓库,先检查里面的数据是否可用
|
109
|
-
raw_data_git_dir = File.join(
|
108
|
+
raw_data_git_dir = File.join(ECDict::STORAGE, 'ecdict-data')
|
110
109
|
raw_data_git_dir_data = File.join(raw_data_git_dir, 'ecdict-csv.7z')
|
111
110
|
|
112
111
|
if Dir.exists?(raw_data_git_dir)
|
113
112
|
if File.exists?(raw_data_git_dir_data)
|
114
|
-
FileUtils.cp(raw_data_git_dir_data,
|
113
|
+
FileUtils.cp(raw_data_git_dir_data, ECDict::STORAGE)
|
115
114
|
if check_download_integrity
|
116
115
|
puts "=> 从已经Git pull到的仓库中获取原始数据文件"
|
117
116
|
return true
|
118
117
|
else
|
119
|
-
FileUtils.rm
|
118
|
+
FileUtils.rm ECDict::RAW_DATA
|
120
119
|
end
|
121
120
|
end
|
122
121
|
FileUtils.rm_rf(raw_data_git_dir)
|
123
122
|
end
|
124
123
|
|
125
124
|
begin
|
126
|
-
puts "=> 使用Git从#{
|
127
|
-
ret = system("git -C #{
|
128
|
-
FileUtils.cp(File.join(
|
125
|
+
puts "=> 使用Git从#{ECDict::DICT_DOWNLOAD_LINK}获取原始数据库文件"
|
126
|
+
ret = system("git -C #{ECDict::STORAGE} clone #{ECDict::DICT_DOWNLOAD_LINK} ")
|
127
|
+
FileUtils.cp(File.join(ECDict::STORAGE, 'ecdict-data', 'ecdict-csv.7z'), ECDict::STORAGE) rescue nil
|
129
128
|
raise "Git拉取仓库过程失败,原始数据文件不完整!" if (ret != true || !check_download_integrity)
|
130
129
|
rescue StandardError => e
|
131
130
|
puts "=> #{e.message}"
|
132
|
-
FileUtils.rm(
|
131
|
+
FileUtils.rm(ECDict::RAW_DATA) rescue nil
|
133
132
|
return false
|
134
133
|
else
|
135
134
|
puts "=> 下载完成!"
|
@@ -143,10 +142,10 @@ def decompress_7z_to_csv()
|
|
143
142
|
require 'seven_zip_ruby'
|
144
143
|
|
145
144
|
# 正常解压出来的CSV应当至少200MB以上
|
146
|
-
if File.exists?(
|
147
|
-
if File.size(
|
145
|
+
if File.exists?(ECDict::CSV)
|
146
|
+
if File.size(ECDict::CSV) <= 200*1024*1024
|
148
147
|
puts "=> 删除旧有的不完全词典数据"
|
149
|
-
FileUtils.rm(
|
148
|
+
FileUtils.rm(ECDict::CSV)
|
150
149
|
else
|
151
150
|
puts "=> CSV数据已存在,无需再次解压,直接使用"
|
152
151
|
return true
|
@@ -154,12 +153,12 @@ def decompress_7z_to_csv()
|
|
154
153
|
end
|
155
154
|
|
156
155
|
|
157
|
-
File.open(
|
156
|
+
File.open(ECDict::RAW_DATA, "rb") do |file|
|
158
157
|
puts "=> 正在解压ecdict-csv.7z(#{ '%.2f' % (file.size/1024.0/1024) }MB)"
|
159
|
-
SevenZipRuby::Reader.extract_all(file,
|
158
|
+
SevenZipRuby::Reader.extract_all(file, ECDict::STORAGE)
|
160
159
|
end
|
161
160
|
|
162
|
-
puts "=> 已成功解压出ecdict.csv(#{ '%.2f' % (File.size(
|
161
|
+
puts "=> 已成功解压出ecdict.csv(#{ '%.2f' % (File.size(ECDict::CSV)/1024.0/1024) }MB)"
|
163
162
|
true
|
164
163
|
end
|
165
164
|
|
@@ -170,7 +169,7 @@ end
|
|
170
169
|
#
|
171
170
|
def check_db_integrity
|
172
171
|
# 正常从CSV生成出来的数据库应当至少400MB以上
|
173
|
-
if File.size(
|
172
|
+
if File.size(ECDict::DB) <= 400*1024*1024
|
174
173
|
false
|
175
174
|
else
|
176
175
|
true
|
@@ -191,13 +190,13 @@ end
|
|
191
190
|
def generate_sqlite_db()
|
192
191
|
|
193
192
|
# SQLite3::Database.new已经生成了该文件,所以需要提前判断到底是否存在
|
194
|
-
if File.exist?(
|
193
|
+
if File.exist?(ECDict::DB)
|
195
194
|
|
196
195
|
if !check_db_integrity
|
197
196
|
puts "=> 删除旧有的不完整数据库"
|
198
197
|
$DB.close
|
199
|
-
FileUtils.rm(
|
200
|
-
$DB = SQLite3::Database.new
|
198
|
+
FileUtils.rm(ECDict::DB)
|
199
|
+
$DB = SQLite3::Database.new ECDict::DB
|
201
200
|
else
|
202
201
|
puts "=> 完整(可能的)数据库已存在,无需再次从CSV文件生成,直接使用"
|
203
202
|
return true
|
@@ -249,12 +248,12 @@ EOF
|
|
249
248
|
|
250
249
|
begin
|
251
250
|
$DB.execute sql
|
252
|
-
puts "=> 创建数据库文件#{
|
251
|
+
puts "=> 创建数据库文件#{ECDict::DB},并生成表`ecdict`"
|
253
252
|
rescue Exception => e
|
254
253
|
puts "=> #{e.message}"
|
255
|
-
puts "=> 创建数据库文件#{
|
254
|
+
puts "=> 创建数据库文件#{ECDict::DB}失败,或表`ecdict`生成失败"
|
256
255
|
close_db
|
257
|
-
File.delete(
|
256
|
+
File.delete(ECDict::DB)
|
258
257
|
return false
|
259
258
|
end
|
260
259
|
|
@@ -276,7 +275,7 @@ EOF
|
|
276
275
|
puts <<~EOC
|
277
276
|
|
278
277
|
#{blue("gem_name = 'ecdict'")}
|
279
|
-
#{blue("version = '#{
|
278
|
+
#{blue("version = '#{ECDict::VERSION}'")}
|
280
279
|
#{blue("author = 'ccmywish'")}
|
281
280
|
#{blue("bug_track = [ 'https://gitee.com/ccmywish/ecdict/issues'
|
282
281
|
'https://github.com/ccmywish/ecdict/issues ]")}
|
@@ -301,7 +300,7 @@ EOF
|
|
301
300
|
require 'csv'
|
302
301
|
num = 0
|
303
302
|
$DB.transaction
|
304
|
-
CSV.foreach(
|
303
|
+
CSV.foreach(ECDict::CSV) do |row|
|
305
304
|
num = num + 1
|
306
305
|
next if num == 1 # headers不加入
|
307
306
|
row.map! do |n|
|
@@ -319,11 +318,11 @@ EOF
|
|
319
318
|
puts "=> #{e.message}"
|
320
319
|
puts "=> 数据库插入信息失败"
|
321
320
|
close_db
|
322
|
-
File.delete(
|
321
|
+
File.delete(ECDict::DB)
|
323
322
|
return false
|
324
323
|
end
|
325
324
|
puts
|
326
|
-
puts "=> 数据库#{
|
325
|
+
puts "=> 数据库#{ECDict::DB}已完整生成"
|
327
326
|
close_db
|
328
327
|
return true
|
329
328
|
end
|
@@ -350,7 +349,7 @@ end
|
|
350
349
|
#
|
351
350
|
# Error code -1: No db or db not intact
|
352
351
|
# Error code 1: No input
|
353
|
-
# Error code 2:
|
352
|
+
# Error code 2: Doesn't find a result
|
354
353
|
#
|
355
354
|
|
356
355
|
#
|
@@ -423,7 +422,7 @@ end
|
|
423
422
|
#
|
424
423
|
# Search Chinese word to find English words
|
425
424
|
#
|
426
|
-
def
|
425
|
+
def search_chinese(cn_word, support_phrase: false)
|
427
426
|
|
428
427
|
if cn_word.empty?
|
429
428
|
puts "ecdict: 请输入要查询的中文,或使用`ecdict -h`查看帮助"
|
@@ -435,7 +434,7 @@ def chinese_search(cn_word, support_phrase: false)
|
|
435
434
|
end
|
436
435
|
|
437
436
|
if support_phrase
|
438
|
-
puts "ecdict:
|
437
|
+
puts "ecdict: 搜索加强,包含短语"
|
439
438
|
end
|
440
439
|
|
441
440
|
tables = []
|
@@ -451,7 +450,7 @@ def chinese_search(cn_word, support_phrase: false)
|
|
451
450
|
end
|
452
451
|
end
|
453
452
|
if rows.empty?
|
454
|
-
puts "ecdict:
|
453
|
+
puts "ecdict: 抱歉,未找到与之相关的英文"
|
455
454
|
close_db
|
456
455
|
return
|
457
456
|
else
|
@@ -467,12 +466,14 @@ def chinese_search(cn_word, support_phrase: false)
|
|
467
466
|
next if trans.include?("\r\n")
|
468
467
|
next if trans.include?("\n")
|
469
468
|
|
470
|
-
|
469
|
+
|
471
470
|
if !support_phrase
|
471
|
+
# 不要搜索词组
|
472
472
|
next if en_word.include?(' ')
|
473
|
+
# 不要搜索连字词
|
474
|
+
next if en_word.include?('-')
|
473
475
|
end
|
474
|
-
|
475
|
-
next if en_word.include?('-')
|
476
|
+
|
476
477
|
|
477
478
|
# filter
|
478
479
|
# "[网络] 微软,认证专家;微软认证产品专家;微软专家认证"
|
@@ -506,8 +507,14 @@ def chinese_search(cn_word, support_phrase: false)
|
|
506
507
|
if found_a_word
|
507
508
|
puts
|
508
509
|
else
|
509
|
-
|
510
|
-
|
510
|
+
|
511
|
+
if !support_phrase
|
512
|
+
puts "ecdict: 扩大搜索范围,再次尝试搜索..."
|
513
|
+
search_chinese(cn_word, support_phrase: true)
|
514
|
+
else
|
515
|
+
puts "ecdict: 抱歉,未找到与之相关的英文"
|
516
|
+
end
|
517
|
+
|
511
518
|
end
|
512
519
|
# end of else
|
513
520
|
end
|
@@ -592,7 +599,9 @@ def start_ecrepl
|
|
592
599
|
require 'reline'
|
593
600
|
Reline.completion_proc = lambda do |word|
|
594
601
|
if word.strip.empty?
|
595
|
-
|
602
|
+
return %w[1.输入单词并回车查询含义
|
603
|
+
2.输入单词时按一次tab键反馈搜索建议
|
604
|
+
3.输入exit或按Ctrl-C或Ctrl-D退出]
|
596
605
|
end
|
597
606
|
|
598
607
|
max_len = word.length + 4
|
@@ -600,30 +609,54 @@ def start_ecrepl
|
|
600
609
|
|
601
610
|
# $DB.execute "SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '#{word}%' AND length(sw)<#{max_len} LIMIT 12" { |row| puts row }
|
602
611
|
|
603
|
-
# 以word开头的单词
|
604
|
-
ret = $DB.execute "SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '#{word}%'
|
605
|
-
AND length(sw)<#{max_len} LIMIT 64"
|
606
|
-
# [["baba"], ["babe"], ["babn"], ["baby"]]
|
607
|
-
ret = ret.to_a.flatten
|
608
612
|
|
609
|
-
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
+
# Display suggestions
|
614
|
+
#
|
615
|
+
# @params word [String] The word we search
|
616
|
+
# @params ret [Array] The suggestions array returned
|
617
|
+
def _display_suggestions(word, ret)
|
618
|
+
return if ret.empty?
|
619
|
+
if word.length <= 7
|
620
|
+
LsTable.ls(ret) { puts blue(_1) }
|
621
|
+
else
|
622
|
+
LsTable.ls(ret, cell_len: 14, cell_num: 6) { puts blue(_1) }
|
623
|
+
end
|
624
|
+
puts
|
613
625
|
end
|
614
626
|
|
615
|
-
|
627
|
+
# Generate suggestions
|
628
|
+
# 1. the words beginning with our search word
|
629
|
+
# 2. the words including our search word
|
630
|
+
#
|
631
|
+
# @params word [String] The word we search
|
632
|
+
# @params max_len [Integer] Suggestion's max length
|
633
|
+
def _gen_suggestion_1(word, max_len)
|
634
|
+
ret = $DB.execute <<-SQL
|
635
|
+
SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '#{word}%'
|
636
|
+
AND length(sw)<#{max_len} LIMIT 64
|
637
|
+
SQL
|
638
|
+
# [["baba"], ["babe"], ["babn"], ["baby"]]
|
639
|
+
ret = ret.to_a.flatten
|
640
|
+
end
|
616
641
|
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
else
|
624
|
-
LsTable.ls(ret, cell_len: 14, cell_num: 6) { puts blue(_1) }
|
642
|
+
def _gen_suggestion_2(word, max_len)
|
643
|
+
ret = $DB.execute <<-SQL
|
644
|
+
SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '_%#{word}%'
|
645
|
+
AND length(sw)<#{max_len} LIMIT 64
|
646
|
+
SQL
|
647
|
+
ret = ret.to_a.flatten
|
625
648
|
end
|
626
649
|
|
650
|
+
suggestions = [
|
651
|
+
Thread.new {
|
652
|
+
_display_suggestions word, _gen_suggestion_1(word, max_len)
|
653
|
+
},
|
654
|
+
Thread.new {
|
655
|
+
_display_suggestions word, _gen_suggestion_2(word, max_len)
|
656
|
+
}
|
657
|
+
].each(&:join)
|
658
|
+
|
659
|
+
""
|
627
660
|
end
|
628
661
|
|
629
662
|
#
|
@@ -654,12 +687,12 @@ end
|
|
654
687
|
|
655
688
|
|
656
689
|
#
|
657
|
-
# remove everything in #{
|
690
|
+
# remove everything in #{ECDict::STORAGE}
|
658
691
|
#
|
659
692
|
def delete_cache
|
660
693
|
close_db
|
661
694
|
begin
|
662
|
-
FileUtils.rm_rf(
|
695
|
+
FileUtils.rm_rf(ECDict::STORAGE)
|
663
696
|
rescue => e
|
664
697
|
puts "ecdict: #{e.message}"
|
665
698
|
puts "ecdict: 清空词典数据失败"
|
@@ -676,7 +709,7 @@ end
|
|
676
709
|
|
677
710
|
def print_version
|
678
711
|
puts <<EOH
|
679
|
-
ecdict (v#{
|
712
|
+
ecdict (v#{ECDict::VERSION}): A courteous cli translator.
|
680
713
|
EOH
|
681
714
|
|
682
715
|
end
|
@@ -684,14 +717,13 @@ end
|
|
684
717
|
|
685
718
|
def help
|
686
719
|
puts <<EOH
|
687
|
-
ecdict (v#{
|
720
|
+
ecdict (v#{ECDict::VERSION}): A courteous cli translator.
|
688
721
|
|
689
722
|
usage:
|
690
723
|
|
691
724
|
ecdict word => 查询单词word
|
692
725
|
ecdict -r => 启动ecdict repl交互式查询,输入exit或Ctrl-C/D退出
|
693
|
-
ecdict -c 中文 =>
|
694
|
-
ecdict -cp 中文 => 搜索中文短语对应的英文短语
|
726
|
+
ecdict -c 中文 => 搜索中文单词对应的英文单词/短语
|
695
727
|
ecdict -i => 下载ecdict词典数据并安装
|
696
728
|
ecdict -v => 打印此Gem版本号
|
697
729
|
ecdict -h => 打印此帮助
|
@@ -722,11 +754,10 @@ when "-h" then help
|
|
722
754
|
when "-i" then download_and_install_ecdict_data
|
723
755
|
when "-r" then start_ecrepl
|
724
756
|
when "-d" then delete_cache
|
725
|
-
when "-c" then
|
726
|
-
when "-cp" then chinese_search(ARGV.join, support_phrase: true)
|
757
|
+
when "-c" then search_chinese(ARGV.join)
|
727
758
|
else
|
728
759
|
reply_once(query)
|
729
760
|
end
|
730
761
|
|
731
762
|
# ensure close db
|
732
|
-
close_db
|
763
|
+
close_db
|
data/lib/ecdict.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
# ---------------------------------------------------------------
|
2
|
+
# File : ecdict.rb
|
3
|
+
# Authors : ccmywish <ccmywish@qq.com>
|
4
|
+
# Created on : <2022-04-29>
|
5
|
+
# Last modified : <2022-04-29>
|
6
|
+
#
|
7
|
+
# ecdict:
|
8
|
+
#
|
9
|
+
# ecdict lib
|
10
|
+
#
|
11
|
+
# ---------------------------------------------------------------
|
12
|
+
|
13
|
+
module ECDict
|
14
|
+
|
15
|
+
VERSION = "1.3.1"
|
16
|
+
|
17
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ecdict
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 1.3.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- ccmywish
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sqlite3
|
@@ -77,6 +77,7 @@ extensions: []
|
|
77
77
|
extra_rdoc_files: []
|
78
78
|
files:
|
79
79
|
- bin/ecdict
|
80
|
+
- lib/ecdict.rb
|
80
81
|
homepage: https://gitee.com/ccmywish/ecdict
|
81
82
|
licenses:
|
82
83
|
- MIT
|
@@ -98,7 +99,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
98
99
|
- !ruby/object:Gem::Version
|
99
100
|
version: '0'
|
100
101
|
requirements: []
|
101
|
-
rubygems_version: 3.3.
|
102
|
+
rubygems_version: 3.3.12
|
102
103
|
signing_key:
|
103
104
|
specification_version: 4
|
104
105
|
summary: 'ecdict: English-to-Chinese dictionary on the cli.'
|