ecdict 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/ecdict +763 -763
  3. data/lib/ecdict.rb +16 -17
  4. metadata +6 -6
data/bin/ecdict CHANGED
@@ -1,763 +1,763 @@
1
- #!/usr/bin/env ruby
2
- # ------------------------------------------------------
3
- # File : ecdict.rb
4
- # Authors : ccmywish <ccmywish@qq.com>
5
- # Created on : <2020-10-18>
6
- # Last modified : <2022-04-30>
7
- #
8
- # ecdict:
9
- #
10
- # The command line dict using ECDICT.
11
- #
12
- # ECDICT: https://github.com/skywind3000/ECDICT
13
- #
14
- # This gem consists of:
15
- # 1. ecdict installer
16
- # 2. ecdict cmd
17
- # 3. ecdict repl
18
- #
19
- # Exit -1: 下载原始数据失败
20
- # Exit -2: 解压原始数据至CSV失败
21
- # EXit -3: 从CSV生成数据库失败
22
- # Exit 1: 无词典
23
- # Exit 2: 缺少参数
24
- #
25
- # 未搜索到结果并不认为是错误
26
- # ------------------------------------------------------
27
-
28
- require 'sqlite3'
29
- require 'ecdict'
30
-
31
- ECDict::STORAGE = File.expand_path("~/.local/share/ecdict")
32
- ECDict::RAW_DATA = File.join(ECDict::STORAGE, 'ecdict-csv.7z')
33
-
34
- ECDict::DB = File.join(ECDict::STORAGE, 'ecdict.db')
35
- ECDict::CSV = File.join(ECDict::STORAGE, 'ecdict.csv')
36
-
37
- # 这个版本是我从上游下载的CSV后自己压缩的
38
- # 这个文件的实际创建日期(上游作者创建)是2017/06/04
39
- # 至2022/03/22,未见更新,估计以后上游维护这个词典的也不会经常更新
40
- # 而340万的单词量已经足够,因此我们可以足够长的时间一直停留在此词典版本
41
- ECDict::DICT_DOWNLOAD_LINK = "https://gitee.com/ccmywish/ecdict-data"
42
- ECDict::DICT_SHA256 = "86782a0e5d05486b482be5a2e5fa99b1a9b2ae51240c853ecfe181886133818a"
43
-
44
-
45
- require 'fileutils'
46
- FileUtils.mkdir_p(ECDict::STORAGE)
47
- $DB = SQLite3::Database.new ECDict::DB
48
-
49
- def close_db
50
- $DB.close if $DB
51
- end
52
-
53
- def close_db_and_exit(num = 0)
54
- close_db
55
- exit num
56
- end
57
-
58
-
59
-
60
- ####################
61
- # helper: for color
62
- ####################
63
-
64
- def bold(str) "\e[1m#{str}\e[0m" end
65
- def underline(str) "\e[4m#{str}\e[0m" end
66
- def red(str) "\e[31m#{str}\e[0m" end
67
- def green(str) "\e[32m#{str}\e[0m" end
68
- def yellow(str) "\e[33m#{str}\e[0m" end
69
- def blue(str) "\e[34m#{str}\e[0m" end
70
- def purple(str) "\e[35m#{str}\e[0m" end
71
- def cyan(str) "\e[36m#{str}\e[0m" end
72
-
73
-
74
-
75
- ##########################
76
- # db download and install
77
- ##########################
78
- #
79
- # 1. Download .7z file from Gitee
80
- # 2. Decompress the .7z file to CSV file
81
- # 3. Transform CSV file into database
82
- #
83
-
84
- def check_download_integrity
85
- require 'digest'
86
- return false if !File.exists?(ECDict::RAW_DATA)
87
- sha256 = Digest::SHA256.file(ECDict::RAW_DATA).hexdigest
88
- if ECDict::DICT_SHA256 == sha256
89
- true
90
- else
91
- false
92
- end
93
- end
94
-
95
-
96
- def download_ecdict_raw_data
97
-
98
- if File.exists?(ECDict::RAW_DATA) && check_download_integrity
99
- puts "=> 已存在原始数据文件#{ECDict::RAW_DATA}, 无需再次下载"
100
- else
101
-
102
- if File.exists?(ECDict::RAW_DATA)
103
- puts "=> 删除已存在但不完整的原始数据文件"
104
- FileUtils.rm ECDict::RAW_DATA
105
- end
106
-
107
- # 若已经有拉取的仓库,先检查里面的数据是否可用
108
- raw_data_git_dir = File.join(ECDict::STORAGE, 'ecdict-data')
109
- raw_data_git_dir_data = File.join(raw_data_git_dir, 'ecdict-csv.7z')
110
-
111
- if Dir.exists?(raw_data_git_dir)
112
- if File.exists?(raw_data_git_dir_data)
113
- FileUtils.cp(raw_data_git_dir_data, ECDict::STORAGE)
114
- if check_download_integrity
115
- puts "=> 从已经Git pull到的仓库中获取原始数据文件"
116
- return true
117
- else
118
- FileUtils.rm ECDict::RAW_DATA
119
- end
120
- end
121
- FileUtils.rm_rf(raw_data_git_dir)
122
- end
123
-
124
- begin
125
- puts "=> 使用Git从#{ECDict::DICT_DOWNLOAD_LINK}获取原始数据库文件"
126
- ret = system("git -C #{ECDict::STORAGE} clone #{ECDict::DICT_DOWNLOAD_LINK} ")
127
- FileUtils.cp(File.join(ECDict::STORAGE, 'ecdict-data', 'ecdict-csv.7z'), ECDict::STORAGE) rescue nil
128
- raise "Git拉取仓库过程失败,原始数据文件不完整!" if (ret != true || !check_download_integrity)
129
- rescue StandardError => e
130
- puts "=> #{e.message}"
131
- FileUtils.rm(ECDict::RAW_DATA) rescue nil
132
- return false
133
- else
134
- puts "=> 下载完成!"
135
- end
136
- end
137
- true
138
- end
139
-
140
-
141
- def decompress_7z_to_csv()
142
- require 'seven_zip_ruby'
143
-
144
- # 正常解压出来的CSV应当至少200MB以上
145
- if File.exists?(ECDict::CSV)
146
- if File.size(ECDict::CSV) <= 200*1024*1024
147
- puts "=> 删除旧有的不完全词典数据"
148
- FileUtils.rm(ECDict::CSV)
149
- else
150
- puts "=> CSV数据已存在,无需再次解压,直接使用"
151
- return true
152
- end
153
- end
154
-
155
-
156
- File.open(ECDict::RAW_DATA, "rb") do |file|
157
- puts "=> 正在解压ecdict-csv.7z(#{ '%.2f' % (file.size/1024.0/1024) }MB)"
158
- SevenZipRuby::Reader.extract_all(file, ECDict::STORAGE)
159
- end
160
-
161
- puts "=> 已成功解压出ecdict.csv(#{ '%.2f' % (File.size(ECDict::CSV)/1024.0/1024) }MB)"
162
- true
163
- end
164
-
165
-
166
- #
167
- # 检查生成出来的db的完整性
168
- # 不太方便检查,只能粗略的查看大小
169
- #
170
- def check_db_integrity
171
- # 正常从CSV生成出来的数据库应当至少400MB以上
172
- if File.size(ECDict::DB) <= 400*1024*1024
173
- false
174
- else
175
- true
176
- end
177
- end
178
-
179
-
180
- #
181
- # This is the Ruby versioned [stardict.py] from
182
- # https://github.com/skywind3000/ECDICT
183
- #
184
- # It generates [ecdict.db] in 3 steps:
185
- #
186
- # 1. create new and empty db file
187
- # 2. create table in the db
188
- # 3. insert data from CSV into the table
189
- #
190
- def generate_sqlite_db()
191
-
192
- # SQLite3::Database.new已经生成了该文件,所以需要提前判断到底是否存在
193
- if File.exist?(ECDict::DB)
194
-
195
- if !check_db_integrity
196
- puts "=> 删除旧有的不完整数据库"
197
- $DB.close
198
- FileUtils.rm(ECDict::DB)
199
- $DB = SQLite3::Database.new ECDict::DB
200
- else
201
- puts "=> 完整(可能的)数据库已存在,无需再次从CSV文件生成,直接使用"
202
- return true
203
- end
204
- end
205
-
206
-
207
- # 共15个字段,存的时候只需要14个
208
- # 1.id 主键,自增
209
- # 2.sw strip word(删除非alnum字符)
210
- # 3.word 单词名称
211
- # 4.phonetic 音标,以英语英标为主
212
- # 5.definition 单词释义(英文),每行一个释义
213
- # 6.translation 单词释义(中文),每行一个释义
214
- # 7.pos 词语位置,用 "/" 分割不同位置
215
- # 8.collins 柯林斯星级
216
- # 9.oxford 是否是牛津三千核心词汇
217
- # 10.tag 字符串标签:zk/中考,gk/高考,cet4/四级 等等标签,空格分割
218
- # 11.bnc 英国国家语料库词频顺序
219
- # 12.frq 当代语料库词频顺序
220
- # 13.exchange 时态复数等变换,使用 "/" 分割不同项目
221
- # 14.detail json 扩展信息,字典形式保存例句(待添加)
222
- # 15.audio 读音音频 url (待添加)
223
-
224
- sql = <<-EOF
225
- CREATE TABLE IF NOT EXISTS "ecdict" (
226
- "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE,
227
- "word" VARCHAR(64) COLLATE NOCASE NOT NULL UNIQUE,
228
- "sw" VARCHAR(64) COLLATE NOCASE NOT NULL,
229
- "phonetic" VARCHAR(64),
230
- "definition" TEXT,
231
- "translation" TEXT,
232
- "pos" VARCHAR(16),
233
- "collins" INTEGER DEFAULT(0),
234
- "oxford" INTEGER DEFAULT(0),
235
- "tag" VARCHAR(64),
236
- "bnc" INTEGER DEFAULT(NULL),
237
- "frq" INTEGER DEFAULT(NULL),
238
- "exchange" TEXT,
239
- "detail" TEXT,
240
- "audio" TEXT
241
- );
242
- CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_1" ON ecdict (id);
243
- CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_2" ON ecdict (word);
244
- CREATE INDEX IF NOT EXISTS "ecdict_3" ON ecdict (sw, word collate nocase);
245
- CREATE INDEX IF NOT EXISTS "ecd_1" ON ecdict (word collate nocase);
246
- EOF
247
-
248
-
249
- begin
250
- $DB.execute sql
251
- puts "=> 创建数据库文件#{ECDict::DB},并生成表`ecdict`"
252
- rescue Exception => e
253
- puts "=> #{e.message}"
254
- puts "=> 创建数据库文件#{ECDict::DB}失败,或表`ecdict`生成失败"
255
- close_db
256
- File.delete(ECDict::DB)
257
- return false
258
- end
259
-
260
-
261
- # 这是生成数据库中sw字段的函数,用于模糊匹配,csv中并不包含此字段
262
- def strip_word(word)
263
- word.gsub(/\W/,'').downcase
264
- end
265
-
266
- your_machine = case RUBY_PLATFORM
267
- when /linux/i then "linux"
268
- when /ucrt/i, /mingw/i then "windows"
269
- when /mac/i then "mac"
270
- when /bsd/i then "bsd"
271
- else "machine"
272
- end
273
-
274
- begin
275
- puts <<~EOC
276
-
277
- #{blue("gem_name = 'ecdict'")}
278
- #{blue("version = '#{ECDict::VERSION}'")}
279
- #{blue("author = 'ccmywish'")}
280
- #{blue("bug_track = [ 'https://gitee.com/ccmywish/ecdict/issues'
281
- 'https://github.com/ccmywish/ecdict/issues ]")}
282
-
283
- #{green("if")} #{purple("your_#{your_machine}_is_good")}
284
- wait n =~ #{red('3min or less')}
285
- #{green("end")}
286
-
287
- #{blue(bold("the_\#{author}_is_busily_inserting_data_for_you(...)"))}
288
-
289
- EOC
290
-
291
- require 'progress_bar'
292
- # progress_bar的bar在Windows上显示有问题
293
- progress = ProgressBar.new(3402560,:counter,:percentage,:elapsed)
294
-
295
- columns = %w{word sw phonetic definition translation pos collins oxford
296
- tag bnc frq exchange detail audio}.join(',')
297
-
298
- insert = "INSERT INTO ecdict (#{columns}) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
299
-
300
- require 'csv'
301
- num = 0
302
- $DB.transaction
303
- CSV.foreach(ECDict::CSV) do |row|
304
- num = num + 1
305
- next if num == 1 # headers不加入
306
- row.map! do |n|
307
- r = n.nil?? '' : n
308
- r.gsub('\n',"\n") # 插入数据库时应直接转换为真的换行符
309
- end
310
- sw = strip_word(row[0])
311
- row.insert(1,sw)
312
- $DB.execute(insert,row)
313
- progress.increment!
314
- end
315
- $DB.commit
316
-
317
- rescue Exception => e
318
- puts "=> #{e.message}"
319
- puts "=> 数据库插入信息失败"
320
- close_db
321
- File.delete(ECDict::DB)
322
- return false
323
- end
324
- puts
325
- puts "=> 数据库#{ECDict::DB}已完整生成"
326
- close_db
327
- return true
328
- end
329
-
330
-
331
- def download_and_install_ecdict_data
332
- download_ecdict_raw_data || ( puts("\e[31m=> 下载失败,安装取消!\e[0m") || exit(-1) )
333
- decompress_7z_to_csv || ( puts("\e[31m=> 解压失败,安装取消!\e[0m") || exit(-2) )
334
- generate_sqlite_db || ( puts("\e[31m=> 生成数据库失败,安装取消!\e[0m") || exit(-3) )
335
- puts
336
- puts "Hooray! 词典数据下载安装完成! 请尝试使用:\n\n"
337
- puts " 1. ecdict word 查询单词 "
338
- puts " 2. ecdict -r 进入交互式查询REPL"
339
- puts " 3. ecdict -c 中文 搜索中文单词"
340
- puts " 4. ecdict -cp 中文 搜索中文短语"
341
- puts
342
- end
343
-
344
-
345
-
346
- ####################
347
- # ecdict cmd
348
- ####################
349
- #
350
- # Error code -1: No db or db not intact
351
- # Error code 1: No input
352
- # Error code 2: Doesn't find a result
353
- #
354
-
355
- #
356
- # Look up the dictionary db once a call
357
- #
358
- def reply_once(query)
359
-
360
- if !check_db_integrity
361
- puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
362
- close_db_and_exit(1)
363
- end
364
-
365
- tables = []
366
- $DB.execute "SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; " do |tbl|
367
- tables << tbl
368
- end
369
- # 每个表自己被数组包裹, [["gnuutils"], ["ruby"]]
370
- tables.flatten!
371
-
372
-
373
- rows = []
374
- tables.each do |t|
375
- $DB.execute "SELECT phonetic,translation,definition,exchange FROM #{t} WHERE word = '#{query}'" do |row|
376
- rows << row
377
- end
378
- end
379
-
380
- if rows.empty?
381
- puts "ecdict: 未找到结果,请检查单词拼写是否正确"
382
- close_db
383
- return
384
- else
385
- line = rows[0]
386
- phonetic,trans,definition,exchange = line[0],line[1],line[2],line[3]
387
- puts "#{query} [#{phonetic.strip.empty?? :nil : phonetic}]"
388
- puts
389
- puts "* 中文翻译"
390
- trans.split("\n").each {|t| puts "- #{t}"}
391
- puts
392
- if !definition.nil? && definition.strip != ""
393
- puts "* 英文释义"
394
- definition.split("\n").each {|t| puts "- #{t}"}
395
- puts
396
- end
397
- if !exchange.nil? && !exchange.strip.empty?
398
- puts "* 变化形式"
399
- exchange.split("/").each do |e|
400
- print "- "
401
- type,word = e.split(":")
402
- case type
403
- when ?p then print "过去式 : "
404
- when ?d then print "过去分词: "
405
- when ?i then print "现在分词: "
406
- when ?3 then print "第三人称: "
407
- when ?r then print "比较级 : "
408
- when ?t then print "最高级 : "
409
- when ?s then print "名词复数: "
410
- when ?0 then print "词根来源: "
411
- when ?1 then print "词根变化: "
412
- end
413
- puts word
414
- end
415
- end
416
- end
417
-
418
- $DB.close if $DB
419
- end
420
-
421
-
422
- #
423
- # Search Chinese word to find English words
424
- #
425
- def search_chinese(cn_word, support_phrase: false)
426
-
427
- if cn_word.empty?
428
- puts "ecdict: 请输入要查询的中文,或使用`ecdict -h`查看帮助"
429
- close_db_and_exit(2)
430
- end
431
- if !check_db_integrity
432
- puts "=> 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
433
- close_db_and_exit(1)
434
- end
435
-
436
- if support_phrase
437
- puts "ecdict: 搜索加强,包含短语"
438
- end
439
-
440
- tables = []
441
- $DB.execute "SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; " do |tbl|
442
- tables << tbl
443
- end
444
- tables.flatten!
445
-
446
- rows = []
447
- tables.each do |t|
448
- $DB.execute "SELECT word, translation FROM #{t} WHERE translation LIKE '%#{cn_word}%' " do |row|
449
- rows << row
450
- end
451
- end
452
- if rows.empty?
453
- puts "ecdict: 抱歉,未找到与之相关的英文"
454
- close_db
455
- return
456
- else
457
- # 有的时候并不是没有结果,而是被我们过滤掉了,对此确认一下
458
- found_a_word = false
459
-
460
- rows.each do |line|
461
- en_word,trans = line[0],line[1]
462
-
463
- # 过长的直接跳过
464
- next if trans.length > 35
465
- # 有换行符的太长,直接跳过
466
- next if trans.include?("\r\n")
467
- next if trans.include?("\n")
468
-
469
-
470
- if !support_phrase
471
- # 不要搜索词组
472
- next if en_word.include?(' ')
473
- # 不要搜索连字词
474
- next if en_word.include?('-')
475
- end
476
-
477
-
478
- # filter
479
- # "[网络] 微软,认证专家;微软认证产品专家;微软专家认证"
480
- trans_words1 = trans.split(',') # 英文逗号!!!
481
-
482
- trans_words = []
483
- trans_words1.each do |w|
484
- trans_words << w.split(';') # 中文分号!!!
485
- end
486
- # [ [] [] [] ]
487
- trans_words = trans_words.flatten
488
-
489
- ret = nil
490
- trans_words.each do |t|
491
- ret = t.split.index do
492
- # 必须以搜索的中文开头,最多容纳两个字符
493
- # _1 =~ /^#{cn_word}.{0,2}$/
494
-
495
- # 往往中文都是精确搜索
496
- _1 == cn_word
497
- end
498
- break if ret
499
- end
500
-
501
- if !ret.nil?
502
- found_a_word = true
503
- puts "#{blue(en_word)}: #{trans}"
504
- end
505
- end
506
-
507
- if found_a_word
508
- puts
509
- else
510
-
511
- if !support_phrase
512
- puts "ecdict: 扩大搜索范围,再次尝试搜索..."
513
- search_chinese(cn_word, support_phrase: true)
514
- else
515
- puts "ecdict: 抱歉,未找到与之相关的英文"
516
- end
517
-
518
- end
519
- # end of else
520
- end
521
-
522
- end
523
-
524
-
525
- ####################
526
- # ecdict REPL
527
- ####################
528
-
529
- #
530
- # Look up the dictionary db all the time in a not ending way,
531
- # that is, interactively as a REPL does.
532
- #
533
- # Notice the similar function `reply_once` above,
534
- # which only answer once.
535
- #
536
- def replying(query)
537
-
538
- answer = nil
539
- $DB.execute "SELECT phonetic,translation,definition,exchange FROM ecdict WHERE word = '#{query}'" do |row|
540
- answer = row
541
- end
542
- if answer.nil?
543
- puts "未找到结果,请检查单词拼写是否正确"
544
- else
545
- line = answer
546
- phonetic,trans,definition,exchange = line[0],line[1],line[2],line[3]
547
- puts "#{query} [#{phonetic.strip.empty?? :nil : phonetic}]"
548
- puts
549
- puts "* 中文翻译"
550
- trans.split("\n").each {|t| puts "- #{t}"}
551
- puts
552
- if !definition.nil? && definition.strip != ""
553
- puts "* 英文释义"
554
- definition.split("\n").each {|t| puts "- #{t}"}
555
- puts
556
- end
557
- if !exchange.nil? && !exchange.strip.empty?
558
- puts "* 变化形式"
559
- exchange.split("/").each do |e|
560
- print "- "
561
- type,word = e.split(":")
562
- case type
563
- when ?p then print "过去式 : "
564
- when ?d then print "过去分词: "
565
- when ?i then print "现在分词: "
566
- when ?3 then print "第三人称: "
567
- when ?r then print "比较级 : "
568
- when ?t then print "最高级 : "
569
- when ?s then print "名词复数: "
570
- when ?0 then print "词根来源: "
571
- when ?1 then print "词根变化: "
572
- end
573
- puts word
574
- end
575
- end
576
- end
577
- end
578
-
579
-
580
- #
581
- # Start ecrepl
582
- #
583
- def start_ecrepl
584
-
585
- if !check_db_integrity
586
- puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
587
- close_db_and_exit(1)
588
- end
589
-
590
- begin
591
- stty_save = `stty -g`.chomp
592
- rescue
593
- end
594
-
595
- #
596
- # completion
597
- #
598
- require 'ls_table'
599
- require 'reline'
600
- Reline.completion_proc = lambda do |word|
601
- if word.strip.empty?
602
- return %w[1.输入单词并回车查询含义
603
- 2.输入单词时按一次tab键反馈搜索建议
604
- 3.输入exit或按Ctrl-C或Ctrl-D退出]
605
- end
606
-
607
- max_len = word.length + 4
608
- puts
609
-
610
- # $DB.execute "SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '#{word}%' AND length(sw)<#{max_len} LIMIT 12" { |row| puts row }
611
-
612
-
613
- # Display suggestions
614
- #
615
- # @params word [String] The word we search
616
- # @params ret [Array] The suggestions array returned
617
- def _display_suggestions(word, ret)
618
- return if ret.empty?
619
- if word.length <= 7
620
- LsTable.ls(ret) { puts blue(_1) }
621
- else
622
- LsTable.ls(ret, cell_len: 14, cell_num: 6) { puts blue(_1) }
623
- end
624
- puts
625
- end
626
-
627
- # Generate suggestions
628
- # 1. the words beginning with our search word
629
- # 2. the words including our search word
630
- #
631
- # @params word [String] The word we search
632
- # @params max_len [Integer] Suggestion's max length
633
- def _gen_suggestion_1(word, max_len)
634
- ret = $DB.execute <<-SQL
635
- SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '#{word}%'
636
- AND length(sw)<#{max_len} LIMIT 64
637
- SQL
638
- # [["baba"], ["babe"], ["babn"], ["baby"]]
639
- ret = ret.to_a.flatten
640
- end
641
-
642
- def _gen_suggestion_2(word, max_len)
643
- ret = $DB.execute <<-SQL
644
- SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '_%#{word}%'
645
- AND length(sw)<#{max_len} LIMIT 64
646
- SQL
647
- ret = ret.to_a.flatten
648
- end
649
-
650
- suggestions = [
651
- Thread.new {
652
- _display_suggestions word, _gen_suggestion_1(word, max_len)
653
- },
654
- Thread.new {
655
- _display_suggestions word, _gen_suggestion_2(word, max_len)
656
- }
657
- ].each(&:join)
658
-
659
- ""
660
- end
661
-
662
- #
663
- # main query
664
- #
665
- begin
666
- puts red("ECDict REPL (Ruby #{RUBY_VERSION} Powered)")
667
-
668
- while line = Reline.readline("\e[32mecdict> \e[0m", true)
669
- case word = line.chomp
670
- when 'exit'
671
- close_db
672
- return
673
- when ''
674
- # NOOP
675
- else
676
- replying(word)
677
- end
678
- end
679
- rescue Interrupt
680
- # puts '^C'
681
- `stty #{stty_save}` if stty_save
682
- close_db
683
- return
684
- end
685
-
686
- end
687
-
688
-
689
- #
690
- # remove everything in #{ECDict::STORAGE}
691
- #
692
- def delete_cache
693
- close_db
694
- begin
695
- FileUtils.rm_rf(ECDict::STORAGE)
696
- rescue => e
697
- puts "ecdict: #{e.message}"
698
- puts "ecdict: 清空词典数据失败"
699
- else
700
- puts "ecdict: 清空词典数据完成"
701
- end
702
- puts
703
- end
704
-
705
-
706
- ####################
707
- # others
708
- ####################
709
-
710
- def print_version
711
- puts <<EOH
712
- ecdict (v#{ECDict::VERSION}): A courteous cli translator.
713
- EOH
714
-
715
- end
716
-
717
-
718
- def help
719
- puts <<EOH
720
- ecdict (v#{ECDict::VERSION}): A courteous cli translator.
721
-
722
- usage:
723
-
724
- ecdict word => 查询单词word
725
- ecdict -r => 启动ecdict repl交互式查询,输入exit或Ctrl-C/D退出
726
- ecdict -c 中文 => 搜索中文单词对应的英文单词/短语
727
- ecdict -i => 下载ecdict词典数据并安装
728
- ecdict -v => 打印此Gem版本号
729
- ecdict -h => 打印此帮助
730
- ecdict -d => 清空词典数据
731
-
732
- EOH
733
-
734
- end
735
-
736
-
737
- ####################
738
- # main
739
- ####################
740
-
741
- if ARGV.empty?
742
- if !check_db_integrity
743
- puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
744
- close_db_and_exit(1)
745
- end
746
- puts "ecdict: 请输入要查询的词/词组 或使用`ecdict -h`查看帮助"
747
- close_db_and_exit(2)
748
- end
749
-
750
- query = ARGV.shift
751
- case query
752
- when "-v" then print_version
753
- when "-h" then help
754
- when "-i" then download_and_install_ecdict_data
755
- when "-r" then start_ecrepl
756
- when "-d" then delete_cache
757
- when "-c" then search_chinese(ARGV.join)
758
- else
759
- reply_once(query)
760
- end
761
-
762
- # ensure close db
763
- close_db
1
+ #!/usr/bin/env ruby
2
+ # ------------------------------------------------------
3
+ # File : ecdict.rb
4
+ # Authors : ccmywish <ccmywish@qq.com>
5
+ # Created on : <2020-10-18>
6
+ # Last modified : <2022-04-30>
7
+ #
8
+ # ecdict:
9
+ #
10
+ # The command line dict using ECDICT.
11
+ #
12
+ # ECDICT: https://github.com/skywind3000/ECDICT
13
+ #
14
+ # This gem consists of:
15
+ # 1. ecdict installer
16
+ # 2. ecdict cmd
17
+ # 3. ecdict repl
18
+ #
19
+ # Exit -1: 下载原始数据失败
20
+ # Exit -2: 解压原始数据至CSV失败
21
+ # EXit -3: 从CSV生成数据库失败
22
+ # Exit 1: 无词典
23
+ # Exit 2: 缺少参数
24
+ #
25
+ # 未搜索到结果并不认为是错误
26
+ # ------------------------------------------------------
27
+
28
+ require 'sqlite3'
29
+ require 'ecdict'
30
+
31
+ ECDict::STORAGE = File.expand_path("~/.local/share/ecdict")
32
+ ECDict::RAW_DATA = File.join(ECDict::STORAGE, 'ecdict-csv.7z')
33
+
34
+ ECDict::DB = File.join(ECDict::STORAGE, 'ecdict.db')
35
+ ECDict::CSV = File.join(ECDict::STORAGE, 'ecdict.csv')
36
+
37
+ # 这个版本是我从上游下载的CSV后自己压缩的
38
+ # 这个文件的实际创建日期(上游作者创建)是2017/06/04
39
+ # 至2022/03/22,未见更新,估计以后上游维护这个词典的也不会经常更新
40
+ # 而340万的单词量已经足够,因此我们可以足够长的时间一直停留在此词典版本
41
+ ECDict::DICT_DOWNLOAD_LINK = "https://gitee.com/ccmywish/ecdict-data"
42
+ ECDict::DICT_SHA256 = "86782a0e5d05486b482be5a2e5fa99b1a9b2ae51240c853ecfe181886133818a"
43
+
44
+
45
+ require 'fileutils'
46
+ FileUtils.mkdir_p(ECDict::STORAGE)
47
+ $DB = SQLite3::Database.new ECDict::DB
48
+
49
+ def close_db
50
+ $DB.close if $DB
51
+ end
52
+
53
+ def close_db_and_exit(num = 0)
54
+ close_db
55
+ exit num
56
+ end
57
+
58
+
59
+
60
+ ####################
61
+ # helper: for color
62
+ ####################
63
+
64
+ def bold(str) "\e[1m#{str}\e[0m" end
65
+ def underline(str) "\e[4m#{str}\e[0m" end
66
+ def red(str) "\e[31m#{str}\e[0m" end
67
+ def green(str) "\e[32m#{str}\e[0m" end
68
+ def yellow(str) "\e[33m#{str}\e[0m" end
69
+ def blue(str) "\e[34m#{str}\e[0m" end
70
+ def purple(str) "\e[35m#{str}\e[0m" end
71
+ def cyan(str) "\e[36m#{str}\e[0m" end
72
+
73
+
74
+
75
+ ##########################
76
+ # db download and install
77
+ ##########################
78
+ #
79
+ # 1. Download .7z file from Gitee
80
+ # 2. Decompress the .7z file to CSV file
81
+ # 3. Transform CSV file into database
82
+ #
83
+
84
+ def check_download_integrity
85
+ require 'digest'
86
+ return false if !File.exist?(ECDict::RAW_DATA)
87
+ sha256 = Digest::SHA256.file(ECDict::RAW_DATA).hexdigest
88
+ if ECDict::DICT_SHA256 == sha256
89
+ true
90
+ else
91
+ false
92
+ end
93
+ end
94
+
95
+
96
+ def download_ecdict_raw_data
97
+
98
+ if File.exist?(ECDict::RAW_DATA) && check_download_integrity
99
+ puts "=> 已存在原始数据文件#{ECDict::RAW_DATA}, 无需再次下载"
100
+ else
101
+
102
+ if File.exist?(ECDict::RAW_DATA)
103
+ puts "=> 删除已存在但不完整的原始数据文件"
104
+ FileUtils.rm ECDict::RAW_DATA
105
+ end
106
+
107
+ # 若已经有拉取的仓库,先检查里面的数据是否可用
108
+ raw_data_git_dir = File.join(ECDict::STORAGE, 'ecdict-data')
109
+ raw_data_git_dir_data = File.join(raw_data_git_dir, 'ecdict-csv.7z')
110
+
111
+ if Dir.exist?(raw_data_git_dir)
112
+ if File.exist?(raw_data_git_dir_data)
113
+ FileUtils.cp(raw_data_git_dir_data, ECDict::STORAGE)
114
+ if check_download_integrity
115
+ puts "=> 从已经Git pull到的仓库中获取原始数据文件"
116
+ return true
117
+ else
118
+ FileUtils.rm ECDict::RAW_DATA
119
+ end
120
+ end
121
+ FileUtils.rm_rf(raw_data_git_dir)
122
+ end
123
+
124
+ begin
125
+ puts "=> 使用Git从#{ECDict::DICT_DOWNLOAD_LINK}获取原始数据库文件"
126
+ ret = system("git -C #{ECDict::STORAGE} clone #{ECDict::DICT_DOWNLOAD_LINK} ")
127
+ FileUtils.cp(File.join(ECDict::STORAGE, 'ecdict-data', 'ecdict-csv.7z'), ECDict::STORAGE) rescue nil
128
+ raise "Git拉取仓库过程失败,原始数据文件不完整!" if (ret != true || !check_download_integrity)
129
+ rescue StandardError => e
130
+ puts "=> #{e.message}"
131
+ FileUtils.rm(ECDict::RAW_DATA) rescue nil
132
+ return false
133
+ else
134
+ puts "=> 下载完成!"
135
+ end
136
+ end
137
+ true
138
+ end
139
+
140
+
141
+ def decompress_7z_to_csv()
142
+ require 'seven_zip_ruby'
143
+
144
+ # 正常解压出来的CSV应当至少200MB以上
145
+ if File.exist?(ECDict::CSV)
146
+ if File.size(ECDict::CSV) <= 200*1024*1024
147
+ puts "=> 删除旧有的不完全词典数据"
148
+ FileUtils.rm(ECDict::CSV)
149
+ else
150
+ puts "=> CSV数据已存在,无需再次解压,直接使用"
151
+ return true
152
+ end
153
+ end
154
+
155
+
156
+ File.open(ECDict::RAW_DATA, "rb") do |file|
157
+ puts "=> 正在解压ecdict-csv.7z(#{ '%.2f' % (file.size/1024.0/1024) }MB)"
158
+ SevenZipRuby::Reader.extract_all(file, ECDict::STORAGE)
159
+ end
160
+
161
+ puts "=> 已成功解压出ecdict.csv(#{ '%.2f' % (File.size(ECDict::CSV)/1024.0/1024) }MB)"
162
+ true
163
+ end
164
+
165
+
166
+ #
167
+ # 检查生成出来的db的完整性
168
+ # 不太方便检查,只能粗略的查看大小
169
+ #
170
+ def check_db_integrity
171
+ # 正常从CSV生成出来的数据库应当至少400MB以上
172
+ if File.size(ECDict::DB) <= 400*1024*1024
173
+ false
174
+ else
175
+ true
176
+ end
177
+ end
178
+
179
+
180
+ #
181
+ # This is the Ruby versioned [stardict.py] from
182
+ # https://github.com/skywind3000/ECDICT
183
+ #
184
+ # It generates [ecdict.db] in 3 steps:
185
+ #
186
+ # 1. create new and empty db file
187
+ # 2. create table in the db
188
+ # 3. insert data from CSV into the table
189
+ #
190
+ def generate_sqlite_db()
191
+
192
+ # SQLite3::Database.new已经生成了该文件,所以需要提前判断到底是否存在
193
+ if File.exist?(ECDict::DB)
194
+
195
+ if !check_db_integrity
196
+ puts "=> 删除旧有的不完整数据库"
197
+ $DB.close
198
+ FileUtils.rm(ECDict::DB)
199
+ $DB = SQLite3::Database.new ECDict::DB
200
+ else
201
+ puts "=> 完整(可能的)数据库已存在,无需再次从CSV文件生成,直接使用"
202
+ return true
203
+ end
204
+ end
205
+
206
+
207
+ # 共15个字段,存的时候只需要14个
208
+ # 1.id 主键,自增
209
+ # 2.sw strip word(删除非alnum字符)
210
+ # 3.word 单词名称
211
+ # 4.phonetic 音标,以英语英标为主
212
+ # 5.definition 单词释义(英文),每行一个释义
213
+ # 6.translation 单词释义(中文),每行一个释义
214
+ # 7.pos 词语位置,用 "/" 分割不同位置
215
+ # 8.collins 柯林斯星级
216
+ # 9.oxford 是否是牛津三千核心词汇
217
+ # 10.tag 字符串标签:zk/中考,gk/高考,cet4/四级 等等标签,空格分割
218
+ # 11.bnc 英国国家语料库词频顺序
219
+ # 12.frq 当代语料库词频顺序
220
+ # 13.exchange 时态复数等变换,使用 "/" 分割不同项目
221
+ # 14.detail json 扩展信息,字典形式保存例句(待添加)
222
+ # 15.audio 读音音频 url (待添加)
223
+
224
+ sql = <<-EOF
225
+ CREATE TABLE IF NOT EXISTS "ecdict" (
226
+ "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE,
227
+ "word" VARCHAR(64) COLLATE NOCASE NOT NULL UNIQUE,
228
+ "sw" VARCHAR(64) COLLATE NOCASE NOT NULL,
229
+ "phonetic" VARCHAR(64),
230
+ "definition" TEXT,
231
+ "translation" TEXT,
232
+ "pos" VARCHAR(16),
233
+ "collins" INTEGER DEFAULT(0),
234
+ "oxford" INTEGER DEFAULT(0),
235
+ "tag" VARCHAR(64),
236
+ "bnc" INTEGER DEFAULT(NULL),
237
+ "frq" INTEGER DEFAULT(NULL),
238
+ "exchange" TEXT,
239
+ "detail" TEXT,
240
+ "audio" TEXT
241
+ );
242
+ CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_1" ON ecdict (id);
243
+ CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_2" ON ecdict (word);
244
+ CREATE INDEX IF NOT EXISTS "ecdict_3" ON ecdict (sw, word collate nocase);
245
+ CREATE INDEX IF NOT EXISTS "ecd_1" ON ecdict (word collate nocase);
246
+ EOF
247
+
248
+
249
+ begin
250
+ $DB.execute sql
251
+ puts "=> 创建数据库文件#{ECDict::DB},并生成表`ecdict`"
252
+ rescue Exception => e
253
+ puts "=> #{e.message}"
254
+ puts "=> 创建数据库文件#{ECDict::DB}失败,或表`ecdict`生成失败"
255
+ close_db
256
+ File.delete(ECDict::DB)
257
+ return false
258
+ end
259
+
260
+
261
+ # 这是生成数据库中sw字段的函数,用于模糊匹配,csv中并不包含此字段
262
+ def strip_word(word)
263
+ word.gsub(/\W/,'').downcase
264
+ end
265
+
266
+ your_machine = case RUBY_PLATFORM
267
+ when /linux/i then "linux"
268
+ when /ucrt/i, /mingw/i then "windows"
269
+ when /mac/i then "mac"
270
+ when /bsd/i then "bsd"
271
+ else "machine"
272
+ end
273
+
274
+ begin
275
+ puts <<~EOC
276
+
277
+ #{blue("gem_name = 'ecdict'")}
278
+ #{blue("version = '#{ECDict::VERSION}'")}
279
+ #{blue("author = 'ccmywish'")}
280
+ #{blue("bug_track = [ 'https://gitee.com/ccmywish/ecdict/issues'
281
+ 'https://github.com/ccmywish/ecdict/issues ]")}
282
+
283
+ #{green("if")} #{purple("your_#{your_machine}_is_good")}
284
+ wait n =~ #{red('3min or less')}
285
+ #{green("end")}
286
+
287
+ #{blue(bold("the_\#{author}_is_busily_inserting_data_for_you(...)"))}
288
+
289
+ EOC
290
+
291
+ require 'progress_bar'
292
+ # progress_bar的bar在Windows上显示有问题
293
+ progress = ProgressBar.new(3402560,:counter,:percentage,:elapsed)
294
+
295
+ columns = %w{word sw phonetic definition translation pos collins oxford
296
+ tag bnc frq exchange detail audio}.join(',')
297
+
298
+ insert = "INSERT INTO ecdict (#{columns}) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
299
+
300
+ require 'csv'
301
+ num = 0
302
+ $DB.transaction
303
+ CSV.foreach(ECDict::CSV) do |row|
304
+ num = num + 1
305
+ next if num == 1 # headers不加入
306
+ row.map! do |n|
307
+ r = n.nil?? '' : n
308
+ r.gsub('\n',"\n") # 插入数据库时应直接转换为真的换行符
309
+ end
310
+ sw = strip_word(row[0])
311
+ row.insert(1,sw)
312
+ $DB.execute(insert,row)
313
+ progress.increment!
314
+ end
315
+ $DB.commit
316
+
317
+ rescue Exception => e
318
+ puts "=> #{e.message}"
319
+ puts "=> 数据库插入信息失败"
320
+ close_db
321
+ File.delete(ECDict::DB)
322
+ return false
323
+ end
324
+ puts
325
+ puts "=> 数据库#{ECDict::DB}已完整生成"
326
+ close_db
327
+ return true
328
+ end
329
+
330
+
331
+ def download_and_install_ecdict_data
332
+ download_ecdict_raw_data || ( puts("\e[31m=> 下载失败,安装取消!\e[0m") || exit(-1) )
333
+ decompress_7z_to_csv || ( puts("\e[31m=> 解压失败,安装取消!\e[0m") || exit(-2) )
334
+ generate_sqlite_db || ( puts("\e[31m=> 生成数据库失败,安装取消!\e[0m") || exit(-3) )
335
+ puts
336
+ puts "Hooray! 词典数据下载安装完成! 请尝试使用:\n\n"
337
+ puts " 1. ecdict word 查询单词 "
338
+ puts " 2. ecdict -r 进入交互式查询REPL"
339
+ puts " 3. ecdict -c 中文 搜索中文单词"
340
+ puts " 4. ecdict -cp 中文 搜索中文短语"
341
+ puts
342
+ end
343
+
344
+
345
+
346
+ ####################
347
+ # ecdict cmd
348
+ ####################
349
+ #
350
+ # Error code -1: No db or db not intact
351
+ # Error code 1: No input
352
+ # Error code 2: Doesn't find a result
353
+ #
354
+
355
+ #
356
+ # Look up the dictionary db once a call
357
+ #
358
+ def reply_once(query)
359
+
360
+ if !check_db_integrity
361
+ puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
362
+ close_db_and_exit(1)
363
+ end
364
+
365
+ tables = []
366
+ $DB.execute "SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; " do |tbl|
367
+ tables << tbl
368
+ end
369
+ # 每个表自己被数组包裹, [["gnuutils"], ["ruby"]]
370
+ tables.flatten!
371
+
372
+
373
+ rows = []
374
+ tables.each do |t|
375
+ $DB.execute "SELECT phonetic,translation,definition,exchange FROM #{t} WHERE word = '#{query}'" do |row|
376
+ rows << row
377
+ end
378
+ end
379
+
380
+ if rows.empty?
381
+ puts "ecdict: 未找到结果,请检查单词拼写是否正确"
382
+ close_db
383
+ return
384
+ else
385
+ line = rows[0]
386
+ phonetic,trans,definition,exchange = line[0],line[1],line[2],line[3]
387
+ puts "#{query} [#{phonetic.strip.empty?? :nil : phonetic}]"
388
+ puts
389
+ puts "* 中文翻译"
390
+ trans.split("\n").each {|t| puts "- #{t}"}
391
+ puts
392
+ if !definition.nil? && definition.strip != ""
393
+ puts "* 英文释义"
394
+ definition.split("\n").each {|t| puts "- #{t}"}
395
+ puts
396
+ end
397
+ if !exchange.nil? && !exchange.strip.empty?
398
+ puts "* 变化形式"
399
+ exchange.split("/").each do |e|
400
+ print "- "
401
+ type,word = e.split(":")
402
+ case type
403
+ when ?p then print "过去式 : "
404
+ when ?d then print "过去分词: "
405
+ when ?i then print "现在分词: "
406
+ when ?3 then print "第三人称: "
407
+ when ?r then print "比较级 : "
408
+ when ?t then print "最高级 : "
409
+ when ?s then print "名词复数: "
410
+ when ?0 then print "词根来源: "
411
+ when ?1 then print "词根变化: "
412
+ end
413
+ puts word
414
+ end
415
+ end
416
+ end
417
+
418
+ $DB.close if $DB
419
+ end
420
+
421
+
422
#
# Search the dictionary for English words whose translation contains a
# given Chinese word.
#
# A row is reported only when one of the comma/semicolon separated parts of
# its translation contains +cn_word+ as a whole whitespace-delimited term.
# Translations longer than 35 characters or spanning several lines are
# skipped. When nothing survives the filter and +support_phrase+ is false,
# the search is retried once with phrases and hyphenated words included.
#
# Exits with status 2 when +cn_word+ is empty and with status 1 when the
# dictionary database fails the integrity check.
#
# @param cn_word [String] the Chinese word to look for
# @param support_phrase [Boolean] also report entries containing ' ' or '-'
# @return [nil]
#
def search_chinese(cn_word, support_phrase: false)
  if cn_word.empty?
    puts "ecdict: 请输入要查询的中文,或使用`ecdict -h`查看帮助"
    close_db_and_exit(2)
  end
  unless check_db_integrity
    puts "=> 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
    close_db_and_exit(1)
  end

  puts "ecdict: 搜索加强,包含短语" if support_phrase

  # Each table name arrives wrapped in its own row array, hence the flatten.
  tables = []
  $DB.execute("SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; ") do |tbl|
    tables << tbl
  end
  tables.flatten!

  # The table name comes from sqlite_master and cannot be a bound parameter;
  # the user-supplied text is bound so quotes in it cannot break the SQL.
  rows = []
  tables.each do |t|
    $DB.execute("SELECT word, translation FROM #{t} WHERE translation LIKE ?", ["%#{cn_word}%"]) do |row|
      rows << row
    end
  end

  if rows.empty?
    puts "ecdict: 抱歉,未找到与之相关的英文"
    close_db
    return
  end

  # Rows may exist and still all be filtered out below, so track real hits.
  found_a_word = false

  rows.each do |line|
    en_word, trans = line[0], line[1]

    # Skip NULL, overly long, or multi-line translations.
    next if trans.nil? || trans.length > 35
    next if trans.include?("\n")

    # Unless phrases are requested, ignore multi-word and hyphenated entries.
    unless support_phrase
      next if en_word.include?(' ') || en_word.include?('-')
    end

    # e.g. "[网络] 微软,认证专家;微软认证产品专家;微软专家认证"
    # Split on commas first, then on semicolons.
    terms = trans.split(',').flat_map { |part| part.split(';') }

    # Chinese lookups are exact: a whitespace-delimited term must equal cn_word.
    next unless terms.any? { |term| term.split.include?(cn_word) }

    found_a_word = true
    puts "#{blue(en_word)}: #{trans}"
  end

  if found_a_word
    puts
  elsif !support_phrase
    puts "ecdict: 扩大搜索范围,再次尝试搜索..."
    search_chinese(cn_word, support_phrase: true)
  else
    puts "ecdict: 抱歉,未找到与之相关的英文"
  end
end
523
+
524
+
525
+ ####################
526
+ # ecdict REPL
527
+ ####################
528
+
529
#
# Answer a single query inside the REPL: look the word up in the `ecdict`
# table and print its phonetic, Chinese translation, English definition and
# word forms. Unlike the one-shot `reply_once`, it neither checks database
# integrity nor closes the database, so it can be called repeatedly.
#
# @param query [String] the English word typed by the user
# @return [nil]
#
def replying(query)
  # The word is bound as a parameter so input such as "don't" cannot break
  # (or inject into) the SQL statement.
  answer = nil
  $DB.execute("SELECT phonetic,translation,definition,exchange FROM ecdict WHERE word = ?", [query]) do |row|
    answer = row
  end

  if answer.nil?
    puts "未找到结果,请检查单词拼写是否正确"
    return
  end

  phonetic, trans, definition, exchange = answer[0], answer[1], answer[2], answer[3]

  # Columns may be NULL; a missing phonetic is shown as "nil".
  shown_phonetic = phonetic.to_s.strip.empty? ? :nil : phonetic
  puts "#{query} [#{shown_phonetic}]"
  puts
  puts "* 中文翻译"
  trans.to_s.split("\n").each { |t| puts "- #{t}" }
  puts

  unless definition.nil? || definition.strip.empty?
    puts "* 英文释义"
    definition.split("\n").each { |t| puts "- #{t}" }
    puts
  end

  return if exchange.nil? || exchange.strip.empty?

  # `exchange` looks like "p:went/d:gone/i:going": type code, colon, form.
  labels = {
    "p" => "过去式 : ",
    "d" => "过去分词: ",
    "i" => "现在分词: ",
    "3" => "第三人称: ",
    "r" => "比较级 : ",
    "t" => "最高级 : ",
    "s" => "名词复数: ",
    "0" => "词根来源: ",
    "1" => "词根变化: "
  }

  puts "* 变化形式"
  exchange.split("/").each do |e|
    print "- "
    type, word = e.split(":")
    label = labels[type]
    # Unknown type codes are printed without a label.
    print label if label
    puts word
  end
  nil
end
578
+
579
+
580
#
# Start the interactive ecdict REPL.
#
# Reads lines with Reline until the user types `exit`, sends EOF (Ctrl-D)
# or interrupts (Ctrl-C). Every non-empty line is looked up via `replying`.
# Pressing Tab prints spelling suggestions taken from the `sw` column of the
# `ecdict` table. Exits with status 1 when the database fails the integrity
# check.
#
# @return [nil]
#
def start_ecrepl
  unless check_db_integrity
    puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
    close_db_and_exit(1)
  end

  # Remember the terminal settings so they can be restored after Ctrl-C.
  begin
    stty_save = `stty -g`.chomp
  rescue
    # `stty` is unavailable here; there is nothing to restore later.
  end

  # Loaded lazily: only the REPL needs these libraries.
  require 'ls_table'
  require 'reline'

  # Print the suggestions as a table; nothing is printed for an empty list.
  display_suggestions = lambda do |word, ret|
    next if ret.empty?
    if word.length <= 7
      LsTable.ls(ret) { puts blue(_1) }
    else
      LsTable.ls(ret, cell_len: 14, cell_num: 6) { puts blue(_1) }
    end
    puts
  end

  # Fetch up to 64 distinct `sw` values matching the LIKE pattern and shorter
  # than max_len. Both values are bound so that typed quotes cannot break
  # the statement. Rows come back as [["baba"], ["babe"]], hence the flatten.
  gen_suggestions = lambda do |pattern, max_len|
    sql = <<-SQL
      SELECT DISTINCT sw FROM ecdict WHERE sw LIKE ?
      AND length(sw)<? LIMIT 64
    SQL
    $DB.execute(sql, [pattern, max_len]).to_a.flatten
  end

  #
  # completion
  #
  Reline.completion_proc = lambda do |word|
    if word.strip.empty?
      return %w[1.输入单词并回车查询含义
                2.输入单词时按一次tab键反馈搜索建议
                3.输入exit或按Ctrl-C或Ctrl-D退出]
    end

    max_len = word.length + 4
    puts

    # Two lookups run side by side:
    #   1. words beginning with the typed text
    #   2. words containing the typed text after at least one character
    [
      Thread.new { display_suggestions.call(word, gen_suggestions.call("#{word}%", max_len)) },
      Thread.new { display_suggestions.call(word, gen_suggestions.call("_%#{word}%", max_len)) }
    ].each(&:join)

    # The suggestions were already printed above, so the proc itself
    # returns an empty string.
    ""
  end

  #
  # main query
  #
  begin
    puts red("ECDict REPL (Ruby #{RUBY_VERSION} Powered)")

    while (line = Reline.readline("\e[32mecdict> \e[0m", true))
      case word = line.chomp
      when 'exit'
        close_db
        return
      when ''
        # NOOP
      else
        replying(word)
      end
    end
  rescue Interrupt
    # Ctrl-C: restore the saved terminal settings before leaving.
    `stty #{stty_save}` if stty_save
    close_db
    return
  end
end
687
+
688
+
689
#
# Remove everything under ECDict::STORAGE (the downloaded archive, CSV and
# database) and report the outcome.
#
# The database handle is closed first so the file is no longer held open by
# this process when it is deleted.
#
# @return [nil]
#
def delete_cache
  close_db
  begin
    FileUtils.rm_rf(ECDict::STORAGE)
    # rm_rf swallows StandardError, so a failed deletion would otherwise be
    # reported as success; confirm the directory is really gone.
    raise "未能删除 #{ECDict::STORAGE}" if File.exist?(ECDict::STORAGE)
  rescue => e
    puts "ecdict: #{e.message}"
    puts "ecdict: 清空词典数据失败"
  else
    puts "ecdict: 清空词典数据完成"
  end
  puts
end
704
+
705
+
706
+ ####################
707
+ # others
708
+ ####################
709
+
710
# Print the one-line version banner of this gem.
#
# @return [nil]
def print_version
  banner = "ecdict (v#{ECDict::VERSION}): A courteous cli translator."
  puts banner
end
716
+
717
+
718
# Print the version banner followed by the command-line usage summary.
#
# @return [nil]
def help
  usage = <<~EOH
    ecdict (v#{ECDict::VERSION}): A courteous cli translator.

    usage:

    ecdict word => 查询单词word
    ecdict -r => 启动ecdict repl交互式查询,输入exit或Ctrl-C/D退出
    ecdict -c 中文 => 搜索中文单词对应的英文单词/短语
    ecdict -i => 下载ecdict词典数据并安装
    ecdict -v => 打印此Gem版本号
    ecdict -h => 打印此帮助
    ecdict -d => 清空词典数据

  EOH
  puts usage
end
735
+
736
+
737
+ ####################
738
+ # main
739
+ ####################
740
+
741
# Entry point: dispatch on the first command-line argument.
#
# With no arguments, report a missing dictionary (exit 1) before reporting
# the missing argument (exit 2).
if ARGV.empty?
  unless check_db_integrity
    puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
    close_db_and_exit(1)
  end
  puts "ecdict: 请输入要查询的词/词组 或使用`ecdict -h`查看帮助"
  close_db_and_exit(2)
end

first_arg = ARGV.shift

# Option flag => action. Anything that is not a known flag is treated as a
# word to look up.
actions = {
  "-v" => -> { print_version },
  "-h" => -> { help },
  "-i" => -> { download_and_install_ecdict_data },
  "-r" => -> { start_ecrepl },
  "-d" => -> { delete_cache },
  # The remaining arguments are concatenated into one Chinese query string.
  "-c" => -> { search_chinese(ARGV.join) }
}

action = actions[first_arg]
if action
  action.call
else
  reply_once(first_arg)
end

# Make sure the database handle is closed on every normal exit path.
close_db