ecdict 1.3.0 → 1.3.2

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (4)
  1. checksums.yaml +4 -4
  2. data/bin/ecdict +763 -762
  3. data/lib/ecdict.rb +16 -0
  4. metadata +7 -6
data/bin/ecdict CHANGED
@@ -1,762 +1,763 @@
1
- #!/usr/bin/env ruby
2
- # ------------------------------------------------------
3
- # File : ecdict.rb
4
- # Authors : ccmywish <ccmywish@qq.com>
5
- # Created on : <2020-10-18>
6
- # Last modified : <2022-04-29>
7
- #
8
- # ecdict:
9
- #
10
- # The command line dict using ECDICT.
11
- #
12
- # ECDICT: https://github.com/skywind3000/ECDICT
13
- #
14
- # This gem consists of:
15
- # 1. ecdict installer
16
- # 2. ecdict cmd
17
- # 3. ecdict repl
18
- #
19
- # Exit -1: 下载原始数据失败
20
- # Exit -2: 解压原始数据至CSV失败
21
- # EXit -3: 从CSV生成数据库失败
22
- # Exit 1: 无词典
23
- # Exit 2: 缺少参数
24
- #
25
- # 未搜索到结果并不认为是错误
26
- # ------------------------------------------------------
27
-
28
- require 'sqlite3'
29
-
30
- ECDict::STORAGE = File.expand_path("~/.local/share/ecdict")
31
- ECDict::RAW_DATA = File.join(ECDict::STORAGE, 'ecdict-csv.7z')
32
-
33
- ECDict::DB = File.join(ECDict::STORAGE, 'ecdict.db')
34
- ECDict::CSV = File.join(ECDict::STORAGE, 'ecdict.csv')
35
-
36
- # 这个版本是我从上游下载的CSV后自己压缩的
37
- # 这个文件的实际创建日期(上游作者创建)是2017/06/04
38
- # 至2022/03/22,未见更新,估计以后上游维护这个词典的也不会经常更新
39
- # 而340万的单词量已经足够,因此我们可以足够长的时间一直停留在此词典版本
40
- ECDict::DICT_DOWNLOAD_LINK = "https://gitee.com/ccmywish/ecdict-data"
41
- ECDict::DICT_SHA256 = "86782a0e5d05486b482be5a2e5fa99b1a9b2ae51240c853ecfe181886133818a"
42
-
43
-
44
- require 'fileutils'
45
- FileUtils.mkdir_p(ECDict::STORAGE)
46
- $DB = SQLite3::Database.new ECDict::DB
47
-
48
- def close_db
49
- $DB.close if $DB
50
- end
51
-
52
- def close_db_and_exit(num = 0)
53
- close_db
54
- exit num
55
- end
56
-
57
-
58
-
59
- ####################
60
- # helper: for color
61
- ####################
62
-
63
- def bold(str) "\e[1m#{str}\e[0m" end
64
- def underline(str) "\e[4m#{str}\e[0m" end
65
- def red(str) "\e[31m#{str}\e[0m" end
66
- def green(str) "\e[32m#{str}\e[0m" end
67
- def yellow(str) "\e[33m#{str}\e[0m" end
68
- def blue(str) "\e[34m#{str}\e[0m" end
69
- def purple(str) "\e[35m#{str}\e[0m" end
70
- def cyan(str) "\e[36m#{str}\e[0m" end
71
-
72
-
73
-
74
- ##########################
75
- # db download and install
76
- ##########################
77
- #
78
- # 1. Download .7z file from Gitee
79
- # 2. Decompress the .7z file to CSV file
80
- # 3. Transform CSV file into database
81
- #
82
-
83
- def check_download_integrity
84
- require 'digest'
85
- return false if !File.exists?(ECDict::RAW_DATA)
86
- sha256 = Digest::SHA256.file(ECDict::RAW_DATA).hexdigest
87
- if ECDict::DICT_SHA256 == sha256
88
- true
89
- else
90
- false
91
- end
92
- end
93
-
94
-
95
- def download_ecdict_raw_data
96
-
97
- if File.exists?(ECDict::RAW_DATA) && check_download_integrity
98
- puts "=> 已存在原始数据文件#{ECDict::RAW_DATA}, 无需再次下载"
99
- else
100
-
101
- if File.exists?(ECDict::RAW_DATA)
102
- puts "=> 删除已存在但不完整的原始数据文件"
103
- FileUtils.rm ECDict::RAW_DATA
104
- end
105
-
106
- # 若已经有拉取的仓库,先检查里面的数据是否可用
107
- raw_data_git_dir = File.join(ECDict::STORAGE, 'ecdict-data')
108
- raw_data_git_dir_data = File.join(raw_data_git_dir, 'ecdict-csv.7z')
109
-
110
- if Dir.exists?(raw_data_git_dir)
111
- if File.exists?(raw_data_git_dir_data)
112
- FileUtils.cp(raw_data_git_dir_data, ECDict::STORAGE)
113
- if check_download_integrity
114
- puts "=> 从已经Git pull到的仓库中获取原始数据文件"
115
- return true
116
- else
117
- FileUtils.rm ECDict::RAW_DATA
118
- end
119
- end
120
- FileUtils.rm_rf(raw_data_git_dir)
121
- end
122
-
123
- begin
124
- puts "=> 使用Git从#{ECDict::DICT_DOWNLOAD_LINK}获取原始数据库文件"
125
- ret = system("git -C #{ECDict::STORAGE} clone #{ECDict::DICT_DOWNLOAD_LINK} ")
126
- FileUtils.cp(File.join(ECDict::STORAGE, 'ecdict-data', 'ecdict-csv.7z'), ECDict::STORAGE) rescue nil
127
- raise "Git拉取仓库过程失败,原始数据文件不完整!" if (ret != true || !check_download_integrity)
128
- rescue StandardError => e
129
- puts "=> #{e.message}"
130
- FileUtils.rm(ECDict::RAW_DATA) rescue nil
131
- return false
132
- else
133
- puts "=> 下载完成!"
134
- end
135
- end
136
- true
137
- end
138
-
139
-
140
- def decompress_7z_to_csv()
141
- require 'seven_zip_ruby'
142
-
143
- # 正常解压出来的CSV应当至少200MB以上
144
- if File.exists?(ECDict::CSV)
145
- if File.size(ECDict::CSV) <= 200*1024*1024
146
- puts "=> 删除旧有的不完全词典数据"
147
- FileUtils.rm(ECDict::CSV)
148
- else
149
- puts "=> CSV数据已存在,无需再次解压,直接使用"
150
- return true
151
- end
152
- end
153
-
154
-
155
- File.open(ECDict::RAW_DATA, "rb") do |file|
156
- puts "=> 正在解压ecdict-csv.7z(#{ '%.2f' % (file.size/1024.0/1024) }MB)"
157
- SevenZipRuby::Reader.extract_all(file, ECDict::STORAGE)
158
- end
159
-
160
- puts "=> 已成功解压出ecdict.csv(#{ '%.2f' % (File.size(ECDict::CSV)/1024.0/1024) }MB)"
161
- true
162
- end
163
-
164
-
165
- #
166
- # 检查生成出来的db的完整性
167
- # 不太方便检查,只能粗略的查看大小
168
- #
169
- def check_db_integrity
170
- # 正常从CSV生成出来的数据库应当至少400MB以上
171
- if File.size(ECDict::DB) <= 400*1024*1024
172
- false
173
- else
174
- true
175
- end
176
- end
177
-
178
-
179
- #
180
- # This is the Ruby versioned [stardict.py] from
181
- # https://github.com/skywind3000/ECDICT
182
- #
183
- # It generates [ecdict.db] in 3 steps:
184
- #
185
- # 1. create new and empty db file
186
- # 2. create table in the db
187
- # 3. insert data from CSV into the table
188
- #
189
- def generate_sqlite_db()
190
-
191
- # SQLite3::Database.new已经生成了该文件,所以需要提前判断到底是否存在
192
- if File.exist?(ECDict::DB)
193
-
194
- if !check_db_integrity
195
- puts "=> 删除旧有的不完整数据库"
196
- $DB.close
197
- FileUtils.rm(ECDict::DB)
198
- $DB = SQLite3::Database.new ECDict::DB
199
- else
200
- puts "=> 完整(可能的)数据库已存在,无需再次从CSV文件生成,直接使用"
201
- return true
202
- end
203
- end
204
-
205
-
206
- # 共15个字段,存的时候只需要14个
207
- # 1.id 主键,自增
208
- # 2.sw strip word(删除非alnum字符)
209
- # 3.word 单词名称
210
- # 4.phonetic 音标,以英语英标为主
211
- # 5.definition 单词释义(英文),每行一个释义
212
- # 6.translation 单词释义(中文),每行一个释义
213
- # 7.pos 词语位置,用 "/" 分割不同位置
214
- # 8.collins 柯林斯星级
215
- # 9.oxford 是否是牛津三千核心词汇
216
- # 10.tag 字符串标签:zk/中考,gk/高考,cet4/四级 等等标签,空格分割
217
- # 11.bnc 英国国家语料库词频顺序
218
- # 12.frq 当代语料库词频顺序
219
- # 13.exchange 时态复数等变换,使用 "/" 分割不同项目
220
- # 14.detail json 扩展信息,字典形式保存例句(待添加)
221
- # 15.audio 读音音频 url (待添加)
222
-
223
- sql = <<-EOF
224
- CREATE TABLE IF NOT EXISTS "ecdict" (
225
- "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE,
226
- "word" VARCHAR(64) COLLATE NOCASE NOT NULL UNIQUE,
227
- "sw" VARCHAR(64) COLLATE NOCASE NOT NULL,
228
- "phonetic" VARCHAR(64),
229
- "definition" TEXT,
230
- "translation" TEXT,
231
- "pos" VARCHAR(16),
232
- "collins" INTEGER DEFAULT(0),
233
- "oxford" INTEGER DEFAULT(0),
234
- "tag" VARCHAR(64),
235
- "bnc" INTEGER DEFAULT(NULL),
236
- "frq" INTEGER DEFAULT(NULL),
237
- "exchange" TEXT,
238
- "detail" TEXT,
239
- "audio" TEXT
240
- );
241
- CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_1" ON ecdict (id);
242
- CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_2" ON ecdict (word);
243
- CREATE INDEX IF NOT EXISTS "ecdict_3" ON ecdict (sw, word collate nocase);
244
- CREATE INDEX IF NOT EXISTS "ecd_1" ON ecdict (word collate nocase);
245
- EOF
246
-
247
-
248
- begin
249
- $DB.execute sql
250
- puts "=> 创建数据库文件#{ECDict::DB},并生成表`ecdict`"
251
- rescue Exception => e
252
- puts "=> #{e.message}"
253
- puts "=> 创建数据库文件#{ECDict::DB}失败,或表`ecdict`生成失败"
254
- close_db
255
- File.delete(ECDict::DB)
256
- return false
257
- end
258
-
259
-
260
- # 这是生成数据库中sw字段的函数,用于模糊匹配,csv中并不包含此字段
261
- def strip_word(word)
262
- word.gsub(/\W/,'').downcase
263
- end
264
-
265
- your_machine = case RUBY_PLATFORM
266
- when /linux/i then "linux"
267
- when /ucrt/i, /mingw/i then "windows"
268
- when /mac/i then "mac"
269
- when /bsd/i then "bsd"
270
- else "machine"
271
- end
272
-
273
- begin
274
- puts <<~EOC
275
-
276
- #{blue("gem_name = 'ecdict'")}
277
- #{blue("version = '#{ECDict::VERSION}'")}
278
- #{blue("author = 'ccmywish'")}
279
- #{blue("bug_track = [ 'https://gitee.com/ccmywish/ecdict/issues'
280
- 'https://github.com/ccmywish/ecdict/issues ]")}
281
-
282
- #{green("if")} #{purple("your_#{your_machine}_is_good")}
283
- wait n =~ #{red('3min or less')}
284
- #{green("end")}
285
-
286
- #{blue(bold("the_\#{author}_is_busily_inserting_data_for_you(...)"))}
287
-
288
- EOC
289
-
290
- require 'progress_bar'
291
- # progress_bar的bar在Windows上显示有问题
292
- progress = ProgressBar.new(3402560,:counter,:percentage,:elapsed)
293
-
294
- columns = %w{word sw phonetic definition translation pos collins oxford
295
- tag bnc frq exchange detail audio}.join(',')
296
-
297
- insert = "INSERT INTO ecdict (#{columns}) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
298
-
299
- require 'csv'
300
- num = 0
301
- $DB.transaction
302
- CSV.foreach(ECDict::CSV) do |row|
303
- num = num + 1
304
- next if num == 1 # headers不加入
305
- row.map! do |n|
306
- r = n.nil?? '' : n
307
- r.gsub('\n',"\n") # 插入数据库时应直接转换为真的换行符
308
- end
309
- sw = strip_word(row[0])
310
- row.insert(1,sw)
311
- $DB.execute(insert,row)
312
- progress.increment!
313
- end
314
- $DB.commit
315
-
316
- rescue Exception => e
317
- puts "=> #{e.message}"
318
- puts "=> 数据库插入信息失败"
319
- close_db
320
- File.delete(ECDict::DB)
321
- return false
322
- end
323
- puts
324
- puts "=> 数据库#{ECDict::DB}已完整生成"
325
- close_db
326
- return true
327
- end
328
-
329
-
330
- def download_and_install_ecdict_data
331
- download_ecdict_raw_data || ( puts("\e[31m=> 下载失败,安装取消!\e[0m") || exit(-1) )
332
- decompress_7z_to_csv || ( puts("\e[31m=> 解压失败,安装取消!\e[0m") || exit(-2) )
333
- generate_sqlite_db || ( puts("\e[31m=> 生成数据库失败,安装取消!\e[0m") || exit(-3) )
334
- puts
335
- puts "Hooray! 词典数据下载安装完成! 请尝试使用:\n\n"
336
- puts " 1. ecdict word 查询单词 "
337
- puts " 2. ecdict -r 进入交互式查询REPL"
338
- puts " 3. ecdict -c 中文 搜索中文单词"
339
- puts " 4. ecdict -cp 中文 搜索中文短语"
340
- puts
341
- end
342
-
343
-
344
-
345
- ####################
346
- # ecdict cmd
347
- ####################
348
- #
349
- # Error code -1: No db or db not intact
350
- # Error code 1: No input
351
- # Error code 2: Doesn't find a result
352
- #
353
-
354
- #
355
- # Look up the dictionary db once a call
356
- #
357
- def reply_once(query)
358
-
359
- if !check_db_integrity
360
- puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
361
- close_db_and_exit(1)
362
- end
363
-
364
- tables = []
365
- $DB.execute "SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; " do |tbl|
366
- tables << tbl
367
- end
368
- # 每个表自己被数组包裹, [["gnuutils"], ["ruby"]]
369
- tables.flatten!
370
-
371
-
372
- rows = []
373
- tables.each do |t|
374
- $DB.execute "SELECT phonetic,translation,definition,exchange FROM #{t} WHERE word = '#{query}'" do |row|
375
- rows << row
376
- end
377
- end
378
-
379
- if rows.empty?
380
- puts "ecdict: 未找到结果,请检查单词拼写是否正确"
381
- close_db
382
- return
383
- else
384
- line = rows[0]
385
- phonetic,trans,definition,exchange = line[0],line[1],line[2],line[3]
386
- puts "#{query} [#{phonetic.strip.empty?? :nil : phonetic}]"
387
- puts
388
- puts "* 中文翻译"
389
- trans.split("\n").each {|t| puts "- #{t}"}
390
- puts
391
- if !definition.nil? && definition.strip != ""
392
- puts "* 英文释义"
393
- definition.split("\n").each {|t| puts "- #{t}"}
394
- puts
395
- end
396
- if !exchange.nil? && !exchange.strip.empty?
397
- puts "* 变化形式"
398
- exchange.split("/").each do |e|
399
- print "- "
400
- type,word = e.split(":")
401
- case type
402
- when ?p then print "过去式 : "
403
- when ?d then print "过去分词: "
404
- when ?i then print "现在分词: "
405
- when ?3 then print "第三人称: "
406
- when ?r then print "比较级 : "
407
- when ?t then print "最高级 : "
408
- when ?s then print "名词复数: "
409
- when ?0 then print "词根来源: "
410
- when ?1 then print "词根变化: "
411
- end
412
- puts word
413
- end
414
- end
415
- end
416
-
417
- $DB.close if $DB
418
- end
419
-
420
-
421
- #
422
- # Search Chinese word to find English words
423
- #
424
- def search_chinese(cn_word, support_phrase: false)
425
-
426
- if cn_word.empty?
427
- puts "ecdict: 请输入要查询的中文,或使用`ecdict -h`查看帮助"
428
- close_db_and_exit(2)
429
- end
430
- if !check_db_integrity
431
- puts "=> 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
432
- close_db_and_exit(1)
433
- end
434
-
435
- if support_phrase
436
- puts "ecdict: 搜索加强,包含短语"
437
- end
438
-
439
- tables = []
440
- $DB.execute "SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; " do |tbl|
441
- tables << tbl
442
- end
443
- tables.flatten!
444
-
445
- rows = []
446
- tables.each do |t|
447
- $DB.execute "SELECT word, translation FROM #{t} WHERE translation LIKE '%#{cn_word}%' " do |row|
448
- rows << row
449
- end
450
- end
451
- if rows.empty?
452
- puts "ecdict: 抱歉,未找到与之相关的英文"
453
- close_db
454
- return
455
- else
456
- # 有的时候并不是没有结果,而是被我们过滤掉了,对此确认一下
457
- found_a_word = false
458
-
459
- rows.each do |line|
460
- en_word,trans = line[0],line[1]
461
-
462
- # 过长的直接跳过
463
- next if trans.length > 35
464
- # 有换行符的太长,直接跳过
465
- next if trans.include?("\r\n")
466
- next if trans.include?("\n")
467
-
468
-
469
- if !support_phrase
470
- # 不要搜索词组
471
- next if en_word.include?(' ')
472
- # 不要搜索连字词
473
- next if en_word.include?('-')
474
- end
475
-
476
-
477
- # filter
478
- # "[网络] 微软,认证专家;微软认证产品专家;微软专家认证"
479
- trans_words1 = trans.split(',') # 英文逗号!!!
480
-
481
- trans_words = []
482
- trans_words1.each do |w|
483
- trans_words << w.split(';') # 中文分号!!!
484
- end
485
- # [ [] [] [] ]
486
- trans_words = trans_words.flatten
487
-
488
- ret = nil
489
- trans_words.each do |t|
490
- ret = t.split.index do
491
- # 必须以搜索的中文开头,最多容纳两个字符
492
- # _1 =~ /^#{cn_word}.{0,2}$/
493
-
494
- # 往往中文都是精确搜索
495
- _1 == cn_word
496
- end
497
- break if ret
498
- end
499
-
500
- if !ret.nil?
501
- found_a_word = true
502
- puts "#{blue(en_word)}: #{trans}"
503
- end
504
- end
505
-
506
- if found_a_word
507
- puts
508
- else
509
-
510
- if !support_phrase
511
- puts "ecdict: 扩大搜索范围,再次尝试搜索..."
512
- search_chinese(cn_word, support_phrase: true)
513
- else
514
- puts "ecdict: 抱歉,未找到与之相关的英文"
515
- end
516
-
517
- end
518
- # end of else
519
- end
520
-
521
- end
522
-
523
-
524
- ####################
525
- # ecdict REPL
526
- ####################
527
-
528
- #
529
- # Look up the dictionary db all the time in a not ending way,
530
- # that is, interactively as a REPL does.
531
- #
532
- # Notice the similar function `reply_once` above,
533
- # which only answer once.
534
- #
535
- def replying(query)
536
-
537
- answer = nil
538
- $DB.execute "SELECT phonetic,translation,definition,exchange FROM ecdict WHERE word = '#{query}'" do |row|
539
- answer = row
540
- end
541
- if answer.nil?
542
- puts "未找到结果,请检查单词拼写是否正确"
543
- else
544
- line = answer
545
- phonetic,trans,definition,exchange = line[0],line[1],line[2],line[3]
546
- puts "#{query} [#{phonetic.strip.empty?? :nil : phonetic}]"
547
- puts
548
- puts "* 中文翻译"
549
- trans.split("\n").each {|t| puts "- #{t}"}
550
- puts
551
- if !definition.nil? && definition.strip != ""
552
- puts "* 英文释义"
553
- definition.split("\n").each {|t| puts "- #{t}"}
554
- puts
555
- end
556
- if !exchange.nil? && !exchange.strip.empty?
557
- puts "* 变化形式"
558
- exchange.split("/").each do |e|
559
- print "- "
560
- type,word = e.split(":")
561
- case type
562
- when ?p then print "过去式 : "
563
- when ?d then print "过去分词: "
564
- when ?i then print "现在分词: "
565
- when ?3 then print "第三人称: "
566
- when ?r then print "比较级 : "
567
- when ?t then print "最高级 : "
568
- when ?s then print "名词复数: "
569
- when ?0 then print "词根来源: "
570
- when ?1 then print "词根变化: "
571
- end
572
- puts word
573
- end
574
- end
575
- end
576
- end
577
-
578
-
579
- #
580
- # Start ecrepl
581
- #
582
- def start_ecrepl
583
-
584
- if !check_db_integrity
585
- puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
586
- close_db_and_exit(1)
587
- end
588
-
589
- begin
590
- stty_save = `stty -g`.chomp
591
- rescue
592
- end
593
-
594
- #
595
- # completion
596
- #
597
- require 'ls_table'
598
- require 'reline'
599
- Reline.completion_proc = lambda do |word|
600
- if word.strip.empty?
601
- return %w[1.输入单词并回车查询含义
602
- 2.输入单词时按一次tab键反馈搜索建议
603
- 3.输入exit或按Ctrl-C或Ctrl-D退出]
604
- end
605
-
606
- max_len = word.length + 4
607
- puts
608
-
609
- # $DB.execute "SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '#{word}%' AND length(sw)<#{max_len} LIMIT 12" { |row| puts row }
610
-
611
-
612
- # Display suggestions
613
- #
614
- # @params word [String] The word we search
615
- # @params ret [Array] The suggestions array returned
616
- def _display_suggestions(word, ret)
617
- return if ret.empty?
618
- if word.length <= 7
619
- LsTable.ls(ret) { puts blue(_1) }
620
- else
621
- LsTable.ls(ret, cell_len: 14, cell_num: 6) { puts blue(_1) }
622
- end
623
- puts
624
- end
625
-
626
- # Generate suggestions
627
- # 1. the words beginning with our search word
628
- # 2. the words including our search word
629
- #
630
- # @params word [String] The word we search
631
- # @params max_len [Integer] Suggestion's max length
632
- def _gen_suggestion_1(word, max_len)
633
- ret = $DB.execute <<-SQL
634
- SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '#{word}%'
635
- AND length(sw)<#{max_len} LIMIT 64
636
- SQL
637
- # [["baba"], ["babe"], ["babn"], ["baby"]]
638
- ret = ret.to_a.flatten
639
- end
640
-
641
- def _gen_suggestion_2(word, max_len)
642
- ret = $DB.execute <<-SQL
643
- SELECT DISTINCT sw FROM ecdict WHERE sw LIKE '_%#{word}%'
644
- AND length(sw)<#{max_len} LIMIT 64
645
- SQL
646
- ret = ret.to_a.flatten
647
- end
648
-
649
- suggestions = [
650
- Thread.new {
651
- _display_suggestions word, _gen_suggestion_1(word, max_len)
652
- },
653
- Thread.new {
654
- _display_suggestions word, _gen_suggestion_2(word, max_len)
655
- }
656
- ].each(&:join)
657
-
658
- ""
659
- end
660
-
661
- #
662
- # main query
663
- #
664
- begin
665
- puts red("ECDict REPL (Ruby #{RUBY_VERSION} Powered)")
666
-
667
- while line = Reline.readline("\e[32mecdict> \e[0m", true)
668
- case word = line.chomp
669
- when 'exit'
670
- close_db
671
- return
672
- when ''
673
- # NOOP
674
- else
675
- replying(word)
676
- end
677
- end
678
- rescue Interrupt
679
- # puts '^C'
680
- `stty #{stty_save}` if stty_save
681
- close_db
682
- return
683
- end
684
-
685
- end
686
-
687
-
688
- #
689
- # remove everything in #{ECDict::STORAGE}
690
- #
691
- def delete_cache
692
- close_db
693
- begin
694
- FileUtils.rm_rf(ECDict::STORAGE)
695
- rescue => e
696
- puts "ecdict: #{e.message}"
697
- puts "ecdict: 清空词典数据失败"
698
- else
699
- puts "ecdict: 清空词典数据完成"
700
- end
701
- puts
702
- end
703
-
704
-
705
- ####################
706
- # others
707
- ####################
708
-
709
- def print_version
710
- puts <<EOH
711
- ecdict (v#{ECDict::VERSION}): A courteous cli translator.
712
- EOH
713
-
714
- end
715
-
716
-
717
- def help
718
- puts <<EOH
719
- ecdict (v#{ECDict::VERSION}): A courteous cli translator.
720
-
721
- usage:
722
-
723
- ecdict word => 查询单词word
724
- ecdict -r => 启动ecdict repl交互式查询,输入exit或Ctrl-C/D退出
725
- ecdict -c 中文 => 搜索中文单词对应的英文单词/短语
726
- ecdict -i => 下载ecdict词典数据并安装
727
- ecdict -v => 打印此Gem版本号
728
- ecdict -h => 打印此帮助
729
- ecdict -d => 清空词典数据
730
-
731
- EOH
732
-
733
- end
734
-
735
-
736
- ####################
737
- # main
738
- ####################
739
-
740
- if ARGV.empty?
741
- if !check_db_integrity
742
- puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
743
- close_db_and_exit(1)
744
- end
745
- puts "ecdict: 请输入要查询的词/词组 或使用`ecdict -h`查看帮助"
746
- close_db_and_exit(2)
747
- end
748
-
749
- query = ARGV.shift
750
- case query
751
- when "-v" then print_version
752
- when "-h" then help
753
- when "-i" then download_and_install_ecdict_data
754
- when "-r" then start_ecrepl
755
- when "-d" then delete_cache
756
- when "-c" then search_chinese(ARGV.join)
757
- else
758
- reply_once(query)
759
- end
760
-
761
- # ensure close db
762
- close_db
1
+ #!/usr/bin/env ruby
2
+ # ------------------------------------------------------
3
+ # File : ecdict.rb
4
+ # Authors : ccmywish <ccmywish@qq.com>
5
+ # Created on : <2020-10-18>
6
+ # Last modified : <2022-04-30>
7
+ #
8
+ # ecdict:
9
+ #
10
+ # The command line dict using ECDICT.
11
+ #
12
+ # ECDICT: https://github.com/skywind3000/ECDICT
13
+ #
14
+ # This gem consists of:
15
+ # 1. ecdict installer
16
+ # 2. ecdict cmd
17
+ # 3. ecdict repl
18
+ #
19
+ # Exit -1: 下载原始数据失败
20
+ # Exit -2: 解压原始数据至CSV失败
21
+ # EXit -3: 从CSV生成数据库失败
22
+ # Exit 1: 无词典
23
+ # Exit 2: 缺少参数
24
+ #
25
+ # 未搜索到结果并不认为是错误
26
+ # ------------------------------------------------------
27
+
28
+ require 'sqlite3'
29
+ require 'ecdict'
30
+
31
+ ECDict::STORAGE = File.expand_path("~/.local/share/ecdict")
32
+ ECDict::RAW_DATA = File.join(ECDict::STORAGE, 'ecdict-csv.7z')
33
+
34
+ ECDict::DB = File.join(ECDict::STORAGE, 'ecdict.db')
35
+ ECDict::CSV = File.join(ECDict::STORAGE, 'ecdict.csv')
36
+
37
+ # 这个版本是我从上游下载的CSV后自己压缩的
38
+ # 这个文件的实际创建日期(上游作者创建)是2017/06/04
39
+ # 至2022/03/22,未见更新,估计以后上游维护这个词典的也不会经常更新
40
+ # 而340万的单词量已经足够,因此我们可以足够长的时间一直停留在此词典版本
41
+ ECDict::DICT_DOWNLOAD_LINK = "https://gitee.com/ccmywish/ecdict-data"
42
+ ECDict::DICT_SHA256 = "86782a0e5d05486b482be5a2e5fa99b1a9b2ae51240c853ecfe181886133818a"
43
+
44
+
45
+ require 'fileutils'
46
+ FileUtils.mkdir_p(ECDict::STORAGE)
47
+ $DB = SQLite3::Database.new ECDict::DB
48
+
49
+ def close_db
50
+ $DB.close if $DB
51
+ end
52
+
53
+ def close_db_and_exit(num = 0)
54
+ close_db
55
+ exit num
56
+ end
57
+
58
+
59
+
60
+ ####################
61
+ # helper: for color
62
+ ####################
63
+
64
+ def bold(str) "\e[1m#{str}\e[0m" end
65
+ def underline(str) "\e[4m#{str}\e[0m" end
66
+ def red(str) "\e[31m#{str}\e[0m" end
67
+ def green(str) "\e[32m#{str}\e[0m" end
68
+ def yellow(str) "\e[33m#{str}\e[0m" end
69
+ def blue(str) "\e[34m#{str}\e[0m" end
70
+ def purple(str) "\e[35m#{str}\e[0m" end
71
+ def cyan(str) "\e[36m#{str}\e[0m" end
72
+
73
+
74
+
75
+ ##########################
76
+ # db download and install
77
+ ##########################
78
+ #
79
+ # 1. Download .7z file from Gitee
80
+ # 2. Decompress the .7z file to CSV file
81
+ # 3. Transform CSV file into database
82
+ #
83
+
84
+ def check_download_integrity
85
+ require 'digest'
86
+ return false if !File.exist?(ECDict::RAW_DATA)
87
+ sha256 = Digest::SHA256.file(ECDict::RAW_DATA).hexdigest
88
+ if ECDict::DICT_SHA256 == sha256
89
+ true
90
+ else
91
+ false
92
+ end
93
+ end
94
+
95
+
96
+ def download_ecdict_raw_data
97
+
98
+ if File.exist?(ECDict::RAW_DATA) && check_download_integrity
99
+ puts "=> 已存在原始数据文件#{ECDict::RAW_DATA}, 无需再次下载"
100
+ else
101
+
102
+ if File.exist?(ECDict::RAW_DATA)
103
+ puts "=> 删除已存在但不完整的原始数据文件"
104
+ FileUtils.rm ECDict::RAW_DATA
105
+ end
106
+
107
+ # 若已经有拉取的仓库,先检查里面的数据是否可用
108
+ raw_data_git_dir = File.join(ECDict::STORAGE, 'ecdict-data')
109
+ raw_data_git_dir_data = File.join(raw_data_git_dir, 'ecdict-csv.7z')
110
+
111
+ if Dir.exist?(raw_data_git_dir)
112
+ if File.exist?(raw_data_git_dir_data)
113
+ FileUtils.cp(raw_data_git_dir_data, ECDict::STORAGE)
114
+ if check_download_integrity
115
+ puts "=> 从已经Git pull到的仓库中获取原始数据文件"
116
+ return true
117
+ else
118
+ FileUtils.rm ECDict::RAW_DATA
119
+ end
120
+ end
121
+ FileUtils.rm_rf(raw_data_git_dir)
122
+ end
123
+
124
+ begin
125
+ puts "=> 使用Git从#{ECDict::DICT_DOWNLOAD_LINK}获取原始数据库文件"
126
+ ret = system("git -C #{ECDict::STORAGE} clone #{ECDict::DICT_DOWNLOAD_LINK} ")
127
+ FileUtils.cp(File.join(ECDict::STORAGE, 'ecdict-data', 'ecdict-csv.7z'), ECDict::STORAGE) rescue nil
128
+ raise "Git拉取仓库过程失败,原始数据文件不完整!" if (ret != true || !check_download_integrity)
129
+ rescue StandardError => e
130
+ puts "=> #{e.message}"
131
+ FileUtils.rm(ECDict::RAW_DATA) rescue nil
132
+ return false
133
+ else
134
+ puts "=> 下载完成!"
135
+ end
136
+ end
137
+ true
138
+ end
139
+
140
+
141
+ def decompress_7z_to_csv()
142
+ require 'seven_zip_ruby'
143
+
144
+ # 正常解压出来的CSV应当至少200MB以上
145
+ if File.exist?(ECDict::CSV)
146
+ if File.size(ECDict::CSV) <= 200*1024*1024
147
+ puts "=> 删除旧有的不完全词典数据"
148
+ FileUtils.rm(ECDict::CSV)
149
+ else
150
+ puts "=> CSV数据已存在,无需再次解压,直接使用"
151
+ return true
152
+ end
153
+ end
154
+
155
+
156
+ File.open(ECDict::RAW_DATA, "rb") do |file|
157
+ puts "=> 正在解压ecdict-csv.7z(#{ '%.2f' % (file.size/1024.0/1024) }MB)"
158
+ SevenZipRuby::Reader.extract_all(file, ECDict::STORAGE)
159
+ end
160
+
161
+ puts "=> 已成功解压出ecdict.csv(#{ '%.2f' % (File.size(ECDict::CSV)/1024.0/1024) }MB)"
162
+ true
163
+ end
164
+
165
+
166
+ #
167
+ # 检查生成出来的db的完整性
168
+ # 不太方便检查,只能粗略的查看大小
169
+ #
170
+ def check_db_integrity
171
+ # 正常从CSV生成出来的数据库应当至少400MB以上
172
+ if File.size(ECDict::DB) <= 400*1024*1024
173
+ false
174
+ else
175
+ true
176
+ end
177
+ end
178
+
179
+
180
+ #
181
+ # This is the Ruby versioned [stardict.py] from
182
+ # https://github.com/skywind3000/ECDICT
183
+ #
184
+ # It generates [ecdict.db] in 3 steps:
185
+ #
186
+ # 1. create new and empty db file
187
+ # 2. create table in the db
188
+ # 3. insert data from CSV into the table
189
+ #
190
+ def generate_sqlite_db()
191
+
192
+ # SQLite3::Database.new已经生成了该文件,所以需要提前判断到底是否存在
193
+ if File.exist?(ECDict::DB)
194
+
195
+ if !check_db_integrity
196
+ puts "=> 删除旧有的不完整数据库"
197
+ $DB.close
198
+ FileUtils.rm(ECDict::DB)
199
+ $DB = SQLite3::Database.new ECDict::DB
200
+ else
201
+ puts "=> 完整(可能的)数据库已存在,无需再次从CSV文件生成,直接使用"
202
+ return true
203
+ end
204
+ end
205
+
206
+
207
+ # 共15个字段,存的时候只需要14个
208
+ # 1.id 主键,自增
209
+ # 2.sw strip word(删除非alnum字符)
210
+ # 3.word 单词名称
211
+ # 4.phonetic 音标,以英语英标为主
212
+ # 5.definition 单词释义(英文),每行一个释义
213
+ # 6.translation 单词释义(中文),每行一个释义
214
+ # 7.pos 词语位置,用 "/" 分割不同位置
215
+ # 8.collins 柯林斯星级
216
+ # 9.oxford 是否是牛津三千核心词汇
217
+ # 10.tag 字符串标签:zk/中考,gk/高考,cet4/四级 等等标签,空格分割
218
+ # 11.bnc 英国国家语料库词频顺序
219
+ # 12.frq 当代语料库词频顺序
220
+ # 13.exchange 时态复数等变换,使用 "/" 分割不同项目
221
+ # 14.detail json 扩展信息,字典形式保存例句(待添加)
222
+ # 15.audio 读音音频 url (待添加)
223
+
224
+ sql = <<-EOF
225
+ CREATE TABLE IF NOT EXISTS "ecdict" (
226
+ "id" INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL UNIQUE,
227
+ "word" VARCHAR(64) COLLATE NOCASE NOT NULL UNIQUE,
228
+ "sw" VARCHAR(64) COLLATE NOCASE NOT NULL,
229
+ "phonetic" VARCHAR(64),
230
+ "definition" TEXT,
231
+ "translation" TEXT,
232
+ "pos" VARCHAR(16),
233
+ "collins" INTEGER DEFAULT(0),
234
+ "oxford" INTEGER DEFAULT(0),
235
+ "tag" VARCHAR(64),
236
+ "bnc" INTEGER DEFAULT(NULL),
237
+ "frq" INTEGER DEFAULT(NULL),
238
+ "exchange" TEXT,
239
+ "detail" TEXT,
240
+ "audio" TEXT
241
+ );
242
+ CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_1" ON ecdict (id);
243
+ CREATE UNIQUE INDEX IF NOT EXISTS "ecdict_2" ON ecdict (word);
244
+ CREATE INDEX IF NOT EXISTS "ecdict_3" ON ecdict (sw, word collate nocase);
245
+ CREATE INDEX IF NOT EXISTS "ecd_1" ON ecdict (word collate nocase);
246
+ EOF
247
+
248
+
249
+ begin
250
+ $DB.execute sql
251
+ puts "=> 创建数据库文件#{ECDict::DB},并生成表`ecdict`"
252
+ rescue Exception => e
253
+ puts "=> #{e.message}"
254
+ puts "=> 创建数据库文件#{ECDict::DB}失败,或表`ecdict`生成失败"
255
+ close_db
256
+ File.delete(ECDict::DB)
257
+ return false
258
+ end
259
+
260
+
261
+ # 这是生成数据库中sw字段的函数,用于模糊匹配,csv中并不包含此字段
262
+ def strip_word(word)
263
+ word.gsub(/\W/,'').downcase
264
+ end
265
+
266
+ your_machine = case RUBY_PLATFORM
267
+ when /linux/i then "linux"
268
+ when /ucrt/i, /mingw/i then "windows"
269
+ when /mac/i then "mac"
270
+ when /bsd/i then "bsd"
271
+ else "machine"
272
+ end
273
+
274
+ begin
275
+ puts <<~EOC
276
+
277
+ #{blue("gem_name = 'ecdict'")}
278
+ #{blue("version = '#{ECDict::VERSION}'")}
279
+ #{blue("author = 'ccmywish'")}
280
+ #{blue("bug_track = [ 'https://gitee.com/ccmywish/ecdict/issues'
281
+ 'https://github.com/ccmywish/ecdict/issues ]")}
282
+
283
+ #{green("if")} #{purple("your_#{your_machine}_is_good")}
284
+ wait n =~ #{red('3min or less')}
285
+ #{green("end")}
286
+
287
+ #{blue(bold("the_\#{author}_is_busily_inserting_data_for_you(...)"))}
288
+
289
+ EOC
290
+
291
+ require 'progress_bar'
292
+ # progress_bar的bar在Windows上显示有问题
293
+ progress = ProgressBar.new(3402560,:counter,:percentage,:elapsed)
294
+
295
+ columns = %w{word sw phonetic definition translation pos collins oxford
296
+ tag bnc frq exchange detail audio}.join(',')
297
+
298
+ insert = "INSERT INTO ecdict (#{columns}) VALUES(?,?,?,?,?,?,?,?,?,?,?,?,?,?) "
299
+
300
+ require 'csv'
301
+ num = 0
302
+ $DB.transaction
303
+ CSV.foreach(ECDict::CSV) do |row|
304
+ num = num + 1
305
+ next if num == 1 # headers不加入
306
+ row.map! do |n|
307
+ r = n.nil?? '' : n
308
+ r.gsub('\n',"\n") # 插入数据库时应直接转换为真的换行符
309
+ end
310
+ sw = strip_word(row[0])
311
+ row.insert(1,sw)
312
+ $DB.execute(insert,row)
313
+ progress.increment!
314
+ end
315
+ $DB.commit
316
+
317
+ rescue Exception => e
318
+ puts "=> #{e.message}"
319
+ puts "=> 数据库插入信息失败"
320
+ close_db
321
+ File.delete(ECDict::DB)
322
+ return false
323
+ end
324
+ puts
325
+ puts "=> 数据库#{ECDict::DB}已完整生成"
326
+ close_db
327
+ return true
328
+ end
329
+
330
+
331
+ def download_and_install_ecdict_data
332
+ download_ecdict_raw_data || ( puts("\e[31m=> 下载失败,安装取消!\e[0m") || exit(-1) )
333
+ decompress_7z_to_csv || ( puts("\e[31m=> 解压失败,安装取消!\e[0m") || exit(-2) )
334
+ generate_sqlite_db || ( puts("\e[31m=> 生成数据库失败,安装取消!\e[0m") || exit(-3) )
335
+ puts
336
+ puts "Hooray! 词典数据下载安装完成! 请尝试使用:\n\n"
337
+ puts " 1. ecdict word 查询单词 "
338
+ puts " 2. ecdict -r 进入交互式查询REPL"
339
+ puts " 3. ecdict -c 中文 搜索中文单词"
340
+ puts " 4. ecdict -cp 中文 搜索中文短语"
341
+ puts
342
+ end
343
+
344
+
345
+
346
+ ####################
347
+ # ecdict cmd
348
+ ####################
349
+ #
350
+ # Error code -1: No db or db not intact
351
+ # Error code 1: No input
352
+ # Error code 2: Doesn't find a result
353
+ #
354
+
355
+ #
356
+ # Look up the dictionary db once a call
357
+ #
358
+ def reply_once(query)
359
+
360
+ if !check_db_integrity
361
+ puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
362
+ close_db_and_exit(1)
363
+ end
364
+
365
+ tables = []
366
+ $DB.execute "SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; " do |tbl|
367
+ tables << tbl
368
+ end
369
+ # 每个表自己被数组包裹, [["gnuutils"], ["ruby"]]
370
+ tables.flatten!
371
+
372
+
373
+ rows = []
374
+ tables.each do |t|
375
+ $DB.execute "SELECT phonetic,translation,definition,exchange FROM #{t} WHERE word = '#{query}'" do |row|
376
+ rows << row
377
+ end
378
+ end
379
+
380
+ if rows.empty?
381
+ puts "ecdict: 未找到结果,请检查单词拼写是否正确"
382
+ close_db
383
+ return
384
+ else
385
+ line = rows[0]
386
+ phonetic,trans,definition,exchange = line[0],line[1],line[2],line[3]
387
+ puts "#{query} [#{phonetic.strip.empty?? :nil : phonetic}]"
388
+ puts
389
+ puts "* 中文翻译"
390
+ trans.split("\n").each {|t| puts "- #{t}"}
391
+ puts
392
+ if !definition.nil? && definition.strip != ""
393
+ puts "* 英文释义"
394
+ definition.split("\n").each {|t| puts "- #{t}"}
395
+ puts
396
+ end
397
+ if !exchange.nil? && !exchange.strip.empty?
398
+ puts "* 变化形式"
399
+ exchange.split("/").each do |e|
400
+ print "- "
401
+ type,word = e.split(":")
402
+ case type
403
+ when ?p then print "过去式 : "
404
+ when ?d then print "过去分词: "
405
+ when ?i then print "现在分词: "
406
+ when ?3 then print "第三人称: "
407
+ when ?r then print "比较级 : "
408
+ when ?t then print "最高级 : "
409
+ when ?s then print "名词复数: "
410
+ when ?0 then print "词根来源: "
411
+ when ?1 then print "词根变化: "
412
+ end
413
+ puts word
414
+ end
415
+ end
416
+ end
417
+
418
+ $DB.close if $DB
419
+ end
420
+
421
+
422
# Collect every [word, translation] row, across all dictionary tables,
# whose translation contains +cn_word+.
#
# The search text is passed as a bound parameter instead of being
# interpolated into the SQL, so quotes in the input cannot break (or
# inject into) the statement.
#
# @param cn_word [String] the Chinese text to look for
# @return [Array<Array>] rows of [word, translation]
def _cn_candidate_rows(cn_word)
  # Each table name comes back wrapped in its own row: [["ecdict"], ...]
  tables = $DB.execute(
    "SELECT name FROM sqlite_master WHERE TYPE = 'table' AND name != 'sqlite_sequence'; "
  ).flatten

  pattern = "%#{cn_word}%"
  tables.flat_map do |table|
    # Identifiers cannot be bound, so quote the table name instead.
    quoted = %("#{table.to_s.gsub('"', '""')}")
    $DB.execute("SELECT word, translation FROM #{quoted} WHERE translation LIKE ?", [pattern])
  end
end

# Print every candidate row whose translation contains +cn_word+ as an
# exact, separator-delimited term.
#
# @param rows [Array<Array>] rows of [word, translation]
# @param cn_word [String] the Chinese text searched for
# @param support_phrase [Boolean] when false, English phrases (containing a
#   space) and hyphenated words are skipped
# @return [Boolean] true when at least one row was printed
def _cn_print_matches(rows, cn_word, support_phrase)
  found = false

  rows.each do |en_word, trans|
    # Overly long or multi-line translations are noise; skip them.
    next if trans.length > 35
    next if trans.include?("\n") # also covers "\r\n"

    next if !support_phrase && (en_word.include?(' ') || en_word.include?('-'))

    # Example: "[网络] 微软,认证专家;微软认证产品专家;微软专家认证"
    # Split on commas and semicolons in both their ASCII and full-width
    # (U+FF0C / U+FF1B) forms, since the data mixes both.
    terms = trans.split(/[,;\uFF0C\uFF1B]/)

    # Chinese lookups are usually meant to be exact, so require one
    # whitespace-delimited token to equal the search text.
    next unless terms.any? { |term| term.split.include?(cn_word) }

    found = true
    puts "#{blue(en_word)}: #{trans}"
  end

  found
end

#
# Search a Chinese word to find the matching English words.
#
# The first pass ignores English phrases and hyphenated words; if nothing is
# found it is retried once including them. The database is queried only
# once: the retry re-filters the rows already fetched.
#
# @param cn_word [String] the Chinese text to look up
# @param support_phrase [Boolean] include phrases/hyphenated words right away
# @return [nil]
#
def search_chinese(cn_word, support_phrase: false)
  if cn_word.empty?
    puts "ecdict: 请输入要查询的中文,或使用`ecdict -h`查看帮助"
    close_db_and_exit(2)
  end
  unless check_db_integrity
    puts "=> 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
    close_db_and_exit(1)
  end

  puts "ecdict: 搜索加强,包含短语" if support_phrase

  rows = _cn_candidate_rows(cn_word)
  if rows.empty?
    puts "ecdict: 抱歉,未找到与之相关的英文"
    close_db
    return
  end

  # Rows may exist and still all be filtered out, so track real matches.
  found = _cn_print_matches(rows, cn_word, support_phrase)

  if !found && !support_phrase
    puts "ecdict: 扩大搜索范围,再次尝试搜索..."
    puts "ecdict: 搜索加强,包含短语"
    found = _cn_print_matches(rows, cn_word, true)
  end

  if found
    puts
  else
    puts "ecdict: 抱歉,未找到与之相关的英文"
  end
  nil
end
523
+
524
+
525
+ ####################
526
+ # ecdict REPL
527
+ ####################
528
+
529
#
# Answer one lookup inside the REPL: query the `ecdict` table for +query+
# and print its phonetic, Chinese translation, English definition and
# word forms.
#
# Unlike the one-shot lookup used by the plain command line, this neither
# checks database integrity nor closes the database, so it can be called
# repeatedly.
#
# The word is passed as a bound parameter, so input containing quotes
# (e.g. "don't") is looked up literally instead of breaking the SQL.
#
# @param query [String] the English word to look up
#
def replying(query)
  answer = nil
  $DB.execute('SELECT phonetic,translation,definition,exchange FROM ecdict WHERE word = ?', [query]) do |row|
    answer = row
  end

  if answer.nil?
    puts "未找到结果,请检查单词拼写是否正确"
    return
  end

  phonetic, trans, definition, exchange = answer

  # Columns may be NULL in the database; treat NULL like an empty value.
  shown_phonetic = phonetic.to_s.strip.empty? ? :nil : phonetic
  puts "#{query} [#{shown_phonetic}]"
  puts
  puts "* 中文翻译"
  trans.to_s.split("\n").each { |t| puts "- #{t}" }
  puts

  if definition && !definition.strip.empty?
    puts "* 英文释义"
    definition.split("\n").each { |t| puts "- #{t}" }
    puts
  end

  return if exchange.nil? || exchange.strip.empty?

  # `exchange` looks like "p:tested/d:tested/s:tests": type code, colon, form.
  # NOTE(review): label padding is reproduced as seen; confirm the spacing
  # used for column alignment against the released file.
  labels = {
    'p' => "过去式 : ",
    'd' => "过去分词: ",
    'i' => "现在分词: ",
    '3' => "第三人称: ",
    'r' => "比较级 : ",
    't' => "最高级 : ",
    's' => "名词复数: ",
    '0' => "词根来源: ",
    '1' => "词根变化: "
  }

  puts "* 变化形式"
  exchange.split("/").each do |e|
    type, word = e.split(":")
    # Unknown type codes are printed without a label.
    puts "- #{labels[type]}#{word}"
  end
  nil
end
578
+
579
+
580
# Display completion suggestions as a table.
#
# @param word [String] the word being completed
# @param ret [Array<String>] the suggestions to show
def _display_suggestions(word, ret)
  return if ret.empty?

  if word.length <= 7
    LsTable.ls(ret) { puts blue(_1) }
  else
    LsTable.ls(ret, cell_len: 14, cell_num: 6) { puts blue(_1) }
  end
  puts
end

# Suggestions that BEGIN with the search word.
#
# Values are bound rather than interpolated, so a quote typed before
# pressing Tab cannot break the statement.
#
# @param word [String] the word being completed
# @param max_len [Integer] exclusive upper bound on suggestion length
# @return [Array<String>]
def _gen_suggestion_1(word, max_len)
  # Rows come back as [["baba"], ["babe"], ...]
  $DB.execute(<<~SQL, ["#{word}%", max_len]).to_a.flatten
    SELECT DISTINCT sw FROM ecdict WHERE sw LIKE ?
    AND length(sw) < ? LIMIT 64
  SQL
end

# Suggestions that CONTAIN the search word after at least one leading
# character.
#
# @param word [String] the word being completed
# @param max_len [Integer] exclusive upper bound on suggestion length
# @return [Array<String>]
def _gen_suggestion_2(word, max_len)
  $DB.execute(<<~SQL, ["_%#{word}%", max_len]).to_a.flatten
    SELECT DISTINCT sw FROM ecdict WHERE sw LIKE ?
    AND length(sw) < ? LIMIT 64
  SQL
end

#
# Start the ecdict REPL: read words with Reline, answer each with
# `replying`, and offer Tab-triggered suggestions.
#
# Leaves on `exit`, end of input (Ctrl-D) or Ctrl-C.
#
def start_ecrepl
  unless check_db_integrity
    puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
    close_db_and_exit(1)
  end

  # Remember terminal settings so they can be restored after Ctrl-C.
  # `stty` may be missing (e.g. on Windows); that is not an error here.
  stty_save =
    begin
      `stty -g`.chomp
    rescue StandardError
      nil
    end

  # Loaded lazily: only the REPL needs these.
  require 'ls_table'
  require 'reline'

  #
  # completion
  #
  Reline.completion_proc = lambda do |word|
    if word.strip.empty?
      return %w[1.输入单词并回车查询含义
                2.输入单词时按一次tab键反馈搜索建议
                3.输入exit或按Ctrl-C或Ctrl-D退出]
    end

    max_len = word.length + 4
    puts

    # Run both lookups concurrently, but print only after both finished so
    # the two tables never interleave and "starts with" always comes first.
    [
      Thread.new { _gen_suggestion_1(word, max_len) },
      Thread.new { _gen_suggestion_2(word, max_len) }
    ].map(&:value).each { |found| _display_suggestions(word, found) }

    ""
  end

  #
  # main query
  #
  begin
    puts red("ECDict REPL (Ruby #{RUBY_VERSION} Powered)")

    while (line = Reline.readline("\e[32mecdict> \e[0m", true))
      case word = line.chomp
      when 'exit'
        close_db
        return
      when ''
        # NOOP
      else
        replying(word)
      end
    end
  rescue Interrupt
    # Only restore when `stty -g` actually produced settings.
    `stty #{stty_save}` if stty_save && !stty_save.empty?
    close_db
    return
  end
end
687
+
688
+
689
#
# Remove everything in ECDict::STORAGE (the downloaded and generated
# dictionary data), then report whether that worked.
#
# The database is closed first so its file is not held open while deleted.
#
def delete_cache
  close_db
  storage = ECDict::STORAGE
  begin
    # `rm_rf` implies `force: true`, which swallows every StandardError, so a
    # failed delete would be reported as success. `rm_r` raises instead; the
    # guard keeps "nothing to delete" a success.
    FileUtils.rm_r(storage) if File.exist?(storage) || File.symlink?(storage)
  rescue StandardError => e
    puts "ecdict: #{e.message}"
    puts "ecdict: 清空词典数据失败"
  else
    puts "ecdict: 清空词典数据完成"
  end
  puts
end
704
+
705
+
706
+ ####################
707
+ # others
708
+ ####################
709
+
710
# Print a one-line banner containing the gem version.
def print_version
  banner = "ecdict (v#{ECDict::VERSION}): A courteous cli translator."
  puts banner
end
716
+
717
+
718
# Print the version banner followed by the command line usage summary.
#
# NOTE(review): the option lines are reproduced as seen, without indentation
# or column alignment; confirm against the released file.
def help
  usage_text = <<~USAGE
    ecdict (v#{ECDict::VERSION}): A courteous cli translator.

    usage:

    ecdict word => 查询单词word
    ecdict -r => 启动ecdict repl交互式查询,输入exit或Ctrl-C/D退出
    ecdict -c 中文 => 搜索中文单词对应的英文单词/短语
    ecdict -i => 下载ecdict词典数据并安装
    ecdict -v => 打印此Gem版本号
    ecdict -h => 打印此帮助
    ecdict -d => 清空词典数据

  USAGE
  puts usage_text
end
735
+
736
+
737
+ ####################
738
+ # main
739
+ ####################
740
+
741
# Entry point. With no arguments there is nothing to look up: report a
# missing/incomplete dictionary first (exit 1), otherwise ask for a word
# (exit 2).
if ARGV.empty?
  unless check_db_integrity
    puts "ecdict: 数据库不存在或数据库不完整,请使用`ecdict -i`来下载安装词典数据"
    close_db_and_exit(1)
  end
  puts "ecdict: 请输入要查询的词/词组 或使用`ecdict -h`查看帮助"
  close_db_and_exit(2)
end

# The first argument is either an option flag or the word to look up.
first_arg = ARGV.shift

option_handlers = {
  "-v" => -> { print_version },
  "-h" => -> { help },
  "-i" => -> { download_and_install_ecdict_data },
  "-r" => -> { start_ecrepl },
  "-d" => -> { delete_cache },
  # Everything after -c is joined into one Chinese search term.
  "-c" => -> { search_chinese(ARGV.join) }
}

# Anything that is not a known flag is treated as an English word.
handler = option_handlers.fetch(first_arg) { -> { reply_once(first_arg) } }
handler.call

# ensure close db
close_db