zidian 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +8 -0
- data/Gemfile.lock +40 -0
- data/Rakefile +27 -13
- data/VERSION +1 -1
- data/lib/zidian.rb +21 -4
- data/test/helper.rb +18 -0
- data/test/test_zidian.rb +7 -2
- data/zidian.gemspec +28 -19
- metadata +59 -17
- data/.gitignore +0 -2
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
GEM
|
|
2
|
+
remote: http://rubygems.org/
|
|
3
|
+
specs:
|
|
4
|
+
archive-tar-minitar (0.5.2)
|
|
5
|
+
columnize (0.3.4)
|
|
6
|
+
git (1.2.5)
|
|
7
|
+
jeweler (1.6.2)
|
|
8
|
+
bundler (~> 1.0)
|
|
9
|
+
git (>= 1.2.5)
|
|
10
|
+
rake
|
|
11
|
+
linecache (0.46)
|
|
12
|
+
rbx-require-relative (> 0.0.4)
|
|
13
|
+
linecache19 (0.5.12)
|
|
14
|
+
ruby_core_source (>= 0.1.4)
|
|
15
|
+
rake (0.9.2)
|
|
16
|
+
rbx-require-relative (0.0.5)
|
|
17
|
+
ruby-debug (0.10.4)
|
|
18
|
+
columnize (>= 0.1)
|
|
19
|
+
ruby-debug-base (~> 0.10.4.0)
|
|
20
|
+
ruby-debug-base (0.10.4)
|
|
21
|
+
linecache (>= 0.3)
|
|
22
|
+
ruby-debug-base19 (0.11.25)
|
|
23
|
+
columnize (>= 0.3.1)
|
|
24
|
+
linecache19 (>= 0.5.11)
|
|
25
|
+
ruby_core_source (>= 0.1.4)
|
|
26
|
+
ruby-debug19 (0.11.6)
|
|
27
|
+
columnize (>= 0.3.1)
|
|
28
|
+
linecache19 (>= 0.5.11)
|
|
29
|
+
ruby-debug-base19 (>= 0.11.19)
|
|
30
|
+
ruby_core_source (0.1.5)
|
|
31
|
+
archive-tar-minitar (>= 0.5.2)
|
|
32
|
+
|
|
33
|
+
PLATFORMS
|
|
34
|
+
ruby
|
|
35
|
+
|
|
36
|
+
DEPENDENCIES
|
|
37
|
+
bundler (~> 1.0.0)
|
|
38
|
+
jeweler (~> 1.6.2)
|
|
39
|
+
ruby-debug
|
|
40
|
+
ruby-debug19
|
data/Rakefile
CHANGED
|
@@ -1,30 +1,44 @@
|
|
|
1
|
-
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'rubygems'
|
|
4
|
+
require 'bundler'
|
|
5
|
+
begin
|
|
6
|
+
Bundler.setup(:default, :development)
|
|
7
|
+
rescue Bundler::BundlerError => e
|
|
8
|
+
$stderr.puts e.message
|
|
9
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
|
10
|
+
exit e.status_code
|
|
11
|
+
end
|
|
2
12
|
require 'rake'
|
|
3
13
|
require 'rake/rdoctask'
|
|
4
14
|
|
|
5
|
-
|
|
6
|
-
require 'jeweler'
|
|
15
|
+
require 'jeweler'
|
|
7
16
|
|
|
8
|
-
|
|
17
|
+
Jeweler::Tasks.new do |gemspec|
|
|
9
18
|
gemspec.name = "zidian"
|
|
10
19
|
gemspec.summary = "Chinese dictionary"
|
|
11
20
|
gemspec.description = "Chinese dictionary using the CEDICT word list"
|
|
12
21
|
gemspec.email = "bastien.vaucher@gmail.com"
|
|
13
22
|
gemspec.homepage = "http://github.com/bastien/zidian"
|
|
14
23
|
gemspec.authors = ["Bastien Vaucher"]
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
24
|
+
end
|
|
25
|
+
Jeweler::GemcutterTasks.new
|
|
26
|
+
|
|
27
|
+
require 'rake/testtask'
|
|
28
|
+
Rake::TestTask.new(:test) do |test|
|
|
29
|
+
test.libs << 'lib' << 'test'
|
|
30
|
+
test.pattern = 'test/**/test_*.rb'
|
|
31
|
+
test.verbose = true
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
task :default => :test
|
|
20
35
|
|
|
21
36
|
desc 'Generate documentation for the Zidian gem.'
|
|
22
|
-
Rake::RDocTask.new
|
|
37
|
+
Rake::RDocTask.new do |rdoc|
|
|
38
|
+
version = File.exist?('VERSION') ? File.read('VERSION') : ""
|
|
23
39
|
rdoc.rdoc_dir = 'rdoc'
|
|
24
40
|
rdoc.title = 'Zidian'
|
|
25
41
|
rdoc.options << '--line-numbers' << '--inline-source'
|
|
26
42
|
rdoc.rdoc_files.include('README.mkd')
|
|
27
43
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
Dir["#{File.dirname(__FILE__)}/tasks/*.rake"].sort.each { |ext| load ext }
|
|
44
|
+
end
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.2.0
|
|
1
|
+
0.3.0
|
data/lib/zidian.rb
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
module Zidian
|
|
2
2
|
|
|
3
3
|
def self.find(expression)
|
|
4
|
-
$KCODE = 'UTF8'
|
|
4
|
+
$KCODE = 'UTF8' if RUBY_VERSION < "1.9.0"
|
|
5
5
|
case expression.class.name
|
|
6
6
|
when "Array"
|
|
7
7
|
expression.collect{|e| find(e) }.flatten.uniq
|
|
8
8
|
when "Integer", "Fixnum" then
|
|
9
9
|
Word.new(get_line(expression), expression)
|
|
10
10
|
when "String" then
|
|
11
|
-
|
|
11
|
+
find_words(expression).collect{|raw_word| Word.new(raw_word) }
|
|
12
12
|
else
|
|
13
13
|
raise InvalFindInputException
|
|
14
14
|
end
|
|
@@ -16,10 +16,11 @@ module Zidian
|
|
|
16
16
|
|
|
17
17
|
protected
|
|
18
18
|
|
|
19
|
-
def self.
|
|
19
|
+
def self.find_words(word, case_sensitive = false) #:nodoc:
|
|
20
20
|
words = word.split.map{|w| "#{w}[1-4]?"}.join(" ")
|
|
21
21
|
# adding the -i option allows to search independently from the case, but it makes it very slow
|
|
22
|
-
`less #{File.dirname(__FILE__)}/cedict_ts.u8 | grep -n -E '(^|[^a-zA-Z])#{words}($|[^a-zA-Z])'`
|
|
22
|
+
results = `less #{File.dirname(__FILE__)}/cedict_ts.u8 | grep -n -E#{ case_sensitive ? ' -i' : ''} '(^|[^a-zA-Z])#{words}($|[^a-zA-Z])'`
|
|
23
|
+
sort_lines(results.lines.to_a, words)
|
|
23
24
|
end
|
|
24
25
|
|
|
25
26
|
def self.get_line(line_number) #:nodoc:
|
|
@@ -27,6 +28,22 @@ module Zidian
|
|
|
27
28
|
`sed -n '#{line_number}p' #{File.dirname(__FILE__)}/cedict_ts.u8`
|
|
28
29
|
end
|
|
29
30
|
|
|
31
|
+
# Sorts the lines by similarity to the words
|
|
32
|
+
#
|
|
33
|
+
def self.sort_lines(lines, words)
|
|
34
|
+
lines.sort do |a, b|
|
|
35
|
+
line_similarity_to_words(a, words) <=> line_similarity_to_words(b, words)
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
|
|
39
|
+
def self.line_similarity_to_words(line, words)
|
|
40
|
+
# words delimited by : ],:/[
|
|
41
|
+
# Very basic similarity determination
|
|
42
|
+
# we count how many characters before and after the word, the less characters the more similar the match is
|
|
43
|
+
match_data = line.match("(^|[,:\\\[\/,])([^\\\[\/,:]*)#{words}([^\\\]\\\[\/,]*)($|[,\\\]\\\[\/,])").to_a
|
|
44
|
+
match_data[2].strip.size + match_data[3].strip.size
|
|
45
|
+
end
|
|
46
|
+
|
|
30
47
|
class Word
|
|
31
48
|
|
|
32
49
|
attr_reader :id, :traditional, :simplified, :pinyin, :english
|
data/test/helper.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require 'rubygems'
|
|
2
|
+
require 'bundler'
|
|
3
|
+
begin
|
|
4
|
+
Bundler.setup(:default, :development)
|
|
5
|
+
rescue Bundler::BundlerError => e
|
|
6
|
+
$stderr.puts e.message
|
|
7
|
+
$stderr.puts "Run `bundle install` to install missing gems"
|
|
8
|
+
exit e.status_code
|
|
9
|
+
end
|
|
10
|
+
require 'test/unit'
|
|
11
|
+
require "ruby-debug"
|
|
12
|
+
|
|
13
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
|
14
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
|
15
|
+
require 'zidian'
|
|
16
|
+
|
|
17
|
+
class Test::Unit::TestCase
|
|
18
|
+
end
|
data/test/test_zidian.rb
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
|
|
2
|
-
require "
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
require "helper"
|
|
3
3
|
|
|
4
4
|
class TestZidian < Test::Unit::TestCase
|
|
5
5
|
|
|
@@ -34,6 +34,11 @@ class TestZidian < Test::Unit::TestCase
|
|
|
34
34
|
assert_equal("围城", words.first.simplified)
|
|
35
35
|
end
|
|
36
36
|
|
|
37
|
+
def test_find_word_from_chinese
|
|
38
|
+
words = Zidian.find("围城")
|
|
39
|
+
assert_equal("wei2 cheng2", words.first.pinyin)
|
|
40
|
+
end
|
|
41
|
+
|
|
37
42
|
def test_find_word_from_pinyin_marked
|
|
38
43
|
words = Zidian.find("wei2 cheng2")
|
|
39
44
|
assert_equal("siege", words.first.english.first)
|
data/zidian.gemspec
CHANGED
|
@@ -1,48 +1,57 @@
|
|
|
1
1
|
# Generated by jeweler
|
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
|
5
5
|
|
|
6
6
|
Gem::Specification.new do |s|
|
|
7
7
|
s.name = %q{zidian}
|
|
8
|
-
s.version = "0.2.0"
|
|
8
|
+
s.version = "0.3.0"
|
|
9
9
|
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
|
11
11
|
s.authors = ["Bastien Vaucher"]
|
|
12
|
-
s.date = %q{
|
|
12
|
+
s.date = %q{2011-08-03}
|
|
13
13
|
s.description = %q{Chinese dictionary using the CEDICT word list}
|
|
14
14
|
s.email = %q{bastien.vaucher@gmail.com}
|
|
15
15
|
s.extra_rdoc_files = [
|
|
16
16
|
"README.mkd"
|
|
17
17
|
]
|
|
18
18
|
s.files = [
|
|
19
|
-
"
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
19
|
+
"Gemfile",
|
|
20
|
+
"Gemfile.lock",
|
|
21
|
+
"Manifest",
|
|
22
|
+
"README.mkd",
|
|
23
|
+
"Rakefile",
|
|
24
|
+
"VERSION",
|
|
25
|
+
"lib/cedict_ts.u8",
|
|
26
|
+
"lib/zidian.rb",
|
|
27
|
+
"test/helper.rb",
|
|
28
|
+
"test/test_zidian.rb",
|
|
29
|
+
"zidian.gemspec"
|
|
28
30
|
]
|
|
29
31
|
s.homepage = %q{http://github.com/bastien/zidian}
|
|
30
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
|
31
32
|
s.require_paths = ["lib"]
|
|
32
|
-
s.rubygems_version = %q{1.
|
|
33
|
+
s.rubygems_version = %q{1.6.2}
|
|
33
34
|
s.summary = %q{Chinese dictionary}
|
|
34
|
-
s.test_files = [
|
|
35
|
-
"test/test_zidian.rb"
|
|
36
|
-
]
|
|
37
35
|
|
|
38
36
|
if s.respond_to? :specification_version then
|
|
39
|
-
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
|
40
37
|
s.specification_version = 3
|
|
41
38
|
|
|
42
|
-
if Gem::Version.new(Gem::
|
|
39
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
|
40
|
+
s.add_development_dependency(%q<bundler>, ["~> 1.0.0"])
|
|
41
|
+
s.add_development_dependency(%q<jeweler>, ["~> 1.6.2"])
|
|
42
|
+
s.add_development_dependency(%q<ruby-debug>, [">= 0"])
|
|
43
|
+
s.add_development_dependency(%q<ruby-debug19>, [">= 0"])
|
|
43
44
|
else
|
|
45
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
|
46
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.2"])
|
|
47
|
+
s.add_dependency(%q<ruby-debug>, [">= 0"])
|
|
48
|
+
s.add_dependency(%q<ruby-debug19>, [">= 0"])
|
|
44
49
|
end
|
|
45
50
|
else
|
|
51
|
+
s.add_dependency(%q<bundler>, ["~> 1.0.0"])
|
|
52
|
+
s.add_dependency(%q<jeweler>, ["~> 1.6.2"])
|
|
53
|
+
s.add_dependency(%q<ruby-debug>, [">= 0"])
|
|
54
|
+
s.add_dependency(%q<ruby-debug19>, [">= 0"])
|
|
46
55
|
end
|
|
47
56
|
end
|
|
48
57
|
|
metadata
CHANGED
|
@@ -1,12 +1,8 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: zidian
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
prerelease:
|
|
5
|
-
|
|
6
|
-
- 0
|
|
7
|
-
- 2
|
|
8
|
-
- 0
|
|
9
|
-
version: 0.2.0
|
|
4
|
+
prerelease:
|
|
5
|
+
version: 0.3.0
|
|
10
6
|
platform: ruby
|
|
11
7
|
authors:
|
|
12
8
|
- Bastien Vaucher
|
|
@@ -14,10 +10,53 @@ autorequire:
|
|
|
14
10
|
bindir: bin
|
|
15
11
|
cert_chain: []
|
|
16
12
|
|
|
17
|
-
date:
|
|
13
|
+
date: 2011-08-03 00:00:00 +02:00
|
|
18
14
|
default_executable:
|
|
19
|
-
dependencies:
|
|
20
|
-
|
|
15
|
+
dependencies:
|
|
16
|
+
- !ruby/object:Gem::Dependency
|
|
17
|
+
name: bundler
|
|
18
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
|
19
|
+
none: false
|
|
20
|
+
requirements:
|
|
21
|
+
- - ~>
|
|
22
|
+
- !ruby/object:Gem::Version
|
|
23
|
+
version: 1.0.0
|
|
24
|
+
type: :development
|
|
25
|
+
prerelease: false
|
|
26
|
+
version_requirements: *id001
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: jeweler
|
|
29
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
|
30
|
+
none: false
|
|
31
|
+
requirements:
|
|
32
|
+
- - ~>
|
|
33
|
+
- !ruby/object:Gem::Version
|
|
34
|
+
version: 1.6.2
|
|
35
|
+
type: :development
|
|
36
|
+
prerelease: false
|
|
37
|
+
version_requirements: *id002
|
|
38
|
+
- !ruby/object:Gem::Dependency
|
|
39
|
+
name: ruby-debug
|
|
40
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
|
41
|
+
none: false
|
|
42
|
+
requirements:
|
|
43
|
+
- - ">="
|
|
44
|
+
- !ruby/object:Gem::Version
|
|
45
|
+
version: "0"
|
|
46
|
+
type: :development
|
|
47
|
+
prerelease: false
|
|
48
|
+
version_requirements: *id003
|
|
49
|
+
- !ruby/object:Gem::Dependency
|
|
50
|
+
name: ruby-debug19
|
|
51
|
+
requirement: &id004 !ruby/object:Gem::Requirement
|
|
52
|
+
none: false
|
|
53
|
+
requirements:
|
|
54
|
+
- - ">="
|
|
55
|
+
- !ruby/object:Gem::Version
|
|
56
|
+
version: "0"
|
|
57
|
+
type: :development
|
|
58
|
+
prerelease: false
|
|
59
|
+
version_requirements: *id004
|
|
21
60
|
description: Chinese dictionary using the CEDICT word list
|
|
22
61
|
email: bastien.vaucher@gmail.com
|
|
23
62
|
executables: []
|
|
@@ -27,13 +66,15 @@ extensions: []
|
|
|
27
66
|
extra_rdoc_files:
|
|
28
67
|
- README.mkd
|
|
29
68
|
files:
|
|
30
|
-
-
|
|
69
|
+
- Gemfile
|
|
70
|
+
- Gemfile.lock
|
|
31
71
|
- Manifest
|
|
32
72
|
- README.mkd
|
|
33
73
|
- Rakefile
|
|
34
74
|
- VERSION
|
|
35
75
|
- lib/cedict_ts.u8
|
|
36
76
|
- lib/zidian.rb
|
|
77
|
+
- test/helper.rb
|
|
37
78
|
- test/test_zidian.rb
|
|
38
79
|
- zidian.gemspec
|
|
39
80
|
has_rdoc: true
|
|
@@ -41,30 +82,31 @@ homepage: http://github.com/bastien/zidian
|
|
|
41
82
|
licenses: []
|
|
42
83
|
|
|
43
84
|
post_install_message:
|
|
44
|
-
rdoc_options:
|
|
45
|
-
|
|
85
|
+
rdoc_options: []
|
|
86
|
+
|
|
46
87
|
require_paths:
|
|
47
88
|
- lib
|
|
48
89
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
90
|
+
none: false
|
|
49
91
|
requirements:
|
|
50
92
|
- - ">="
|
|
51
93
|
- !ruby/object:Gem::Version
|
|
94
|
+
hash: 4377275587190922647
|
|
52
95
|
segments:
|
|
53
96
|
- 0
|
|
54
97
|
version: "0"
|
|
55
98
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
99
|
+
none: false
|
|
56
100
|
requirements:
|
|
57
101
|
- - ">="
|
|
58
102
|
- !ruby/object:Gem::Version
|
|
59
|
-
segments:
|
|
60
|
-
- 0
|
|
61
103
|
version: "0"
|
|
62
104
|
requirements: []
|
|
63
105
|
|
|
64
106
|
rubyforge_project:
|
|
65
|
-
rubygems_version: 1.
|
|
107
|
+
rubygems_version: 1.6.2
|
|
66
108
|
signing_key:
|
|
67
109
|
specification_version: 3
|
|
68
110
|
summary: Chinese dictionary
|
|
69
|
-
test_files:
|
|
70
|
-
|
|
111
|
+
test_files: []
|
|
112
|
+
|
data/.gitignore
DELETED