analects 0.3.1 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +8 -3
- data/README.md +2 -0
- data/Rakefile +0 -3
- data/analects.gemspec +5 -6
- data/lib/analects.rb +1 -0
- data/lib/analects/library.rb +40 -28
- data/lib/analects/rake_tasks.rb +10 -9
- data/lib/analects/source.rb +25 -10
- data/lib/analects/unihan_loader.rb +7 -0
- data/lib/analects/version.rb +1 -1
- data/spec/analects/library_spec.rb +1 -1
- metadata +48 -61
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9765c8c83a2eaee1072a513d296bb558b28dfe7e
|
4
|
+
data.tar.gz: 388599cfc445653f210dda46a5c1b26253ca37d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f11f282ce2684189ba63300a030481149e06a3e86835836443b171cffcfca9cbd37af360f54f24d0612fa7a0e5c3f20ca9f55d39fab2fe7cd8f5925f8bba2ec
|
7
|
+
data.tar.gz: 2049a14e1c204438ba004a92f69127d221c452d9b04bdd243dbfebb99f070a918df2e17e1e5dba151e44128120ab07f47639363cc1fbf1652cbe215a215276fc
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -7,12 +7,18 @@ GIT
|
|
7
7
|
PATH
|
8
8
|
remote: .
|
9
9
|
specs:
|
10
|
-
analects (0.3.1)
|
10
|
+
analects (0.4.0)
|
11
11
|
ice_nine (~> 0.11.0)
|
12
12
|
inflecto (~> 0.0.2)
|
13
13
|
plexus-rmmseg (~> 0.1.6)
|
14
|
+
rubyzip (~> 1.1)
|
14
15
|
ting (~> 0.9.0)
|
15
16
|
|
17
|
+
PATH
|
18
|
+
remote: /home/arne/github/rubyzip
|
19
|
+
specs:
|
20
|
+
rubyzip (1.1.3)
|
21
|
+
|
16
22
|
GEM
|
17
23
|
remote: https://rubygems.org/
|
18
24
|
specs:
|
@@ -149,7 +155,6 @@ GEM
|
|
149
155
|
sexp_processor (~> 4.0)
|
150
156
|
ruby_parser (3.4.1)
|
151
157
|
sexp_processor (~> 4.1)
|
152
|
-
rubygems-tasks (0.2.4)
|
153
158
|
rubysl-logger (2.0.0)
|
154
159
|
rubysl-open-uri (2.0.0)
|
155
160
|
rubysl-prettyprint (2.0.3)
|
@@ -212,11 +217,11 @@ DEPENDENCIES
|
|
212
217
|
rspec (~> 2.14.1)
|
213
218
|
rspec-core (~> 2.14.8)
|
214
219
|
rubocop (~> 0.18.1)
|
215
|
-
rubygems-tasks
|
216
220
|
rubysl-logger (~> 2.0.0)
|
217
221
|
rubysl-open-uri (~> 2.0.0)
|
218
222
|
rubysl-prettyprint (~> 2.0.2)
|
219
223
|
rubysl-singleton (~> 2.0.0)
|
224
|
+
rubyzip!
|
220
225
|
simplecov (~> 0.8.2)
|
221
226
|
terminal-notifier-guard (~> 1.5.3)
|
222
227
|
yard (~> 0.8.7)
|
data/README.md
CHANGED
@@ -36,6 +36,8 @@ end
|
|
36
36
|
rake analects:download:all # download all sources
|
37
37
|
rake analects:download:cedict # download CC-CEDICT
|
38
38
|
rake analects:download:chise_ids # download Chise-IDS
|
39
|
+
rake analects:download:hsk # download HSK data
|
40
|
+
rake analects:download:unihan # download Unihan database
|
39
41
|
```
|
40
42
|
|
41
43
|
Or from Ruby
|
data/Rakefile
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'rspec/core/rake_task'
|
2
2
|
require 'devtools'
|
3
|
-
require 'rubygems/tasks'
|
4
3
|
require 'rubygems/package_task'
|
5
4
|
|
6
5
|
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
|
@@ -12,8 +11,6 @@ Analects.init_rake_tasks
|
|
12
11
|
RSpec::Core::RakeTask.new(:spec)
|
13
12
|
task :default => :spec
|
14
13
|
|
15
|
-
Gem::Tasks.new
|
16
|
-
|
17
14
|
spec = Gem::Specification.load(File.expand_path('../analects.gemspec', __FILE__))
|
18
15
|
gem = Gem::PackageTask.new(spec)
|
19
16
|
gem.define
|
data/analects.gemspec
CHANGED
@@ -16,14 +16,13 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.test_files = `git ls-files -- spec`.split($/)
|
17
17
|
gem.extra_rdoc_files = %w[README.md]
|
18
18
|
|
19
|
-
gem.add_development_dependency 'rspec'
|
20
|
-
gem.add_development_dependency 'simplecov'
|
21
|
-
gem.add_development_dependency 'rake'
|
22
|
-
gem.add_development_dependency 'rubygems-tasks'
|
23
|
-
gem.add_development_dependency 'pry'
|
24
|
-
|
25
19
|
gem.add_runtime_dependency 'inflecto' , '~> 0.0.2'
|
26
20
|
gem.add_runtime_dependency 'plexus-rmmseg' , '~> 0.1.6'
|
27
21
|
gem.add_runtime_dependency 'ting' , '~> 0.9.0'
|
28
22
|
gem.add_runtime_dependency 'ice_nine' , '~> 0.11.0'
|
23
|
+
gem.add_runtime_dependency 'rubyzip' , '~> 1.1'
|
24
|
+
|
25
|
+
gem.add_development_dependency 'rspec'
|
26
|
+
gem.add_development_dependency 'rake'
|
27
|
+
gem.add_development_dependency 'pry'
|
29
28
|
end
|
data/lib/analects.rb
CHANGED
@@ -30,6 +30,7 @@ require 'analects/cli/progress'
|
|
30
30
|
require 'analects/cedict_loader'
|
31
31
|
require 'analects/hsk_loader'
|
32
32
|
require 'analects/chise_ids_loader'
|
33
|
+
require 'analects/unihan_loader'
|
33
34
|
require 'analects/source'
|
34
35
|
require 'analects/library'
|
35
36
|
require 'analects/tokenizer'
|
data/lib/analects/library.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
module Analects
|
2
|
-
CEDICT_URL
|
3
|
-
CHISE_IDS_URL
|
4
|
-
UNIHAN_URL
|
5
|
-
HSK_URL
|
2
|
+
CEDICT_URL = 'http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz'
|
3
|
+
CHISE_IDS_URL = 'http://git.chise.org/git/chise/ids.git'
|
4
|
+
UNIHAN_URL = 'http://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip'
|
5
|
+
HSK_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/hsk/hsk.csv'
|
6
|
+
TW_CURRICULUM_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/taiwan_school_curriculum.txt'
|
6
7
|
|
7
8
|
class Library
|
8
9
|
attr_reader :options
|
@@ -23,48 +24,59 @@ module Analects
|
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
27
|
+
def sources
|
28
|
+
[
|
29
|
+
cedict,
|
30
|
+
chise_ids,
|
31
|
+
unihan,
|
32
|
+
hsk
|
33
|
+
]
|
34
|
+
end
|
35
|
+
|
26
36
|
def cedict
|
27
|
-
@cedict ||=
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
}.merge(options_for :cedict)
|
37
|
+
@cedict ||= create_source(
|
38
|
+
:cedict,
|
39
|
+
data_file: 'cedict_1_0_ts_utf-8_mdbg.txt',
|
40
|
+
retrieval: [ :http, :gunzip, :save ]
|
32
41
|
)
|
33
42
|
end
|
34
43
|
|
35
44
|
def chise_ids
|
36
|
-
@chise_ids ||=
|
37
|
-
|
38
|
-
|
39
|
-
}.merge(options_for :chise_ids)
|
45
|
+
@chise_ids ||= create_source(
|
46
|
+
:chise_ids,
|
47
|
+
retrieval: :git
|
40
48
|
)
|
41
49
|
end
|
42
50
|
|
43
51
|
def unihan
|
44
|
-
@unihan ||=
|
45
|
-
|
46
|
-
|
47
|
-
}.merge(options_for :chise_ids)
|
52
|
+
@unihan ||= create_source(
|
53
|
+
:unihan,
|
54
|
+
retrieval: [ :http, :unzip ]
|
48
55
|
)
|
49
56
|
end
|
50
57
|
|
51
58
|
def hsk
|
52
|
-
@hsk ||=
|
53
|
-
|
54
|
-
|
59
|
+
@hsk ||= create_source(
|
60
|
+
:hsk,
|
61
|
+
data_file: 'hsk.csv',
|
62
|
+
retrieval: [ :http, :save ]
|
55
63
|
)
|
56
64
|
end
|
57
65
|
|
58
66
|
private
|
59
67
|
|
60
|
-
def
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
+
def create_source(name, source_options)
|
69
|
+
Source.new(
|
70
|
+
source_options.merge(
|
71
|
+
{
|
72
|
+
name: name,
|
73
|
+
library: self,
|
74
|
+
url: Analects.const_get("#{name.to_s.upcase}_URL"),
|
75
|
+
loader: Analects.const_get("#{Inflecto.camelize name}Loader"),
|
76
|
+
data_dir: data_dir
|
77
|
+
}
|
78
|
+
).merge(options.fetch(name, {}))
|
79
|
+
)
|
68
80
|
end
|
69
81
|
|
70
82
|
end
|
data/lib/analects/rake_tasks.rb
CHANGED
@@ -18,6 +18,10 @@ module Analects
|
|
18
18
|
@library ||= Analects::Library.new(options)
|
19
19
|
end
|
20
20
|
|
21
|
+
def sources
|
22
|
+
library.sources
|
23
|
+
end
|
24
|
+
|
21
25
|
def options
|
22
26
|
@options ||= {}
|
23
27
|
end
|
@@ -29,18 +33,15 @@ module Analects
|
|
29
33
|
def define
|
30
34
|
namespace @name do
|
31
35
|
namespace :download do
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
desc 'download Chise-IDS'
|
38
|
-
task :chise_ids do
|
39
|
-
library.chise_ids.retrieve!
|
36
|
+
sources.each do |source|
|
37
|
+
desc "download #{source.name}"
|
38
|
+
task source.name do
|
39
|
+
source.retrieve!
|
40
|
+
end
|
40
41
|
end
|
41
42
|
|
42
43
|
desc 'download all sources'
|
43
|
-
task :all =>
|
44
|
+
task :all => sources.map(&:name)
|
44
45
|
end
|
45
46
|
end
|
46
47
|
|
data/lib/analects/source.rb
CHANGED
@@ -17,15 +17,17 @@ module Analects
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def data_dir
|
20
|
-
options[:data_dir]
|
20
|
+
Pathname(options[:data_dir])
|
21
21
|
end
|
22
22
|
|
23
23
|
def location
|
24
|
-
options[:data_file] ?
|
24
|
+
options[:data_file] ?
|
25
|
+
data_dir.join(options[:data_file]) :
|
26
|
+
data_dir.join(options[:name].to_s)
|
25
27
|
end
|
26
28
|
|
27
29
|
def data_file_present?
|
28
|
-
|
30
|
+
location.exist?
|
29
31
|
end
|
30
32
|
|
31
33
|
def retrieve
|
@@ -33,32 +35,45 @@ module Analects
|
|
33
35
|
end
|
34
36
|
|
35
37
|
def retrieve!
|
36
|
-
retrieval.inject(
|
38
|
+
retrieval.inject(url) do | result, method |
|
37
39
|
self.send( "retrieve_#{method}", result )
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
41
43
|
# url -> stream
|
42
|
-
def retrieve_http(
|
44
|
+
def retrieve_http(url)
|
43
45
|
require 'open-uri'
|
44
|
-
open(
|
46
|
+
StringIO.new(open(url).read)
|
45
47
|
end
|
46
48
|
|
47
49
|
# gzipped stream -> uncompressed stream
|
48
|
-
def retrieve_gunzip(
|
50
|
+
def retrieve_gunzip(stream)
|
49
51
|
require 'zlib'
|
50
|
-
Zlib::GzipReader.new(
|
52
|
+
Zlib::GzipReader.new(stream)
|
53
|
+
end
|
54
|
+
|
55
|
+
def retrieve_unzip(stream)
|
56
|
+
require 'zip'
|
57
|
+
location.mkdir unless location.exist?
|
58
|
+
Zip::InputStream.open(stream) do |io|
|
59
|
+
while (entry = io.get_next_entry)
|
60
|
+
next if entry.ftype == :symlink
|
61
|
+
loc = location.join(entry.name)
|
62
|
+
loc.delete if loc.exist?
|
63
|
+
entry.extract(loc)
|
64
|
+
end
|
65
|
+
end
|
51
66
|
end
|
52
67
|
|
53
68
|
# stream|string -> create data file
|
54
|
-
def retrieve_save(
|
69
|
+
def retrieve_save(data)
|
55
70
|
File.open( location, 'w' ) do |f|
|
56
71
|
f << ( data.respond_to?(:read) ? data.read : data )
|
57
72
|
end
|
58
73
|
end
|
59
74
|
|
60
75
|
# url -> clones repo
|
61
|
-
def retrieve_git(
|
76
|
+
def retrieve_git(url)
|
62
77
|
`git clone #{url} #{data_dir}/#{name}` # Admittedly crude
|
63
78
|
end
|
64
79
|
|
data/lib/analects/version.rb
CHANGED
data/spec/analects/library_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Analects::Library do
|
|
8
8
|
subject(:library) {
|
9
9
|
described_class.new(options)
|
10
10
|
}
|
11
|
-
let(:data_dir) {
|
11
|
+
let(:data_dir) { Pathname(Dir.tmpdir).join('analects-' + SecureRandom.hex(16)) }
|
12
12
|
let(:options) {
|
13
13
|
{ data_dir: data_dir }
|
14
14
|
}
|
metadata
CHANGED
@@ -1,141 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: analects
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arne Brasseur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04-
|
11
|
+
date: 2014-04-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - '>='
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - '>='
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: simplecov
|
14
|
+
name: inflecto
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
30
16
|
requirements:
|
31
|
-
- -
|
17
|
+
- - ~>
|
32
18
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
34
|
-
type: :
|
19
|
+
version: 0.0.2
|
20
|
+
type: :runtime
|
35
21
|
prerelease: false
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
37
23
|
requirements:
|
38
|
-
- -
|
24
|
+
- - ~>
|
39
25
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
26
|
+
version: 0.0.2
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
28
|
+
name: plexus-rmmseg
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
44
30
|
requirements:
|
45
|
-
- -
|
31
|
+
- - ~>
|
46
32
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
type: :
|
33
|
+
version: 0.1.6
|
34
|
+
type: :runtime
|
49
35
|
prerelease: false
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
|
-
- -
|
38
|
+
- - ~>
|
53
39
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
40
|
+
version: 0.1.6
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
42
|
+
name: ting
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
|
-
- -
|
45
|
+
- - ~>
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
62
|
-
type: :
|
47
|
+
version: 0.9.0
|
48
|
+
type: :runtime
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- -
|
52
|
+
- - ~>
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
54
|
+
version: 0.9.0
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
56
|
+
name: ice_nine
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
|
-
- -
|
59
|
+
- - ~>
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
76
|
-
type: :
|
61
|
+
version: 0.11.0
|
62
|
+
type: :runtime
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
|
-
- -
|
66
|
+
- - ~>
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
68
|
+
version: 0.11.0
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
70
|
+
name: rubyzip
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
73
|
- - ~>
|
88
74
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
75
|
+
version: '1.1'
|
90
76
|
type: :runtime
|
91
77
|
prerelease: false
|
92
78
|
version_requirements: !ruby/object:Gem::Requirement
|
93
79
|
requirements:
|
94
80
|
- - ~>
|
95
81
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
82
|
+
version: '1.1'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
84
|
+
name: rspec
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
100
86
|
requirements:
|
101
|
-
- -
|
87
|
+
- - '>='
|
102
88
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0
|
104
|
-
type: :
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
105
91
|
prerelease: false
|
106
92
|
version_requirements: !ruby/object:Gem::Requirement
|
107
93
|
requirements:
|
108
|
-
- -
|
94
|
+
- - '>='
|
109
95
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0
|
96
|
+
version: '0'
|
111
97
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
98
|
+
name: rake
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
114
100
|
requirements:
|
115
|
-
- -
|
101
|
+
- - '>='
|
116
102
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0
|
118
|
-
type: :
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
107
|
requirements:
|
122
|
-
- -
|
108
|
+
- - '>='
|
123
109
|
- !ruby/object:Gem::Version
|
124
|
-
version: 0
|
110
|
+
version: '0'
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
112
|
+
name: pry
|
127
113
|
requirement: !ruby/object:Gem::Requirement
|
128
114
|
requirements:
|
129
|
-
- -
|
115
|
+
- - '>='
|
130
116
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0
|
132
|
-
type: :
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
133
119
|
prerelease: false
|
134
120
|
version_requirements: !ruby/object:Gem::Requirement
|
135
121
|
requirements:
|
136
|
-
- -
|
122
|
+
- - '>='
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0
|
124
|
+
version: '0'
|
139
125
|
description: Toolkit for Mandarin language learning apps
|
140
126
|
email:
|
141
127
|
- arne.brasseur@gmail.com
|
@@ -176,6 +162,7 @@ files:
|
|
176
162
|
- lib/analects/rake_tasks.rb
|
177
163
|
- lib/analects/source.rb
|
178
164
|
- lib/analects/tokenizer.rb
|
165
|
+
- lib/analects/unihan_loader.rb
|
179
166
|
- lib/analects/version.rb
|
180
167
|
- lib/cjk_string.rb
|
181
168
|
- lib/generators/analects.rb
|