analects 0.3.1 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/Gemfile.lock +8 -3
- data/README.md +2 -0
- data/Rakefile +0 -3
- data/analects.gemspec +5 -6
- data/lib/analects.rb +1 -0
- data/lib/analects/library.rb +40 -28
- data/lib/analects/rake_tasks.rb +10 -9
- data/lib/analects/source.rb +25 -10
- data/lib/analects/unihan_loader.rb +7 -0
- data/lib/analects/version.rb +1 -1
- data/spec/analects/library_spec.rb +1 -1
- metadata +48 -61
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9765c8c83a2eaee1072a513d296bb558b28dfe7e
|
4
|
+
data.tar.gz: 388599cfc445653f210dda46a5c1b26253ca37d6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f11f282ce2684189ba63300a030481149e06a3e86835836443b171cffcfca9cbd37af360f54f24d0612fa7a0e5c3f20ca9f55d39fab2fe7cd8f5925f8bba2ec
|
7
|
+
data.tar.gz: 2049a14e1c204438ba004a92f69127d221c452d9b04bdd243dbfebb99f070a918df2e17e1e5dba151e44128120ab07f47639363cc1fbf1652cbe215a215276fc
|
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -7,12 +7,18 @@ GIT
|
|
7
7
|
PATH
|
8
8
|
remote: .
|
9
9
|
specs:
|
10
|
-
analects (0.3.1)
|
10
|
+
analects (0.4.0)
|
11
11
|
ice_nine (~> 0.11.0)
|
12
12
|
inflecto (~> 0.0.2)
|
13
13
|
plexus-rmmseg (~> 0.1.6)
|
14
|
+
rubyzip (~> 1.1)
|
14
15
|
ting (~> 0.9.0)
|
15
16
|
|
17
|
+
PATH
|
18
|
+
remote: /home/arne/github/rubyzip
|
19
|
+
specs:
|
20
|
+
rubyzip (1.1.3)
|
21
|
+
|
16
22
|
GEM
|
17
23
|
remote: https://rubygems.org/
|
18
24
|
specs:
|
@@ -149,7 +155,6 @@ GEM
|
|
149
155
|
sexp_processor (~> 4.0)
|
150
156
|
ruby_parser (3.4.1)
|
151
157
|
sexp_processor (~> 4.1)
|
152
|
-
rubygems-tasks (0.2.4)
|
153
158
|
rubysl-logger (2.0.0)
|
154
159
|
rubysl-open-uri (2.0.0)
|
155
160
|
rubysl-prettyprint (2.0.3)
|
@@ -212,11 +217,11 @@ DEPENDENCIES
|
|
212
217
|
rspec (~> 2.14.1)
|
213
218
|
rspec-core (~> 2.14.8)
|
214
219
|
rubocop (~> 0.18.1)
|
215
|
-
rubygems-tasks
|
216
220
|
rubysl-logger (~> 2.0.0)
|
217
221
|
rubysl-open-uri (~> 2.0.0)
|
218
222
|
rubysl-prettyprint (~> 2.0.2)
|
219
223
|
rubysl-singleton (~> 2.0.0)
|
224
|
+
rubyzip!
|
220
225
|
simplecov (~> 0.8.2)
|
221
226
|
terminal-notifier-guard (~> 1.5.3)
|
222
227
|
yard (~> 0.8.7)
|
data/README.md
CHANGED
@@ -36,6 +36,8 @@ end
|
|
36
36
|
rake analects:download:all # download all sources
|
37
37
|
rake analects:download:cedict # download CC-CEDICT
|
38
38
|
rake analects:download:chise_ids # download Chise-IDS
|
39
|
+
rake analects:download:hsk # download HSK data
|
40
|
+
rake analects:download:unihan # download Unihan database
|
39
41
|
```
|
40
42
|
|
41
43
|
Or from Ruby
|
data/Rakefile
CHANGED
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'rspec/core/rake_task'
|
2
2
|
require 'devtools'
|
3
|
-
require 'rubygems/tasks'
|
4
3
|
require 'rubygems/package_task'
|
5
4
|
|
6
5
|
$LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
|
@@ -12,8 +11,6 @@ Analects.init_rake_tasks
|
|
12
11
|
RSpec::Core::RakeTask.new(:spec)
|
13
12
|
task :default => :spec
|
14
13
|
|
15
|
-
Gem::Tasks.new
|
16
|
-
|
17
14
|
spec = Gem::Specification.load(File.expand_path('../analects.gemspec', __FILE__))
|
18
15
|
gem = Gem::PackageTask.new(spec)
|
19
16
|
gem.define
|
data/analects.gemspec
CHANGED
@@ -16,14 +16,13 @@ Gem::Specification.new do |gem|
|
|
16
16
|
gem.test_files = `git ls-files -- spec`.split($/)
|
17
17
|
gem.extra_rdoc_files = %w[README.md]
|
18
18
|
|
19
|
-
gem.add_development_dependency 'rspec'
|
20
|
-
gem.add_development_dependency 'simplecov'
|
21
|
-
gem.add_development_dependency 'rake'
|
22
|
-
gem.add_development_dependency 'rubygems-tasks'
|
23
|
-
gem.add_development_dependency 'pry'
|
24
|
-
|
25
19
|
gem.add_runtime_dependency 'inflecto' , '~> 0.0.2'
|
26
20
|
gem.add_runtime_dependency 'plexus-rmmseg' , '~> 0.1.6'
|
27
21
|
gem.add_runtime_dependency 'ting' , '~> 0.9.0'
|
28
22
|
gem.add_runtime_dependency 'ice_nine' , '~> 0.11.0'
|
23
|
+
gem.add_runtime_dependency 'rubyzip' , '~> 1.1'
|
24
|
+
|
25
|
+
gem.add_development_dependency 'rspec'
|
26
|
+
gem.add_development_dependency 'rake'
|
27
|
+
gem.add_development_dependency 'pry'
|
29
28
|
end
|
data/lib/analects.rb
CHANGED
@@ -30,6 +30,7 @@ require 'analects/cli/progress'
|
|
30
30
|
require 'analects/cedict_loader'
|
31
31
|
require 'analects/hsk_loader'
|
32
32
|
require 'analects/chise_ids_loader'
|
33
|
+
require 'analects/unihan_loader'
|
33
34
|
require 'analects/source'
|
34
35
|
require 'analects/library'
|
35
36
|
require 'analects/tokenizer'
|
data/lib/analects/library.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
module Analects
|
2
|
-
CEDICT_URL
|
3
|
-
CHISE_IDS_URL
|
4
|
-
UNIHAN_URL
|
5
|
-
HSK_URL
|
2
|
+
CEDICT_URL = 'http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz'
|
3
|
+
CHISE_IDS_URL = 'http://git.chise.org/git/chise/ids.git'
|
4
|
+
UNIHAN_URL = 'http://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip'
|
5
|
+
HSK_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/hsk/hsk.csv'
|
6
|
+
TW_CURRICULUM_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/taiwan_school_curriculum.txt'
|
6
7
|
|
7
8
|
class Library
|
8
9
|
attr_reader :options
|
@@ -23,48 +24,59 @@ module Analects
|
|
23
24
|
end
|
24
25
|
end
|
25
26
|
|
27
|
+
def sources
|
28
|
+
[
|
29
|
+
cedict,
|
30
|
+
chise_ids,
|
31
|
+
unihan,
|
32
|
+
hsk
|
33
|
+
]
|
34
|
+
end
|
35
|
+
|
26
36
|
def cedict
|
27
|
-
@cedict ||=
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
}.merge(options_for :cedict)
|
37
|
+
@cedict ||= create_source(
|
38
|
+
:cedict,
|
39
|
+
data_file: 'cedict_1_0_ts_utf-8_mdbg.txt',
|
40
|
+
retrieval: [ :http, :gunzip, :save ]
|
32
41
|
)
|
33
42
|
end
|
34
43
|
|
35
44
|
def chise_ids
|
36
|
-
@chise_ids ||=
|
37
|
-
|
38
|
-
|
39
|
-
}.merge(options_for :chise_ids)
|
45
|
+
@chise_ids ||= create_source(
|
46
|
+
:chise_ids,
|
47
|
+
retrieval: :git
|
40
48
|
)
|
41
49
|
end
|
42
50
|
|
43
51
|
def unihan
|
44
|
-
@unihan ||=
|
45
|
-
|
46
|
-
|
47
|
-
}.merge(options_for :chise_ids)
|
52
|
+
@unihan ||= create_source(
|
53
|
+
:unihan,
|
54
|
+
retrieval: [ :http, :unzip ]
|
48
55
|
)
|
49
56
|
end
|
50
57
|
|
51
58
|
def hsk
|
52
|
-
@hsk ||=
|
53
|
-
|
54
|
-
|
59
|
+
@hsk ||= create_source(
|
60
|
+
:hsk,
|
61
|
+
data_file: 'hsk.csv',
|
62
|
+
retrieval: [ :http, :save ]
|
55
63
|
)
|
56
64
|
end
|
57
65
|
|
58
66
|
private
|
59
67
|
|
60
|
-
def
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
+
def create_source(name, source_options)
|
69
|
+
Source.new(
|
70
|
+
source_options.merge(
|
71
|
+
{
|
72
|
+
name: name,
|
73
|
+
library: self,
|
74
|
+
url: Analects.const_get("#{name.to_s.upcase}_URL"),
|
75
|
+
loader: Analects.const_get("#{Inflecto.camelize name}Loader"),
|
76
|
+
data_dir: data_dir
|
77
|
+
}
|
78
|
+
).merge(options.fetch(name, {}))
|
79
|
+
)
|
68
80
|
end
|
69
81
|
|
70
82
|
end
|
data/lib/analects/rake_tasks.rb
CHANGED
@@ -18,6 +18,10 @@ module Analects
|
|
18
18
|
@library ||= Analects::Library.new(options)
|
19
19
|
end
|
20
20
|
|
21
|
+
def sources
|
22
|
+
library.sources
|
23
|
+
end
|
24
|
+
|
21
25
|
def options
|
22
26
|
@options ||= {}
|
23
27
|
end
|
@@ -29,18 +33,15 @@ module Analects
|
|
29
33
|
def define
|
30
34
|
namespace @name do
|
31
35
|
namespace :download do
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
desc 'download Chise-IDS'
|
38
|
-
task :chise_ids do
|
39
|
-
library.chise_ids.retrieve!
|
36
|
+
sources.each do |source|
|
37
|
+
desc "download #{source.name}"
|
38
|
+
task source.name do
|
39
|
+
source.retrieve!
|
40
|
+
end
|
40
41
|
end
|
41
42
|
|
42
43
|
desc 'download all sources'
|
43
|
-
task :all =>
|
44
|
+
task :all => sources.map(&:name)
|
44
45
|
end
|
45
46
|
end
|
46
47
|
|
data/lib/analects/source.rb
CHANGED
@@ -17,15 +17,17 @@ module Analects
|
|
17
17
|
end
|
18
18
|
|
19
19
|
def data_dir
|
20
|
-
options[:data_dir]
|
20
|
+
Pathname(options[:data_dir])
|
21
21
|
end
|
22
22
|
|
23
23
|
def location
|
24
|
-
options[:data_file] ?
|
24
|
+
options[:data_file] ?
|
25
|
+
data_dir.join(options[:data_file]) :
|
26
|
+
data_dir.join(options[:name].to_s)
|
25
27
|
end
|
26
28
|
|
27
29
|
def data_file_present?
|
28
|
-
|
30
|
+
location.exist?
|
29
31
|
end
|
30
32
|
|
31
33
|
def retrieve
|
@@ -33,32 +35,45 @@ module Analects
|
|
33
35
|
end
|
34
36
|
|
35
37
|
def retrieve!
|
36
|
-
retrieval.inject(
|
38
|
+
retrieval.inject(url) do | result, method |
|
37
39
|
self.send( "retrieve_#{method}", result )
|
38
40
|
end
|
39
41
|
end
|
40
42
|
|
41
43
|
# url -> stream
|
42
|
-
def retrieve_http(
|
44
|
+
def retrieve_http(url)
|
43
45
|
require 'open-uri'
|
44
|
-
open(
|
46
|
+
StringIO.new(open(url).read)
|
45
47
|
end
|
46
48
|
|
47
49
|
# gzipped stream -> uncompressed stream
|
48
|
-
def retrieve_gunzip(
|
50
|
+
def retrieve_gunzip(stream)
|
49
51
|
require 'zlib'
|
50
|
-
Zlib::GzipReader.new(
|
52
|
+
Zlib::GzipReader.new(stream)
|
53
|
+
end
|
54
|
+
|
55
|
+
def retrieve_unzip(stream)
|
56
|
+
require 'zip'
|
57
|
+
location.mkdir unless location.exist?
|
58
|
+
Zip::InputStream.open(stream) do |io|
|
59
|
+
while (entry = io.get_next_entry)
|
60
|
+
next if entry.ftype == :symlink
|
61
|
+
loc = location.join(entry.name)
|
62
|
+
loc.delete if loc.exist?
|
63
|
+
entry.extract(loc)
|
64
|
+
end
|
65
|
+
end
|
51
66
|
end
|
52
67
|
|
53
68
|
# stream|string -> create data file
|
54
|
-
def retrieve_save(
|
69
|
+
def retrieve_save(data)
|
55
70
|
File.open( location, 'w' ) do |f|
|
56
71
|
f << ( data.respond_to?(:read) ? data.read : data )
|
57
72
|
end
|
58
73
|
end
|
59
74
|
|
60
75
|
# url -> clones repo
|
61
|
-
def retrieve_git(
|
76
|
+
def retrieve_git(url)
|
62
77
|
`git clone #{url} #{data_dir}/#{name}` # Admittedly crude
|
63
78
|
end
|
64
79
|
|
data/lib/analects/version.rb
CHANGED
data/spec/analects/library_spec.rb
CHANGED
@@ -8,7 +8,7 @@ describe Analects::Library do
|
|
8
8
|
subject(:library) {
|
9
9
|
described_class.new(options)
|
10
10
|
}
|
11
|
-
let(:data_dir) {
|
11
|
+
let(:data_dir) { Pathname(Dir.tmpdir).join('analects-' + SecureRandom.hex(16)) }
|
12
12
|
let(:options) {
|
13
13
|
{ data_dir: data_dir }
|
14
14
|
}
|
metadata
CHANGED
@@ -1,141 +1,127 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: analects
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.1
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Arne Brasseur
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-04-
|
11
|
+
date: 2014-04-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
15
|
-
requirement: !ruby/object:Gem::Requirement
|
16
|
-
requirements:
|
17
|
-
- - '>='
|
18
|
-
- !ruby/object:Gem::Version
|
19
|
-
version: '0'
|
20
|
-
type: :development
|
21
|
-
prerelease: false
|
22
|
-
version_requirements: !ruby/object:Gem::Requirement
|
23
|
-
requirements:
|
24
|
-
- - '>='
|
25
|
-
- !ruby/object:Gem::Version
|
26
|
-
version: '0'
|
27
|
-
- !ruby/object:Gem::Dependency
|
28
|
-
name: simplecov
|
14
|
+
name: inflecto
|
29
15
|
requirement: !ruby/object:Gem::Requirement
|
30
16
|
requirements:
|
31
|
-
- -
|
17
|
+
- - ~>
|
32
18
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
34
|
-
type: :
|
19
|
+
version: 0.0.2
|
20
|
+
type: :runtime
|
35
21
|
prerelease: false
|
36
22
|
version_requirements: !ruby/object:Gem::Requirement
|
37
23
|
requirements:
|
38
|
-
- -
|
24
|
+
- - ~>
|
39
25
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
26
|
+
version: 0.0.2
|
41
27
|
- !ruby/object:Gem::Dependency
|
42
|
-
name:
|
28
|
+
name: plexus-rmmseg
|
43
29
|
requirement: !ruby/object:Gem::Requirement
|
44
30
|
requirements:
|
45
|
-
- -
|
31
|
+
- - ~>
|
46
32
|
- !ruby/object:Gem::Version
|
47
|
-
version:
|
48
|
-
type: :
|
33
|
+
version: 0.1.6
|
34
|
+
type: :runtime
|
49
35
|
prerelease: false
|
50
36
|
version_requirements: !ruby/object:Gem::Requirement
|
51
37
|
requirements:
|
52
|
-
- -
|
38
|
+
- - ~>
|
53
39
|
- !ruby/object:Gem::Version
|
54
|
-
version:
|
40
|
+
version: 0.1.6
|
55
41
|
- !ruby/object:Gem::Dependency
|
56
|
-
name:
|
42
|
+
name: ting
|
57
43
|
requirement: !ruby/object:Gem::Requirement
|
58
44
|
requirements:
|
59
|
-
- -
|
45
|
+
- - ~>
|
60
46
|
- !ruby/object:Gem::Version
|
61
|
-
version:
|
62
|
-
type: :
|
47
|
+
version: 0.9.0
|
48
|
+
type: :runtime
|
63
49
|
prerelease: false
|
64
50
|
version_requirements: !ruby/object:Gem::Requirement
|
65
51
|
requirements:
|
66
|
-
- -
|
52
|
+
- - ~>
|
67
53
|
- !ruby/object:Gem::Version
|
68
|
-
version:
|
54
|
+
version: 0.9.0
|
69
55
|
- !ruby/object:Gem::Dependency
|
70
|
-
name:
|
56
|
+
name: ice_nine
|
71
57
|
requirement: !ruby/object:Gem::Requirement
|
72
58
|
requirements:
|
73
|
-
- -
|
59
|
+
- - ~>
|
74
60
|
- !ruby/object:Gem::Version
|
75
|
-
version:
|
76
|
-
type: :
|
61
|
+
version: 0.11.0
|
62
|
+
type: :runtime
|
77
63
|
prerelease: false
|
78
64
|
version_requirements: !ruby/object:Gem::Requirement
|
79
65
|
requirements:
|
80
|
-
- -
|
66
|
+
- - ~>
|
81
67
|
- !ruby/object:Gem::Version
|
82
|
-
version:
|
68
|
+
version: 0.11.0
|
83
69
|
- !ruby/object:Gem::Dependency
|
84
|
-
name:
|
70
|
+
name: rubyzip
|
85
71
|
requirement: !ruby/object:Gem::Requirement
|
86
72
|
requirements:
|
87
73
|
- - ~>
|
88
74
|
- !ruby/object:Gem::Version
|
89
|
-
version:
|
75
|
+
version: '1.1'
|
90
76
|
type: :runtime
|
91
77
|
prerelease: false
|
92
78
|
version_requirements: !ruby/object:Gem::Requirement
|
93
79
|
requirements:
|
94
80
|
- - ~>
|
95
81
|
- !ruby/object:Gem::Version
|
96
|
-
version:
|
82
|
+
version: '1.1'
|
97
83
|
- !ruby/object:Gem::Dependency
|
98
|
-
name:
|
84
|
+
name: rspec
|
99
85
|
requirement: !ruby/object:Gem::Requirement
|
100
86
|
requirements:
|
101
|
-
- -
|
87
|
+
- - '>='
|
102
88
|
- !ruby/object:Gem::Version
|
103
|
-
version: 0
|
104
|
-
type: :
|
89
|
+
version: '0'
|
90
|
+
type: :development
|
105
91
|
prerelease: false
|
106
92
|
version_requirements: !ruby/object:Gem::Requirement
|
107
93
|
requirements:
|
108
|
-
- -
|
94
|
+
- - '>='
|
109
95
|
- !ruby/object:Gem::Version
|
110
|
-
version: 0
|
96
|
+
version: '0'
|
111
97
|
- !ruby/object:Gem::Dependency
|
112
|
-
name:
|
98
|
+
name: rake
|
113
99
|
requirement: !ruby/object:Gem::Requirement
|
114
100
|
requirements:
|
115
|
-
- -
|
101
|
+
- - '>='
|
116
102
|
- !ruby/object:Gem::Version
|
117
|
-
version: 0
|
118
|
-
type: :
|
103
|
+
version: '0'
|
104
|
+
type: :development
|
119
105
|
prerelease: false
|
120
106
|
version_requirements: !ruby/object:Gem::Requirement
|
121
107
|
requirements:
|
122
|
-
- -
|
108
|
+
- - '>='
|
123
109
|
- !ruby/object:Gem::Version
|
124
|
-
version: 0
|
110
|
+
version: '0'
|
125
111
|
- !ruby/object:Gem::Dependency
|
126
|
-
name:
|
112
|
+
name: pry
|
127
113
|
requirement: !ruby/object:Gem::Requirement
|
128
114
|
requirements:
|
129
|
-
- -
|
115
|
+
- - '>='
|
130
116
|
- !ruby/object:Gem::Version
|
131
|
-
version: 0
|
132
|
-
type: :
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
133
119
|
prerelease: false
|
134
120
|
version_requirements: !ruby/object:Gem::Requirement
|
135
121
|
requirements:
|
136
|
-
- -
|
122
|
+
- - '>='
|
137
123
|
- !ruby/object:Gem::Version
|
138
|
-
version: 0
|
124
|
+
version: '0'
|
139
125
|
description: Toolkit for Mandarin language learning apps
|
140
126
|
email:
|
141
127
|
- arne.brasseur@gmail.com
|
@@ -176,6 +162,7 @@ files:
|
|
176
162
|
- lib/analects/rake_tasks.rb
|
177
163
|
- lib/analects/source.rb
|
178
164
|
- lib/analects/tokenizer.rb
|
165
|
+
- lib/analects/unihan_loader.rb
|
179
166
|
- lib/analects/version.rb
|
180
167
|
- lib/cjk_string.rb
|
181
168
|
- lib/generators/analects.rb
|