analects 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c11afafa3e6efdbad98b84a73d3a190e2dc7fb52
4
- data.tar.gz: 74b50dbb99b0fbb20182e682d1a5b044930e089a
3
+ metadata.gz: 9765c8c83a2eaee1072a513d296bb558b28dfe7e
4
+ data.tar.gz: 388599cfc445653f210dda46a5c1b26253ca37d6
5
5
  SHA512:
6
- metadata.gz: bf0ef56017cdf8a29b8931ca7b6e838c0c0255b7e9cfa9f878e23a531bbb43c176f4ffc1840308c0a82427e2c00b764e625226d20b912b7eb4731a8ba8d4f834
7
- data.tar.gz: ad3972c46c16426698709ecf2a1cf86246b7221333398ec5d8a2d9b51e9b3dd0532e66754c173bb3e04c05cb288bf475e20229dc844b5a8e0991d00101cceccf
6
+ metadata.gz: 4f11f282ce2684189ba63300a030481149e06a3e86835836443b171cffcfca9cbd37af360f54f24d0612fa7a0e5c3f20ca9f55d39fab2fe7cd8f5925f8bba2ec
7
+ data.tar.gz: 2049a14e1c204438ba004a92f69127d221c452d9b04bdd243dbfebb99f070a918df2e17e1e5dba151e44128120ab07f47639363cc1fbf1652cbe215a215276fc
data/Gemfile CHANGED
@@ -5,4 +5,6 @@ group :development, :test do
5
5
  eval_gemfile 'Gemfile.devtools'
6
6
  end
7
7
 
8
+ gem 'rubyzip' , path: '/home/arne/github/rubyzip'
9
+
8
10
  gemspec
data/Gemfile.lock CHANGED
@@ -7,12 +7,18 @@ GIT
7
7
  PATH
8
8
  remote: .
9
9
  specs:
10
- analects (0.3.1)
10
+ analects (0.4.0)
11
11
  ice_nine (~> 0.11.0)
12
12
  inflecto (~> 0.0.2)
13
13
  plexus-rmmseg (~> 0.1.6)
14
+ rubyzip (~> 1.1)
14
15
  ting (~> 0.9.0)
15
16
 
17
+ PATH
18
+ remote: /home/arne/github/rubyzip
19
+ specs:
20
+ rubyzip (1.1.3)
21
+
16
22
  GEM
17
23
  remote: https://rubygems.org/
18
24
  specs:
@@ -149,7 +155,6 @@ GEM
149
155
  sexp_processor (~> 4.0)
150
156
  ruby_parser (3.4.1)
151
157
  sexp_processor (~> 4.1)
152
- rubygems-tasks (0.2.4)
153
158
  rubysl-logger (2.0.0)
154
159
  rubysl-open-uri (2.0.0)
155
160
  rubysl-prettyprint (2.0.3)
@@ -212,11 +217,11 @@ DEPENDENCIES
212
217
  rspec (~> 2.14.1)
213
218
  rspec-core (~> 2.14.8)
214
219
  rubocop (~> 0.18.1)
215
- rubygems-tasks
216
220
  rubysl-logger (~> 2.0.0)
217
221
  rubysl-open-uri (~> 2.0.0)
218
222
  rubysl-prettyprint (~> 2.0.2)
219
223
  rubysl-singleton (~> 2.0.0)
224
+ rubyzip!
220
225
  simplecov (~> 0.8.2)
221
226
  terminal-notifier-guard (~> 1.5.3)
222
227
  yard (~> 0.8.7)
data/README.md CHANGED
@@ -36,6 +36,8 @@ end
36
36
  rake analects:download:all # download all sources
37
37
  rake analects:download:cedict # download CC-CEDICT
38
38
  rake analects:download:chise_ids # download Chise-IDS
39
+ rake analects:download:hsk # download HSK data
40
+ rake analects:download:unihan # download Unihan database
39
41
  ```
40
42
 
41
43
  Or from Ruby
data/Rakefile CHANGED
@@ -1,6 +1,5 @@
1
1
  require 'rspec/core/rake_task'
2
2
  require 'devtools'
3
- require 'rubygems/tasks'
4
3
  require 'rubygems/package_task'
5
4
 
6
5
  $LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
@@ -12,8 +11,6 @@ Analects.init_rake_tasks
12
11
  RSpec::Core::RakeTask.new(:spec)
13
12
  task :default => :spec
14
13
 
15
- Gem::Tasks.new
16
-
17
14
  spec = Gem::Specification.load(File.expand_path('../analects.gemspec', __FILE__))
18
15
  gem = Gem::PackageTask.new(spec)
19
16
  gem.define
data/analects.gemspec CHANGED
@@ -16,14 +16,13 @@ Gem::Specification.new do |gem|
16
16
  gem.test_files = `git ls-files -- spec`.split($/)
17
17
  gem.extra_rdoc_files = %w[README.md]
18
18
 
19
- gem.add_development_dependency 'rspec'
20
- gem.add_development_dependency 'simplecov'
21
- gem.add_development_dependency 'rake'
22
- gem.add_development_dependency 'rubygems-tasks'
23
- gem.add_development_dependency 'pry'
24
-
25
19
  gem.add_runtime_dependency 'inflecto' , '~> 0.0.2'
26
20
  gem.add_runtime_dependency 'plexus-rmmseg' , '~> 0.1.6'
27
21
  gem.add_runtime_dependency 'ting' , '~> 0.9.0'
28
22
  gem.add_runtime_dependency 'ice_nine' , '~> 0.11.0'
23
+ gem.add_runtime_dependency 'rubyzip' , '~> 1.1'
24
+
25
+ gem.add_development_dependency 'rspec'
26
+ gem.add_development_dependency 'rake'
27
+ gem.add_development_dependency 'pry'
29
28
  end
data/lib/analects.rb CHANGED
@@ -30,6 +30,7 @@ require 'analects/cli/progress'
30
30
  require 'analects/cedict_loader'
31
31
  require 'analects/hsk_loader'
32
32
  require 'analects/chise_ids_loader'
33
+ require 'analects/unihan_loader'
33
34
  require 'analects/source'
34
35
  require 'analects/library'
35
36
  require 'analects/tokenizer'
@@ -1,8 +1,9 @@
1
1
  module Analects
2
- CEDICT_URL = 'http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz'
3
- CHISE_IDS_URL = 'http://git.chise.org/git/chise/ids.git'
4
- UNIHAN_URL = ''
5
- HSK_URL = ''
2
+ CEDICT_URL = 'http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz'
3
+ CHISE_IDS_URL = 'http://git.chise.org/git/chise/ids.git'
4
+ UNIHAN_URL = 'http://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip'
5
+ HSK_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/hsk/hsk.csv'
6
+ TW_CURRICULUM_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/taiwan_school_curriculum.txt'
6
7
 
7
8
  class Library
8
9
  attr_reader :options
@@ -23,48 +24,59 @@ module Analects
23
24
  end
24
25
  end
25
26
 
27
+ def sources
28
+ [
29
+ cedict,
30
+ chise_ids,
31
+ unihan,
32
+ hsk
33
+ ]
34
+ end
35
+
26
36
  def cedict
27
- @cedict ||= Source.new(
28
- {
29
- data_file: 'cedict_1_0_ts_utf-8_mdbg.txt',
30
- retrieval: [ :http, :gunzip, :save ]
31
- }.merge(options_for :cedict)
37
+ @cedict ||= create_source(
38
+ :cedict,
39
+ data_file: 'cedict_1_0_ts_utf-8_mdbg.txt',
40
+ retrieval: [ :http, :gunzip, :save ]
32
41
  )
33
42
  end
34
43
 
35
44
  def chise_ids
36
- @chise_ids ||= Source.new(
37
- {
38
- retrieval: :git
39
- }.merge(options_for :chise_ids)
45
+ @chise_ids ||= create_source(
46
+ :chise_ids,
47
+ retrieval: :git
40
48
  )
41
49
  end
42
50
 
43
51
  def unihan
44
- @unihan ||= Source.new(
45
- {
46
- data_file: ''
47
- }.merge(options_for :chise_ids)
52
+ @unihan ||= create_source(
53
+ :unihan,
54
+ retrieval: [ :http, :unzip ]
48
55
  )
49
56
  end
50
57
 
51
58
  def hsk
52
- @hsk ||= Source.new( {
53
- data_file: 'hsk.csv'
54
- }.merge(options_for :hsk)
59
+ @hsk ||= create_source(
60
+ :hsk,
61
+ data_file: 'hsk.csv',
62
+ retrieval: [ :http, :save ]
55
63
  )
56
64
  end
57
65
 
58
66
  private
59
67
 
60
- def options_for(name)
61
- {
62
- name: name,
63
- library: self,
64
- url: Analects.const_get("#{name.to_s.upcase}_URL"),
65
- loader: Analects.const_get("#{Inflecto.camelize name}Loader"),
66
- data_dir: data_dir
67
- }.merge(options.fetch(name, {}))
68
+ def create_source(name, source_options)
69
+ Source.new(
70
+ source_options.merge(
71
+ {
72
+ name: name,
73
+ library: self,
74
+ url: Analects.const_get("#{name.to_s.upcase}_URL"),
75
+ loader: Analects.const_get("#{Inflecto.camelize name}Loader"),
76
+ data_dir: data_dir
77
+ }
78
+ ).merge(options.fetch(name, {}))
79
+ )
68
80
  end
69
81
 
70
82
  end
@@ -18,6 +18,10 @@ module Analects
18
18
  @library ||= Analects::Library.new(options)
19
19
  end
20
20
 
21
+ def sources
22
+ library.sources
23
+ end
24
+
21
25
  def options
22
26
  @options ||= {}
23
27
  end
@@ -29,18 +33,15 @@ module Analects
29
33
  def define
30
34
  namespace @name do
31
35
  namespace :download do
32
- desc 'download CC-CEDICT'
33
- task :cedict do
34
- library.cedict.retrieve!
35
- end
36
-
37
- desc 'download Chise-IDS'
38
- task :chise_ids do
39
- library.chise_ids.retrieve!
36
+ sources.each do |source|
37
+ desc "download #{source.name}"
38
+ task source.name do
39
+ source.retrieve!
40
+ end
40
41
  end
41
42
 
42
43
  desc 'download all sources'
43
- task :all => [:cedict, :chise_ids]
44
+ task :all => sources.map(&:name)
44
45
  end
45
46
  end
46
47
 
@@ -17,15 +17,17 @@ module Analects
17
17
  end
18
18
 
19
19
  def data_dir
20
- options[:data_dir]
20
+ Pathname(options[:data_dir])
21
21
  end
22
22
 
23
23
  def location
24
- options[:data_file] ? File.join( data_dir, options[:data_file] ) : File.join( data_dir, options[:name].to_s )
24
+ options[:data_file] ?
25
+ data_dir.join(options[:data_file]) :
26
+ data_dir.join(options[:name].to_s)
25
27
  end
26
28
 
27
29
  def data_file_present?
28
- File.exist? location
30
+ location.exist?
29
31
  end
30
32
 
31
33
  def retrieve
@@ -33,32 +35,45 @@ module Analects
33
35
  end
34
36
 
35
37
  def retrieve!
36
- retrieval.inject( url ) do | result, method |
38
+ retrieval.inject(url) do | result, method |
37
39
  self.send( "retrieve_#{method}", result )
38
40
  end
39
41
  end
40
42
 
41
43
  # url -> stream
42
- def retrieve_http( url )
44
+ def retrieve_http(url)
43
45
  require 'open-uri'
44
- open( url )
46
+ StringIO.new(open(url).read)
45
47
  end
46
48
 
47
49
  # gzipped stream -> uncompressed stream
48
- def retrieve_gunzip( stream )
50
+ def retrieve_gunzip(stream)
49
51
  require 'zlib'
50
- Zlib::GzipReader.new( stream )
52
+ Zlib::GzipReader.new(stream)
53
+ end
54
+
55
+ def retrieve_unzip(stream)
56
+ require 'zip'
57
+ location.mkdir unless location.exist?
58
+ Zip::InputStream.open(stream) do |io|
59
+ while (entry = io.get_next_entry)
60
+ next if entry.ftype == :symlink
61
+ loc = location.join(entry.name)
62
+ loc.delete if loc.exist?
63
+ entry.extract(loc)
64
+ end
65
+ end
51
66
  end
52
67
 
53
68
  # stream|string -> create data file
54
- def retrieve_save( data )
69
+ def retrieve_save(data)
55
70
  File.open( location, 'w' ) do |f|
56
71
  f << ( data.respond_to?(:read) ? data.read : data )
57
72
  end
58
73
  end
59
74
 
60
75
  # url -> clones repo
61
- def retrieve_git( url )
76
+ def retrieve_git(url)
62
77
  `git clone #{url} #{data_dir}/#{name}` # Admittedly crude
63
78
  end
64
79
 
@@ -0,0 +1,7 @@
1
+ # encoding: UTF-8
2
+
3
+ module Analects
4
+ class UnihanLoader
5
+ include Enumerable
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Analects
2
- VERSION = '0.3.1'
2
+ VERSION = '0.4.0'
3
3
  end
@@ -8,7 +8,7 @@ describe Analects::Library do
8
8
  subject(:library) {
9
9
  described_class.new(options)
10
10
  }
11
- let(:data_dir) { File.join(Dir.tmpdir, 'analects-' + SecureRandom.hex(16)) }
11
+ let(:data_dir) { Pathname(Dir.tmpdir).join('analects-' + SecureRandom.hex(16)) }
12
12
  let(:options) {
13
13
  { data_dir: data_dir }
14
14
  }
metadata CHANGED
@@ -1,141 +1,127 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: analects
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arne Brasseur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-10 00:00:00.000000000 Z
11
+ date: 2014-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: rspec
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - '>='
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: simplecov
14
+ name: inflecto
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
- - - '>='
17
+ - - ~>
32
18
  - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
19
+ version: 0.0.2
20
+ type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
- - - '>='
24
+ - - ~>
39
25
  - !ruby/object:Gem::Version
40
- version: '0'
26
+ version: 0.0.2
41
27
  - !ruby/object:Gem::Dependency
42
- name: rake
28
+ name: plexus-rmmseg
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - '>='
31
+ - - ~>
46
32
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
33
+ version: 0.1.6
34
+ type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - '>='
38
+ - - ~>
53
39
  - !ruby/object:Gem::Version
54
- version: '0'
40
+ version: 0.1.6
55
41
  - !ruby/object:Gem::Dependency
56
- name: rubygems-tasks
42
+ name: ting
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - '>='
45
+ - - ~>
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
47
+ version: 0.9.0
48
+ type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - '>='
52
+ - - ~>
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 0.9.0
69
55
  - !ruby/object:Gem::Dependency
70
- name: pry
56
+ name: ice_nine
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - '>='
59
+ - - ~>
74
60
  - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
61
+ version: 0.11.0
62
+ type: :runtime
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - '>='
66
+ - - ~>
81
67
  - !ruby/object:Gem::Version
82
- version: '0'
68
+ version: 0.11.0
83
69
  - !ruby/object:Gem::Dependency
84
- name: inflecto
70
+ name: rubyzip
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - ~>
88
74
  - !ruby/object:Gem::Version
89
- version: 0.0.2
75
+ version: '1.1'
90
76
  type: :runtime
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
80
  - - ~>
95
81
  - !ruby/object:Gem::Version
96
- version: 0.0.2
82
+ version: '1.1'
97
83
  - !ruby/object:Gem::Dependency
98
- name: plexus-rmmseg
84
+ name: rspec
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
- - - ~>
87
+ - - '>='
102
88
  - !ruby/object:Gem::Version
103
- version: 0.1.6
104
- type: :runtime
89
+ version: '0'
90
+ type: :development
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
- - - ~>
94
+ - - '>='
109
95
  - !ruby/object:Gem::Version
110
- version: 0.1.6
96
+ version: '0'
111
97
  - !ruby/object:Gem::Dependency
112
- name: ting
98
+ name: rake
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
- - - ~>
101
+ - - '>='
116
102
  - !ruby/object:Gem::Version
117
- version: 0.9.0
118
- type: :runtime
103
+ version: '0'
104
+ type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
122
- - - ~>
108
+ - - '>='
123
109
  - !ruby/object:Gem::Version
124
- version: 0.9.0
110
+ version: '0'
125
111
  - !ruby/object:Gem::Dependency
126
- name: ice_nine
112
+ name: pry
127
113
  requirement: !ruby/object:Gem::Requirement
128
114
  requirements:
129
- - - ~>
115
+ - - '>='
130
116
  - !ruby/object:Gem::Version
131
- version: 0.11.0
132
- type: :runtime
117
+ version: '0'
118
+ type: :development
133
119
  prerelease: false
134
120
  version_requirements: !ruby/object:Gem::Requirement
135
121
  requirements:
136
- - - ~>
122
+ - - '>='
137
123
  - !ruby/object:Gem::Version
138
- version: 0.11.0
124
+ version: '0'
139
125
  description: Toolkit for Mandarin language learning apps
140
126
  email:
141
127
  - arne.brasseur@gmail.com
@@ -176,6 +162,7 @@ files:
176
162
  - lib/analects/rake_tasks.rb
177
163
  - lib/analects/source.rb
178
164
  - lib/analects/tokenizer.rb
165
+ - lib/analects/unihan_loader.rb
179
166
  - lib/analects/version.rb
180
167
  - lib/cjk_string.rb
181
168
  - lib/generators/analects.rb