analects 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c11afafa3e6efdbad98b84a73d3a190e2dc7fb52
4
- data.tar.gz: 74b50dbb99b0fbb20182e682d1a5b044930e089a
3
+ metadata.gz: 9765c8c83a2eaee1072a513d296bb558b28dfe7e
4
+ data.tar.gz: 388599cfc445653f210dda46a5c1b26253ca37d6
5
5
  SHA512:
6
- metadata.gz: bf0ef56017cdf8a29b8931ca7b6e838c0c0255b7e9cfa9f878e23a531bbb43c176f4ffc1840308c0a82427e2c00b764e625226d20b912b7eb4731a8ba8d4f834
7
- data.tar.gz: ad3972c46c16426698709ecf2a1cf86246b7221333398ec5d8a2d9b51e9b3dd0532e66754c173bb3e04c05cb288bf475e20229dc844b5a8e0991d00101cceccf
6
+ metadata.gz: 4f11f282ce2684189ba63300a030481149e06a3e86835836443b171cffcfca9cbd37af360f54f24d0612fa7a0e5c3f20ca9f55d39fab2fe7cd8f5925f8bba2ec
7
+ data.tar.gz: 2049a14e1c204438ba004a92f69127d221c452d9b04bdd243dbfebb99f070a918df2e17e1e5dba151e44128120ab07f47639363cc1fbf1652cbe215a215276fc
data/Gemfile CHANGED
@@ -5,4 +5,6 @@ group :development, :test do
5
5
  eval_gemfile 'Gemfile.devtools'
6
6
  end
7
7
 
8
+ gem 'rubyzip' , path: '/home/arne/github/rubyzip'
9
+
8
10
  gemspec
data/Gemfile.lock CHANGED
@@ -7,12 +7,18 @@ GIT
7
7
  PATH
8
8
  remote: .
9
9
  specs:
10
- analects (0.3.1)
10
+ analects (0.4.0)
11
11
  ice_nine (~> 0.11.0)
12
12
  inflecto (~> 0.0.2)
13
13
  plexus-rmmseg (~> 0.1.6)
14
+ rubyzip (~> 1.1)
14
15
  ting (~> 0.9.0)
15
16
 
17
+ PATH
18
+ remote: /home/arne/github/rubyzip
19
+ specs:
20
+ rubyzip (1.1.3)
21
+
16
22
  GEM
17
23
  remote: https://rubygems.org/
18
24
  specs:
@@ -149,7 +155,6 @@ GEM
149
155
  sexp_processor (~> 4.0)
150
156
  ruby_parser (3.4.1)
151
157
  sexp_processor (~> 4.1)
152
- rubygems-tasks (0.2.4)
153
158
  rubysl-logger (2.0.0)
154
159
  rubysl-open-uri (2.0.0)
155
160
  rubysl-prettyprint (2.0.3)
@@ -212,11 +217,11 @@ DEPENDENCIES
212
217
  rspec (~> 2.14.1)
213
218
  rspec-core (~> 2.14.8)
214
219
  rubocop (~> 0.18.1)
215
- rubygems-tasks
216
220
  rubysl-logger (~> 2.0.0)
217
221
  rubysl-open-uri (~> 2.0.0)
218
222
  rubysl-prettyprint (~> 2.0.2)
219
223
  rubysl-singleton (~> 2.0.0)
224
+ rubyzip!
220
225
  simplecov (~> 0.8.2)
221
226
  terminal-notifier-guard (~> 1.5.3)
222
227
  yard (~> 0.8.7)
data/README.md CHANGED
@@ -36,6 +36,8 @@ end
36
36
  rake analects:download:all # download all sources
37
37
  rake analects:download:cedict # download CC-CEDICT
38
38
  rake analects:download:chise_ids # download Chise-IDS
39
+ rake analects:download:hsk # download HSK data
40
+ rake analects:download:unihan # download Unihan database
39
41
  ```
40
42
 
41
43
  Or from Ruby
data/Rakefile CHANGED
@@ -1,6 +1,5 @@
1
1
  require 'rspec/core/rake_task'
2
2
  require 'devtools'
3
- require 'rubygems/tasks'
4
3
  require 'rubygems/package_task'
5
4
 
6
5
  $LOAD_PATH.unshift File.expand_path('../lib', __FILE__)
@@ -12,8 +11,6 @@ Analects.init_rake_tasks
12
11
  RSpec::Core::RakeTask.new(:spec)
13
12
  task :default => :spec
14
13
 
15
- Gem::Tasks.new
16
-
17
14
  spec = Gem::Specification.load(File.expand_path('../analects.gemspec', __FILE__))
18
15
  gem = Gem::PackageTask.new(spec)
19
16
  gem.define
data/analects.gemspec CHANGED
@@ -16,14 +16,13 @@ Gem::Specification.new do |gem|
16
16
  gem.test_files = `git ls-files -- spec`.split($/)
17
17
  gem.extra_rdoc_files = %w[README.md]
18
18
 
19
- gem.add_development_dependency 'rspec'
20
- gem.add_development_dependency 'simplecov'
21
- gem.add_development_dependency 'rake'
22
- gem.add_development_dependency 'rubygems-tasks'
23
- gem.add_development_dependency 'pry'
24
-
25
19
  gem.add_runtime_dependency 'inflecto' , '~> 0.0.2'
26
20
  gem.add_runtime_dependency 'plexus-rmmseg' , '~> 0.1.6'
27
21
  gem.add_runtime_dependency 'ting' , '~> 0.9.0'
28
22
  gem.add_runtime_dependency 'ice_nine' , '~> 0.11.0'
23
+ gem.add_runtime_dependency 'rubyzip' , '~> 1.1'
24
+
25
+ gem.add_development_dependency 'rspec'
26
+ gem.add_development_dependency 'rake'
27
+ gem.add_development_dependency 'pry'
29
28
  end
data/lib/analects.rb CHANGED
@@ -30,6 +30,7 @@ require 'analects/cli/progress'
30
30
  require 'analects/cedict_loader'
31
31
  require 'analects/hsk_loader'
32
32
  require 'analects/chise_ids_loader'
33
+ require 'analects/unihan_loader'
33
34
  require 'analects/source'
34
35
  require 'analects/library'
35
36
  require 'analects/tokenizer'
@@ -1,8 +1,9 @@
1
1
  module Analects
2
- CEDICT_URL = 'http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz'
3
- CHISE_IDS_URL = 'http://git.chise.org/git/chise/ids.git'
4
- UNIHAN_URL = ''
5
- HSK_URL = ''
2
+ CEDICT_URL = 'http://www.mdbg.net/chindict/export/cedict/cedict_1_0_ts_utf-8_mdbg.txt.gz'
3
+ CHISE_IDS_URL = 'http://git.chise.org/git/chise/ids.git'
4
+ UNIHAN_URL = 'http://www.unicode.org/Public/UCD/latest/ucd/Unihan.zip'
5
+ HSK_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/hsk/hsk.csv'
6
+ TW_CURRICULUM_URL = 'https://raw.githubusercontent.com/plexus/analects-data/master/taiwan_school_curriculum.txt'
6
7
 
7
8
  class Library
8
9
  attr_reader :options
@@ -23,48 +24,59 @@ module Analects
23
24
  end
24
25
  end
25
26
 
27
+ def sources
28
+ [
29
+ cedict,
30
+ chise_ids,
31
+ unihan,
32
+ hsk
33
+ ]
34
+ end
35
+
26
36
  def cedict
27
- @cedict ||= Source.new(
28
- {
29
- data_file: 'cedict_1_0_ts_utf-8_mdbg.txt',
30
- retrieval: [ :http, :gunzip, :save ]
31
- }.merge(options_for :cedict)
37
+ @cedict ||= create_source(
38
+ :cedict,
39
+ data_file: 'cedict_1_0_ts_utf-8_mdbg.txt',
40
+ retrieval: [ :http, :gunzip, :save ]
32
41
  )
33
42
  end
34
43
 
35
44
  def chise_ids
36
- @chise_ids ||= Source.new(
37
- {
38
- retrieval: :git
39
- }.merge(options_for :chise_ids)
45
+ @chise_ids ||= create_source(
46
+ :chise_ids,
47
+ retrieval: :git
40
48
  )
41
49
  end
42
50
 
43
51
  def unihan
44
- @unihan ||= Source.new(
45
- {
46
- data_file: ''
47
- }.merge(options_for :chise_ids)
52
+ @unihan ||= create_source(
53
+ :unihan,
54
+ retrieval: [ :http, :unzip ]
48
55
  )
49
56
  end
50
57
 
51
58
  def hsk
52
- @hsk ||= Source.new( {
53
- data_file: 'hsk.csv'
54
- }.merge(options_for :hsk)
59
+ @hsk ||= create_source(
60
+ :hsk,
61
+ data_file: 'hsk.csv',
62
+ retrieval: [ :http, :save ]
55
63
  )
56
64
  end
57
65
 
58
66
  private
59
67
 
60
- def options_for(name)
61
- {
62
- name: name,
63
- library: self,
64
- url: Analects.const_get("#{name.to_s.upcase}_URL"),
65
- loader: Analects.const_get("#{Inflecto.camelize name}Loader"),
66
- data_dir: data_dir
67
- }.merge(options.fetch(name, {}))
68
+ def create_source(name, source_options)
69
+ Source.new(
70
+ source_options.merge(
71
+ {
72
+ name: name,
73
+ library: self,
74
+ url: Analects.const_get("#{name.to_s.upcase}_URL"),
75
+ loader: Analects.const_get("#{Inflecto.camelize name}Loader"),
76
+ data_dir: data_dir
77
+ }
78
+ ).merge(options.fetch(name, {}))
79
+ )
68
80
  end
69
81
 
70
82
  end
@@ -18,6 +18,10 @@ module Analects
18
18
  @library ||= Analects::Library.new(options)
19
19
  end
20
20
 
21
+ def sources
22
+ library.sources
23
+ end
24
+
21
25
  def options
22
26
  @options ||= {}
23
27
  end
@@ -29,18 +33,15 @@ module Analects
29
33
  def define
30
34
  namespace @name do
31
35
  namespace :download do
32
- desc 'download CC-CEDICT'
33
- task :cedict do
34
- library.cedict.retrieve!
35
- end
36
-
37
- desc 'download Chise-IDS'
38
- task :chise_ids do
39
- library.chise_ids.retrieve!
36
+ sources.each do |source|
37
+ desc "download #{source.name}"
38
+ task source.name do
39
+ source.retrieve!
40
+ end
40
41
  end
41
42
 
42
43
  desc 'download all sources'
43
- task :all => [:cedict, :chise_ids]
44
+ task :all => sources.map(&:name)
44
45
  end
45
46
  end
46
47
 
@@ -17,15 +17,17 @@ module Analects
17
17
  end
18
18
 
19
19
  def data_dir
20
- options[:data_dir]
20
+ Pathname(options[:data_dir])
21
21
  end
22
22
 
23
23
  def location
24
- options[:data_file] ? File.join( data_dir, options[:data_file] ) : File.join( data_dir, options[:name].to_s )
24
+ options[:data_file] ?
25
+ data_dir.join(options[:data_file]) :
26
+ data_dir.join(options[:name].to_s)
25
27
  end
26
28
 
27
29
  def data_file_present?
28
- File.exist? location
30
+ location.exist?
29
31
  end
30
32
 
31
33
  def retrieve
@@ -33,32 +35,45 @@ module Analects
33
35
  end
34
36
 
35
37
  def retrieve!
36
- retrieval.inject( url ) do | result, method |
38
+ retrieval.inject(url) do | result, method |
37
39
  self.send( "retrieve_#{method}", result )
38
40
  end
39
41
  end
40
42
 
41
43
  # url -> stream
42
- def retrieve_http( url )
44
+ def retrieve_http(url)
43
45
  require 'open-uri'
44
- open( url )
46
+ StringIO.new(open(url).read)
45
47
  end
46
48
 
47
49
  # gzipped stream -> uncompressed stream
48
- def retrieve_gunzip( stream )
50
+ def retrieve_gunzip(stream)
49
51
  require 'zlib'
50
- Zlib::GzipReader.new( stream )
52
+ Zlib::GzipReader.new(stream)
53
+ end
54
+
55
+ def retrieve_unzip(stream)
56
+ require 'zip'
57
+ location.mkdir unless location.exist?
58
+ Zip::InputStream.open(stream) do |io|
59
+ while (entry = io.get_next_entry)
60
+ next if entry.ftype == :symlink
61
+ loc = location.join(entry.name)
62
+ loc.delete if loc.exist?
63
+ entry.extract(loc)
64
+ end
65
+ end
51
66
  end
52
67
 
53
68
  # stream|string -> create data file
54
- def retrieve_save( data )
69
+ def retrieve_save(data)
55
70
  File.open( location, 'w' ) do |f|
56
71
  f << ( data.respond_to?(:read) ? data.read : data )
57
72
  end
58
73
  end
59
74
 
60
75
  # url -> clones repo
61
- def retrieve_git( url )
76
+ def retrieve_git(url)
62
77
  `git clone #{url} #{data_dir}/#{name}` # Admittedly crude
63
78
  end
64
79
 
@@ -0,0 +1,7 @@
1
+ # encoding: UTF-8
2
+
3
+ module Analects
4
+ class UnihanLoader
5
+ include Enumerable
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Analects
2
- VERSION = '0.3.1'
2
+ VERSION = '0.4.0'
3
3
  end
@@ -8,7 +8,7 @@ describe Analects::Library do
8
8
  subject(:library) {
9
9
  described_class.new(options)
10
10
  }
11
- let(:data_dir) { File.join(Dir.tmpdir, 'analects-' + SecureRandom.hex(16)) }
11
+ let(:data_dir) { Pathname(Dir.tmpdir).join('analects-' + SecureRandom.hex(16)) }
12
12
  let(:options) {
13
13
  { data_dir: data_dir }
14
14
  }
metadata CHANGED
@@ -1,141 +1,127 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: analects
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Arne Brasseur
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-04-10 00:00:00.000000000 Z
11
+ date: 2014-04-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
- name: rspec
15
- requirement: !ruby/object:Gem::Requirement
16
- requirements:
17
- - - '>='
18
- - !ruby/object:Gem::Version
19
- version: '0'
20
- type: :development
21
- prerelease: false
22
- version_requirements: !ruby/object:Gem::Requirement
23
- requirements:
24
- - - '>='
25
- - !ruby/object:Gem::Version
26
- version: '0'
27
- - !ruby/object:Gem::Dependency
28
- name: simplecov
14
+ name: inflecto
29
15
  requirement: !ruby/object:Gem::Requirement
30
16
  requirements:
31
- - - '>='
17
+ - - ~>
32
18
  - !ruby/object:Gem::Version
33
- version: '0'
34
- type: :development
19
+ version: 0.0.2
20
+ type: :runtime
35
21
  prerelease: false
36
22
  version_requirements: !ruby/object:Gem::Requirement
37
23
  requirements:
38
- - - '>='
24
+ - - ~>
39
25
  - !ruby/object:Gem::Version
40
- version: '0'
26
+ version: 0.0.2
41
27
  - !ruby/object:Gem::Dependency
42
- name: rake
28
+ name: plexus-rmmseg
43
29
  requirement: !ruby/object:Gem::Requirement
44
30
  requirements:
45
- - - '>='
31
+ - - ~>
46
32
  - !ruby/object:Gem::Version
47
- version: '0'
48
- type: :development
33
+ version: 0.1.6
34
+ type: :runtime
49
35
  prerelease: false
50
36
  version_requirements: !ruby/object:Gem::Requirement
51
37
  requirements:
52
- - - '>='
38
+ - - ~>
53
39
  - !ruby/object:Gem::Version
54
- version: '0'
40
+ version: 0.1.6
55
41
  - !ruby/object:Gem::Dependency
56
- name: rubygems-tasks
42
+ name: ting
57
43
  requirement: !ruby/object:Gem::Requirement
58
44
  requirements:
59
- - - '>='
45
+ - - ~>
60
46
  - !ruby/object:Gem::Version
61
- version: '0'
62
- type: :development
47
+ version: 0.9.0
48
+ type: :runtime
63
49
  prerelease: false
64
50
  version_requirements: !ruby/object:Gem::Requirement
65
51
  requirements:
66
- - - '>='
52
+ - - ~>
67
53
  - !ruby/object:Gem::Version
68
- version: '0'
54
+ version: 0.9.0
69
55
  - !ruby/object:Gem::Dependency
70
- name: pry
56
+ name: ice_nine
71
57
  requirement: !ruby/object:Gem::Requirement
72
58
  requirements:
73
- - - '>='
59
+ - - ~>
74
60
  - !ruby/object:Gem::Version
75
- version: '0'
76
- type: :development
61
+ version: 0.11.0
62
+ type: :runtime
77
63
  prerelease: false
78
64
  version_requirements: !ruby/object:Gem::Requirement
79
65
  requirements:
80
- - - '>='
66
+ - - ~>
81
67
  - !ruby/object:Gem::Version
82
- version: '0'
68
+ version: 0.11.0
83
69
  - !ruby/object:Gem::Dependency
84
- name: inflecto
70
+ name: rubyzip
85
71
  requirement: !ruby/object:Gem::Requirement
86
72
  requirements:
87
73
  - - ~>
88
74
  - !ruby/object:Gem::Version
89
- version: 0.0.2
75
+ version: '1.1'
90
76
  type: :runtime
91
77
  prerelease: false
92
78
  version_requirements: !ruby/object:Gem::Requirement
93
79
  requirements:
94
80
  - - ~>
95
81
  - !ruby/object:Gem::Version
96
- version: 0.0.2
82
+ version: '1.1'
97
83
  - !ruby/object:Gem::Dependency
98
- name: plexus-rmmseg
84
+ name: rspec
99
85
  requirement: !ruby/object:Gem::Requirement
100
86
  requirements:
101
- - - ~>
87
+ - - '>='
102
88
  - !ruby/object:Gem::Version
103
- version: 0.1.6
104
- type: :runtime
89
+ version: '0'
90
+ type: :development
105
91
  prerelease: false
106
92
  version_requirements: !ruby/object:Gem::Requirement
107
93
  requirements:
108
- - - ~>
94
+ - - '>='
109
95
  - !ruby/object:Gem::Version
110
- version: 0.1.6
96
+ version: '0'
111
97
  - !ruby/object:Gem::Dependency
112
- name: ting
98
+ name: rake
113
99
  requirement: !ruby/object:Gem::Requirement
114
100
  requirements:
115
- - - ~>
101
+ - - '>='
116
102
  - !ruby/object:Gem::Version
117
- version: 0.9.0
118
- type: :runtime
103
+ version: '0'
104
+ type: :development
119
105
  prerelease: false
120
106
  version_requirements: !ruby/object:Gem::Requirement
121
107
  requirements:
122
- - - ~>
108
+ - - '>='
123
109
  - !ruby/object:Gem::Version
124
- version: 0.9.0
110
+ version: '0'
125
111
  - !ruby/object:Gem::Dependency
126
- name: ice_nine
112
+ name: pry
127
113
  requirement: !ruby/object:Gem::Requirement
128
114
  requirements:
129
- - - ~>
115
+ - - '>='
130
116
  - !ruby/object:Gem::Version
131
- version: 0.11.0
132
- type: :runtime
117
+ version: '0'
118
+ type: :development
133
119
  prerelease: false
134
120
  version_requirements: !ruby/object:Gem::Requirement
135
121
  requirements:
136
- - - ~>
122
+ - - '>='
137
123
  - !ruby/object:Gem::Version
138
- version: 0.11.0
124
+ version: '0'
139
125
  description: Toolkit for Mandarin language learning apps
140
126
  email:
141
127
  - arne.brasseur@gmail.com
@@ -176,6 +162,7 @@ files:
176
162
  - lib/analects/rake_tasks.rb
177
163
  - lib/analects/source.rb
178
164
  - lib/analects/tokenizer.rb
165
+ - lib/analects/unihan_loader.rb
179
166
  - lib/analects/version.rb
180
167
  - lib/cjk_string.rb
181
168
  - lib/generators/analects.rb