nhkore 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/README.md +18 -6
- data/Rakefile +11 -16
- data/bin/nhkore +1 -3
- data/lib/nhkore/app.rb +616 -0
- data/lib/nhkore/article.rb +130 -0
- data/lib/nhkore/article_scraper.rb +653 -0
- data/lib/nhkore/cleaner.rb +91 -0
- data/lib/nhkore/cli/bing_cmd.rb +220 -0
- data/lib/nhkore/cli/fx_cmd.rb +116 -0
- data/lib/nhkore/cli/get_cmd.rb +153 -0
- data/lib/nhkore/cli/news_cmd.rb +375 -0
- data/lib/nhkore/cli/sift_cmd.rb +382 -0
- data/lib/nhkore/defn.rb +104 -0
- data/lib/nhkore/dict.rb +80 -0
- data/lib/nhkore/dict_scraper.rb +76 -0
- data/lib/nhkore/entry.rb +104 -0
- data/lib/nhkore/error.rb +35 -0
- data/lib/nhkore/fileable.rb +48 -0
- data/lib/nhkore/missingno.rb +92 -0
- data/lib/nhkore/news.rb +176 -0
- data/lib/nhkore/polisher.rb +93 -0
- data/lib/nhkore/scraper.rb +137 -0
- data/lib/nhkore/search_link.rb +188 -0
- data/lib/nhkore/search_scraper.rb +152 -0
- data/lib/nhkore/sifter.rb +339 -0
- data/lib/nhkore/splitter.rb +90 -0
- data/lib/nhkore/util.rb +190 -0
- data/lib/nhkore/variator.rb +87 -0
- data/lib/nhkore/version.rb +1 -1
- data/lib/nhkore/word.rb +134 -17
- data/lib/nhkore.rb +39 -40
- data/nhkore.gemspec +23 -8
- data/test/{nhkore_tester.rb → nhkore/test_helper.rb} +3 -1
- data/test/nhkore_test.rb +8 -6
- metadata +204 -11
data/nhkore.gemspec
CHANGED
@@ -33,8 +33,11 @@ Gem::Specification.new() do |spec|
|
|
33
33
|
spec.email = ['bradley@esotericpig.com']
|
34
34
|
spec.licenses = ['LGPL-3.0-or-later']
|
35
35
|
spec.homepage = 'https://github.com/esotericpig/nhkore'
|
36
|
-
spec.summary = 'NHK News Web (Easy) word frequency (core) for Japanese language learners.'
|
37
|
-
spec.description = '
|
36
|
+
spec.summary = 'NHK News Web (Easy) word frequency (core) scraper for Japanese language learners.'
|
37
|
+
spec.description = <<-EOD.gsub(/\s{2,}/,' ').strip()
|
38
|
+
Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese language learners.
|
39
|
+
Includes a CLI app and a scraper library.
|
40
|
+
EOD
|
38
41
|
|
39
42
|
spec.metadata = {
|
40
43
|
'bug_tracker_uri' => 'https://github.com/esotericpig/nhkore/issues',
|
@@ -48,25 +51,37 @@ Gem::Specification.new() do |spec|
|
|
48
51
|
spec.executables = [spec.name]
|
49
52
|
|
50
53
|
spec.files = Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')) +
|
51
|
-
Dir.glob(File.join(spec.bindir,'
|
54
|
+
Dir.glob(File.join(spec.bindir,'*')) +
|
52
55
|
Dir.glob(File.join('{test,yard}','**','*.{erb,rb}')) +
|
53
56
|
%W( Gemfile #{spec.name}.gemspec Rakefile ) +
|
54
57
|
%w( CHANGELOG.md LICENSE.txt README.md )
|
55
58
|
|
56
|
-
spec.required_ruby_version = '>= 2.4
|
59
|
+
spec.required_ruby_version = '>= 2.4'
|
57
60
|
|
58
61
|
spec.requirements << 'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html'
|
59
62
|
|
60
|
-
spec.add_runtime_dependency '
|
61
|
-
spec.add_runtime_dependency '
|
62
|
-
spec.add_runtime_dependency '
|
63
|
+
spec.add_runtime_dependency 'bimyou_segmenter' ,'~> 1.2' # For splitting Japanese sentences into words
|
64
|
+
spec.add_runtime_dependency 'cri' ,'~> 2.15' # For CLI commands/options
|
65
|
+
spec.add_runtime_dependency 'down' ,'~> 5.1' # For downloading files (GetCmd)
|
66
|
+
spec.add_runtime_dependency 'highline' ,'~> 2.0' # For CLI input/output
|
67
|
+
spec.add_runtime_dependency 'japanese_deinflector','~> 0.0' # For unconjugating Japanese words (plain/dictionary form)
|
68
|
+
spec.add_runtime_dependency 'nokogiri' ,'~> 1.10' # For scraping/hacking
|
69
|
+
spec.add_runtime_dependency 'psychgus' ,'~> 1.2' # For styling Psych YAML
|
70
|
+
spec.add_runtime_dependency 'public_suffix' ,'~> 4.0' # For parsing URL domain names
|
71
|
+
spec.add_runtime_dependency 'rainbow' ,'~> 3.0' # For CLI color output
|
72
|
+
spec.add_runtime_dependency 'rubyzip' ,'~> 2.3' # For extracting Zip files (GetCmd)
|
73
|
+
spec.add_runtime_dependency 'tiny_segmenter' ,'~> 0.0' # For splitting Japanese sentences into words
|
74
|
+
spec.add_runtime_dependency 'tty-progressbar' ,'~> 0.17' # For CLI progress bars
|
75
|
+
spec.add_runtime_dependency 'tty-spinner' ,'~> 0.9' # For CLI spinning progress
|
63
76
|
|
64
77
|
spec.add_development_dependency 'bundler' ,'~> 2.1'
|
65
78
|
spec.add_development_dependency 'minitest' ,'~> 5.14'
|
66
79
|
spec.add_development_dependency 'rake' ,'~> 13.0'
|
67
80
|
spec.add_development_dependency 'raketeer' ,'~> 0.2' # For extra Rake tasks
|
81
|
+
spec.add_development_dependency 'rdoc' ,'~> 6.2' # For YARDoc RDoc (*.rb)
|
82
|
+
spec.add_development_dependency 'redcarpet' ,'~> 3.5' # For YARDoc Markdown (*.md)
|
68
83
|
spec.add_development_dependency 'yard' ,'~> 0.9' # For documentation
|
69
|
-
spec.add_development_dependency 'yard_ghurt','~> 1.
|
84
|
+
spec.add_development_dependency 'yard_ghurt','~> 1.2' # For extra YARDoc Rake tasks
|
70
85
|
|
71
86
|
spec.post_install_message = "You can now use [#{spec.executables.join(', ')}] on the command line."
|
72
87
|
end
|
data/test/nhkore_test.rb
CHANGED
@@ -21,13 +21,15 @@
|
|
21
21
|
#++
|
22
22
|
|
23
23
|
|
24
|
-
require '
|
24
|
+
require 'nhkore/test_helper'
|
25
25
|
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
module NHKore
|
28
|
+
class NHKoreTest < TestHelper
|
29
|
+
def setup()
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_something()
|
33
|
+
end
|
32
34
|
end
|
33
35
|
end
|
metadata
CHANGED
@@ -1,29 +1,85 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nhkore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Bradley Whited (@esotericpig)
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: bimyou_segmenter
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.2'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: cri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.15'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.15'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: down
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.1'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.1'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: highline
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.0'
|
20
62
|
type: :runtime
|
21
63
|
prerelease: false
|
22
64
|
version_requirements: !ruby/object:Gem::Requirement
|
23
65
|
requirements:
|
24
66
|
- - "~>"
|
25
67
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
68
|
+
version: '2.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: japanese_deinflector
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.0'
|
27
83
|
- !ruby/object:Gem::Dependency
|
28
84
|
name: nokogiri
|
29
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +108,90 @@ dependencies:
|
|
52
108
|
- - "~>"
|
53
109
|
- !ruby/object:Gem::Version
|
54
110
|
version: '1.2'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: public_suffix
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '4.0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '4.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rainbow
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '3.0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '3.0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rubyzip
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '2.3'
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '2.3'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: tiny_segmenter
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0.0'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0.0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: tty-progressbar
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.17'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0.17'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: tty-spinner
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0.9'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0.9'
|
55
195
|
- !ruby/object:Gem::Dependency
|
56
196
|
name: bundler
|
57
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +248,34 @@ dependencies:
|
|
108
248
|
- - "~>"
|
109
249
|
- !ruby/object:Gem::Version
|
110
250
|
version: '0.2'
|
251
|
+
- !ruby/object:Gem::Dependency
|
252
|
+
name: rdoc
|
253
|
+
requirement: !ruby/object:Gem::Requirement
|
254
|
+
requirements:
|
255
|
+
- - "~>"
|
256
|
+
- !ruby/object:Gem::Version
|
257
|
+
version: '6.2'
|
258
|
+
type: :development
|
259
|
+
prerelease: false
|
260
|
+
version_requirements: !ruby/object:Gem::Requirement
|
261
|
+
requirements:
|
262
|
+
- - "~>"
|
263
|
+
- !ruby/object:Gem::Version
|
264
|
+
version: '6.2'
|
265
|
+
- !ruby/object:Gem::Dependency
|
266
|
+
name: redcarpet
|
267
|
+
requirement: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - "~>"
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: '3.5'
|
272
|
+
type: :development
|
273
|
+
prerelease: false
|
274
|
+
version_requirements: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - "~>"
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: '3.5'
|
111
279
|
- !ruby/object:Gem::Dependency
|
112
280
|
name: yard
|
113
281
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,16 +296,16 @@ dependencies:
|
|
128
296
|
requirements:
|
129
297
|
- - "~>"
|
130
298
|
- !ruby/object:Gem::Version
|
131
|
-
version: '1.
|
299
|
+
version: '1.2'
|
132
300
|
type: :development
|
133
301
|
prerelease: false
|
134
302
|
version_requirements: !ruby/object:Gem::Requirement
|
135
303
|
requirements:
|
136
304
|
- - "~>"
|
137
305
|
- !ruby/object:Gem::Version
|
138
|
-
version: '1.
|
306
|
+
version: '1.2'
|
139
307
|
description: Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese
|
140
|
-
language learners.
|
308
|
+
language learners. Includes a CLI app and a scraper library.
|
141
309
|
email:
|
142
310
|
- bradley@esotericpig.com
|
143
311
|
executables:
|
@@ -152,11 +320,36 @@ files:
|
|
152
320
|
- Rakefile
|
153
321
|
- bin/nhkore
|
154
322
|
- lib/nhkore.rb
|
323
|
+
- lib/nhkore/app.rb
|
324
|
+
- lib/nhkore/article.rb
|
325
|
+
- lib/nhkore/article_scraper.rb
|
326
|
+
- lib/nhkore/cleaner.rb
|
327
|
+
- lib/nhkore/cli/bing_cmd.rb
|
328
|
+
- lib/nhkore/cli/fx_cmd.rb
|
329
|
+
- lib/nhkore/cli/get_cmd.rb
|
330
|
+
- lib/nhkore/cli/news_cmd.rb
|
331
|
+
- lib/nhkore/cli/sift_cmd.rb
|
332
|
+
- lib/nhkore/defn.rb
|
333
|
+
- lib/nhkore/dict.rb
|
334
|
+
- lib/nhkore/dict_scraper.rb
|
335
|
+
- lib/nhkore/entry.rb
|
336
|
+
- lib/nhkore/error.rb
|
337
|
+
- lib/nhkore/fileable.rb
|
338
|
+
- lib/nhkore/missingno.rb
|
339
|
+
- lib/nhkore/news.rb
|
340
|
+
- lib/nhkore/polisher.rb
|
341
|
+
- lib/nhkore/scraper.rb
|
342
|
+
- lib/nhkore/search_link.rb
|
343
|
+
- lib/nhkore/search_scraper.rb
|
344
|
+
- lib/nhkore/sifter.rb
|
345
|
+
- lib/nhkore/splitter.rb
|
346
|
+
- lib/nhkore/util.rb
|
347
|
+
- lib/nhkore/variator.rb
|
155
348
|
- lib/nhkore/version.rb
|
156
349
|
- lib/nhkore/word.rb
|
157
350
|
- nhkore.gemspec
|
351
|
+
- test/nhkore/test_helper.rb
|
158
352
|
- test/nhkore_test.rb
|
159
|
-
- test/nhkore_tester.rb
|
160
353
|
- yard/templates/default/layout/html/footer.erb
|
161
354
|
homepage: https://github.com/esotericpig/nhkore
|
162
355
|
licenses:
|
@@ -174,7 +367,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
174
367
|
requirements:
|
175
368
|
- - ">="
|
176
369
|
- !ruby/object:Gem::Version
|
177
|
-
version: 2.4
|
370
|
+
version: '2.4'
|
178
371
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
179
372
|
requirements:
|
180
373
|
- - ">="
|
@@ -185,5 +378,5 @@ requirements:
|
|
185
378
|
rubygems_version: 3.1.2
|
186
379
|
signing_key:
|
187
380
|
specification_version: 4
|
188
|
-
summary: NHK News Web (Easy) word frequency (core) for Japanese language learners.
|
381
|
+
summary: NHK News Web (Easy) word frequency (core) scraper for Japanese language learners.
|
189
382
|
test_files: []
|