nhkore 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +37 -1
- data/README.md +18 -6
- data/Rakefile +11 -16
- data/bin/nhkore +1 -3
- data/lib/nhkore/app.rb +616 -0
- data/lib/nhkore/article.rb +130 -0
- data/lib/nhkore/article_scraper.rb +653 -0
- data/lib/nhkore/cleaner.rb +91 -0
- data/lib/nhkore/cli/bing_cmd.rb +220 -0
- data/lib/nhkore/cli/fx_cmd.rb +116 -0
- data/lib/nhkore/cli/get_cmd.rb +153 -0
- data/lib/nhkore/cli/news_cmd.rb +375 -0
- data/lib/nhkore/cli/sift_cmd.rb +382 -0
- data/lib/nhkore/defn.rb +104 -0
- data/lib/nhkore/dict.rb +80 -0
- data/lib/nhkore/dict_scraper.rb +76 -0
- data/lib/nhkore/entry.rb +104 -0
- data/lib/nhkore/error.rb +35 -0
- data/lib/nhkore/fileable.rb +48 -0
- data/lib/nhkore/missingno.rb +92 -0
- data/lib/nhkore/news.rb +176 -0
- data/lib/nhkore/polisher.rb +93 -0
- data/lib/nhkore/scraper.rb +137 -0
- data/lib/nhkore/search_link.rb +188 -0
- data/lib/nhkore/search_scraper.rb +152 -0
- data/lib/nhkore/sifter.rb +339 -0
- data/lib/nhkore/splitter.rb +90 -0
- data/lib/nhkore/util.rb +190 -0
- data/lib/nhkore/variator.rb +87 -0
- data/lib/nhkore/version.rb +1 -1
- data/lib/nhkore/word.rb +134 -17
- data/lib/nhkore.rb +39 -40
- data/nhkore.gemspec +23 -8
- data/test/{nhkore_tester.rb → nhkore/test_helper.rb} +3 -1
- data/test/nhkore_test.rb +8 -6
- metadata +204 -11
data/nhkore.gemspec
CHANGED
@@ -33,8 +33,11 @@ Gem::Specification.new() do |spec|
|
|
33
33
|
spec.email = ['bradley@esotericpig.com']
|
34
34
|
spec.licenses = ['LGPL-3.0-or-later']
|
35
35
|
spec.homepage = 'https://github.com/esotericpig/nhkore'
|
36
|
-
spec.summary = 'NHK News Web (Easy) word frequency (core) for Japanese language learners.'
|
37
|
-
spec.description = '
|
36
|
+
spec.summary = 'NHK News Web (Easy) word frequency (core) scraper for Japanese language learners.'
|
37
|
+
spec.description = <<-EOD.gsub(/\s{2,}/,' ').strip()
|
38
|
+
Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese language learners.
|
39
|
+
Includes a CLI app and a scraper library.
|
40
|
+
EOD
|
38
41
|
|
39
42
|
spec.metadata = {
|
40
43
|
'bug_tracker_uri' => 'https://github.com/esotericpig/nhkore/issues',
|
@@ -48,25 +51,37 @@ Gem::Specification.new() do |spec|
|
|
48
51
|
spec.executables = [spec.name]
|
49
52
|
|
50
53
|
spec.files = Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')) +
|
51
|
-
Dir.glob(File.join(spec.bindir,'
|
54
|
+
Dir.glob(File.join(spec.bindir,'*')) +
|
52
55
|
Dir.glob(File.join('{test,yard}','**','*.{erb,rb}')) +
|
53
56
|
%W( Gemfile #{spec.name}.gemspec Rakefile ) +
|
54
57
|
%w( CHANGELOG.md LICENSE.txt README.md )
|
55
58
|
|
56
|
-
spec.required_ruby_version = '>= 2.4
|
59
|
+
spec.required_ruby_version = '>= 2.4'
|
57
60
|
|
58
61
|
spec.requirements << 'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html'
|
59
62
|
|
60
|
-
spec.add_runtime_dependency '
|
61
|
-
spec.add_runtime_dependency '
|
62
|
-
spec.add_runtime_dependency '
|
63
|
+
spec.add_runtime_dependency 'bimyou_segmenter' ,'~> 1.2' # For splitting Japanese sentences into words
|
64
|
+
spec.add_runtime_dependency 'cri' ,'~> 2.15' # For CLI commands/options
|
65
|
+
spec.add_runtime_dependency 'down' ,'~> 5.1' # For downloading files (GetCmd)
|
66
|
+
spec.add_runtime_dependency 'highline' ,'~> 2.0' # For CLI input/output
|
67
|
+
spec.add_runtime_dependency 'japanese_deinflector','~> 0.0' # For unconjugating Japanese words (plain/dictionary form)
|
68
|
+
spec.add_runtime_dependency 'nokogiri' ,'~> 1.10' # For scraping/hacking
|
69
|
+
spec.add_runtime_dependency 'psychgus' ,'~> 1.2' # For styling Psych YAML
|
70
|
+
spec.add_runtime_dependency 'public_suffix' ,'~> 4.0' # For parsing URL domain names
|
71
|
+
spec.add_runtime_dependency 'rainbow' ,'~> 3.0' # For CLI color output
|
72
|
+
spec.add_runtime_dependency 'rubyzip' ,'~> 2.3' # For extracting Zip files (GetCmd)
|
73
|
+
spec.add_runtime_dependency 'tiny_segmenter' ,'~> 0.0' # For splitting Japanese sentences into words
|
74
|
+
spec.add_runtime_dependency 'tty-progressbar' ,'~> 0.17' # For CLI progress bars
|
75
|
+
spec.add_runtime_dependency 'tty-spinner' ,'~> 0.9' # For CLI spinning progress
|
63
76
|
|
64
77
|
spec.add_development_dependency 'bundler' ,'~> 2.1'
|
65
78
|
spec.add_development_dependency 'minitest' ,'~> 5.14'
|
66
79
|
spec.add_development_dependency 'rake' ,'~> 13.0'
|
67
80
|
spec.add_development_dependency 'raketeer' ,'~> 0.2' # For extra Rake tasks
|
81
|
+
spec.add_development_dependency 'rdoc' ,'~> 6.2' # For YARDoc RDoc (*.rb)
|
82
|
+
spec.add_development_dependency 'redcarpet' ,'~> 3.5' # For YARDoc Markdown (*.md)
|
68
83
|
spec.add_development_dependency 'yard' ,'~> 0.9' # For documentation
|
69
|
-
spec.add_development_dependency 'yard_ghurt','~> 1.
|
84
|
+
spec.add_development_dependency 'yard_ghurt','~> 1.2' # For extra YARDoc Rake tasks
|
70
85
|
|
71
86
|
spec.post_install_message = "You can now use [#{spec.executables.join(', ')}] on the command line."
|
72
87
|
end
|
data/test/nhkore_test.rb
CHANGED
@@ -21,13 +21,15 @@
|
|
21
21
|
#++
|
22
22
|
|
23
23
|
|
24
|
-
require '
|
24
|
+
require 'nhkore/test_helper'
|
25
25
|
|
26
26
|
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
27
|
+
module NHKore
|
28
|
+
class NHKoreTest < TestHelper
|
29
|
+
def setup()
|
30
|
+
end
|
31
|
+
|
32
|
+
def test_something()
|
33
|
+
end
|
32
34
|
end
|
33
35
|
end
|
metadata
CHANGED
@@ -1,29 +1,85 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nhkore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Bradley Whited (@esotericpig)
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-03-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
|
-
name:
|
14
|
+
name: bimyou_segmenter
|
15
15
|
requirement: !ruby/object:Gem::Requirement
|
16
16
|
requirements:
|
17
17
|
- - "~>"
|
18
18
|
- !ruby/object:Gem::Version
|
19
|
-
version: '
|
19
|
+
version: '1.2'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.2'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: cri
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '2.15'
|
34
|
+
type: :runtime
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '2.15'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: down
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '5.1'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '5.1'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: highline
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - "~>"
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '2.0'
|
20
62
|
type: :runtime
|
21
63
|
prerelease: false
|
22
64
|
version_requirements: !ruby/object:Gem::Requirement
|
23
65
|
requirements:
|
24
66
|
- - "~>"
|
25
67
|
- !ruby/object:Gem::Version
|
26
|
-
version: '
|
68
|
+
version: '2.0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: japanese_deinflector
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - "~>"
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0.0'
|
76
|
+
type: :runtime
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0.0'
|
27
83
|
- !ruby/object:Gem::Dependency
|
28
84
|
name: nokogiri
|
29
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -52,6 +108,90 @@ dependencies:
|
|
52
108
|
- - "~>"
|
53
109
|
- !ruby/object:Gem::Version
|
54
110
|
version: '1.2'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: public_suffix
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '4.0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '4.0'
|
125
|
+
- !ruby/object:Gem::Dependency
|
126
|
+
name: rainbow
|
127
|
+
requirement: !ruby/object:Gem::Requirement
|
128
|
+
requirements:
|
129
|
+
- - "~>"
|
130
|
+
- !ruby/object:Gem::Version
|
131
|
+
version: '3.0'
|
132
|
+
type: :runtime
|
133
|
+
prerelease: false
|
134
|
+
version_requirements: !ruby/object:Gem::Requirement
|
135
|
+
requirements:
|
136
|
+
- - "~>"
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '3.0'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: rubyzip
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: '2.3'
|
146
|
+
type: :runtime
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: '2.3'
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: tiny_segmenter
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: '0.0'
|
160
|
+
type: :runtime
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: '0.0'
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: tty-progressbar
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '0.17'
|
174
|
+
type: :runtime
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '0.17'
|
181
|
+
- !ruby/object:Gem::Dependency
|
182
|
+
name: tty-spinner
|
183
|
+
requirement: !ruby/object:Gem::Requirement
|
184
|
+
requirements:
|
185
|
+
- - "~>"
|
186
|
+
- !ruby/object:Gem::Version
|
187
|
+
version: '0.9'
|
188
|
+
type: :runtime
|
189
|
+
prerelease: false
|
190
|
+
version_requirements: !ruby/object:Gem::Requirement
|
191
|
+
requirements:
|
192
|
+
- - "~>"
|
193
|
+
- !ruby/object:Gem::Version
|
194
|
+
version: '0.9'
|
55
195
|
- !ruby/object:Gem::Dependency
|
56
196
|
name: bundler
|
57
197
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +248,34 @@ dependencies:
|
|
108
248
|
- - "~>"
|
109
249
|
- !ruby/object:Gem::Version
|
110
250
|
version: '0.2'
|
251
|
+
- !ruby/object:Gem::Dependency
|
252
|
+
name: rdoc
|
253
|
+
requirement: !ruby/object:Gem::Requirement
|
254
|
+
requirements:
|
255
|
+
- - "~>"
|
256
|
+
- !ruby/object:Gem::Version
|
257
|
+
version: '6.2'
|
258
|
+
type: :development
|
259
|
+
prerelease: false
|
260
|
+
version_requirements: !ruby/object:Gem::Requirement
|
261
|
+
requirements:
|
262
|
+
- - "~>"
|
263
|
+
- !ruby/object:Gem::Version
|
264
|
+
version: '6.2'
|
265
|
+
- !ruby/object:Gem::Dependency
|
266
|
+
name: redcarpet
|
267
|
+
requirement: !ruby/object:Gem::Requirement
|
268
|
+
requirements:
|
269
|
+
- - "~>"
|
270
|
+
- !ruby/object:Gem::Version
|
271
|
+
version: '3.5'
|
272
|
+
type: :development
|
273
|
+
prerelease: false
|
274
|
+
version_requirements: !ruby/object:Gem::Requirement
|
275
|
+
requirements:
|
276
|
+
- - "~>"
|
277
|
+
- !ruby/object:Gem::Version
|
278
|
+
version: '3.5'
|
111
279
|
- !ruby/object:Gem::Dependency
|
112
280
|
name: yard
|
113
281
|
requirement: !ruby/object:Gem::Requirement
|
@@ -128,16 +296,16 @@ dependencies:
|
|
128
296
|
requirements:
|
129
297
|
- - "~>"
|
130
298
|
- !ruby/object:Gem::Version
|
131
|
-
version: '1.
|
299
|
+
version: '1.2'
|
132
300
|
type: :development
|
133
301
|
prerelease: false
|
134
302
|
version_requirements: !ruby/object:Gem::Requirement
|
135
303
|
requirements:
|
136
304
|
- - "~>"
|
137
305
|
- !ruby/object:Gem::Version
|
138
|
-
version: '1.
|
306
|
+
version: '1.2'
|
139
307
|
description: Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese
|
140
|
-
language learners.
|
308
|
+
language learners. Includes a CLI app and a scraper library.
|
141
309
|
email:
|
142
310
|
- bradley@esotericpig.com
|
143
311
|
executables:
|
@@ -152,11 +320,36 @@ files:
|
|
152
320
|
- Rakefile
|
153
321
|
- bin/nhkore
|
154
322
|
- lib/nhkore.rb
|
323
|
+
- lib/nhkore/app.rb
|
324
|
+
- lib/nhkore/article.rb
|
325
|
+
- lib/nhkore/article_scraper.rb
|
326
|
+
- lib/nhkore/cleaner.rb
|
327
|
+
- lib/nhkore/cli/bing_cmd.rb
|
328
|
+
- lib/nhkore/cli/fx_cmd.rb
|
329
|
+
- lib/nhkore/cli/get_cmd.rb
|
330
|
+
- lib/nhkore/cli/news_cmd.rb
|
331
|
+
- lib/nhkore/cli/sift_cmd.rb
|
332
|
+
- lib/nhkore/defn.rb
|
333
|
+
- lib/nhkore/dict.rb
|
334
|
+
- lib/nhkore/dict_scraper.rb
|
335
|
+
- lib/nhkore/entry.rb
|
336
|
+
- lib/nhkore/error.rb
|
337
|
+
- lib/nhkore/fileable.rb
|
338
|
+
- lib/nhkore/missingno.rb
|
339
|
+
- lib/nhkore/news.rb
|
340
|
+
- lib/nhkore/polisher.rb
|
341
|
+
- lib/nhkore/scraper.rb
|
342
|
+
- lib/nhkore/search_link.rb
|
343
|
+
- lib/nhkore/search_scraper.rb
|
344
|
+
- lib/nhkore/sifter.rb
|
345
|
+
- lib/nhkore/splitter.rb
|
346
|
+
- lib/nhkore/util.rb
|
347
|
+
- lib/nhkore/variator.rb
|
155
348
|
- lib/nhkore/version.rb
|
156
349
|
- lib/nhkore/word.rb
|
157
350
|
- nhkore.gemspec
|
351
|
+
- test/nhkore/test_helper.rb
|
158
352
|
- test/nhkore_test.rb
|
159
|
-
- test/nhkore_tester.rb
|
160
353
|
- yard/templates/default/layout/html/footer.erb
|
161
354
|
homepage: https://github.com/esotericpig/nhkore
|
162
355
|
licenses:
|
@@ -174,7 +367,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
174
367
|
requirements:
|
175
368
|
- - ">="
|
176
369
|
- !ruby/object:Gem::Version
|
177
|
-
version: 2.4
|
370
|
+
version: '2.4'
|
178
371
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
179
372
|
requirements:
|
180
373
|
- - ">="
|
@@ -185,5 +378,5 @@ requirements:
|
|
185
378
|
rubygems_version: 3.1.2
|
186
379
|
signing_key:
|
187
380
|
specification_version: 4
|
188
|
-
summary: NHK News Web (Easy) word frequency (core) for Japanese language learners.
|
381
|
+
summary: NHK News Web (Easy) word frequency (core) scraper for Japanese language learners.
|
189
382
|
test_files: []
|