nhkore 0.3.13 → 0.3.16
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +17 -1
- data/Gemfile.lock +41 -41
- data/README.md +4 -5
- data/Rakefile +5 -3
- data/lib/nhkore/app.rb +8 -19
- data/lib/nhkore/article.rb +5 -9
- data/lib/nhkore/article_scraper.rb +15 -14
- data/lib/nhkore/cleaner.rb +0 -12
- data/lib/nhkore/cli/fx_cmd.rb +0 -4
- data/lib/nhkore/cli/get_cmd.rb +0 -4
- data/lib/nhkore/cli/news_cmd.rb +29 -17
- data/lib/nhkore/cli/search_cmd.rb +45 -35
- data/lib/nhkore/cli/sift_cmd.rb +1 -5
- data/lib/nhkore/datetime_parser.rb +1 -5
- data/lib/nhkore/defn.rb +1 -5
- data/lib/nhkore/dict.rb +2 -5
- data/lib/nhkore/dict_scraper.rb +2 -6
- data/lib/nhkore/entry.rb +3 -9
- data/lib/nhkore/error.rb +1 -11
- data/lib/nhkore/fileable.rb +0 -4
- data/lib/nhkore/lib.rb +0 -3
- data/lib/nhkore/missingno.rb +2 -6
- data/lib/nhkore/news.rb +3 -15
- data/lib/nhkore/polisher.rb +0 -12
- data/lib/nhkore/scraper.rb +8 -5
- data/lib/nhkore/search_link.rb +9 -17
- data/lib/nhkore/search_scraper.rb +34 -24
- data/lib/nhkore/sifter.rb +7 -8
- data/lib/nhkore/splitter.rb +0 -18
- data/lib/nhkore/user_agents.rb +1 -4
- data/lib/nhkore/util.rb +0 -4
- data/lib/nhkore/variator.rb +0 -14
- data/lib/nhkore/version.rb +1 -1
- data/lib/nhkore/word.rb +0 -4
- data/lib/nhkore.rb +0 -5
- data/nhkore.gemspec +40 -37
- data/samples/looper.rb +0 -3
- metadata +24 -24
data/lib/nhkore/sifter.rb
CHANGED
@@ -15,10 +15,6 @@ require 'nhkore/util'
|
|
15
15
|
|
16
16
|
|
17
17
|
module NHKore
|
18
|
-
###
|
19
|
-
# @author Jonathan Bradley Whited
|
20
|
-
# @since 0.2.0
|
21
|
-
###
|
22
18
|
class Sifter
|
23
19
|
include Fileable
|
24
20
|
|
@@ -61,10 +57,8 @@ module NHKore
|
|
61
57
|
end
|
62
58
|
|
63
59
|
def build_rows(words)
|
64
|
-
rows = []
|
65
|
-
|
66
|
-
words.each do |word|
|
67
|
-
rows << build_word_row(word)
|
60
|
+
rows = words.map do |word|
|
61
|
+
build_word_row(word)
|
68
62
|
end
|
69
63
|
|
70
64
|
return rows
|
@@ -336,6 +330,11 @@ module NHKore
|
|
336
330
|
next if filter?(article)
|
337
331
|
|
338
332
|
article.words.each_value do |word|
|
333
|
+
# TODO: Try to remove garbage data better.
|
334
|
+
next if word.word.length < 2
|
335
|
+
next if word.freq <= 1
|
336
|
+
next if word.word =~ /\p{Latin}|[[:digit:]]/
|
337
|
+
|
339
338
|
master_article.add_word(word,use_freq: true)
|
340
339
|
end
|
341
340
|
end
|
data/lib/nhkore/splitter.rb
CHANGED
@@ -13,10 +13,6 @@ require 'nhkore/util'
|
|
13
13
|
|
14
14
|
|
15
15
|
module NHKore
|
16
|
-
###
|
17
|
-
# @author Jonathan Bradley Whited
|
18
|
-
# @since 0.2.0
|
19
|
-
###
|
20
16
|
class Splitter
|
21
17
|
def begin_split(str)
|
22
18
|
return str
|
@@ -30,19 +26,12 @@ module NHKore
|
|
30
26
|
end
|
31
27
|
end
|
32
28
|
|
33
|
-
###
|
34
|
-
# @author Jonathan Bradley Whited
|
35
|
-
# @since 0.2.0
|
36
|
-
###
|
37
29
|
class BasicSplitter < Splitter
|
38
30
|
def end_split(str)
|
39
31
|
return str.split(Util::NORMALIZE_STR_REGEX)
|
40
32
|
end
|
41
33
|
end
|
42
34
|
|
43
|
-
###
|
44
|
-
# @since 0.2.0
|
45
|
-
###
|
46
35
|
class BimyouSplitter < Splitter
|
47
36
|
def initialize(*)
|
48
37
|
require 'bimyou_segmenter'
|
@@ -55,9 +44,6 @@ module NHKore
|
|
55
44
|
end
|
56
45
|
end
|
57
46
|
|
58
|
-
###
|
59
|
-
# @since 0.2.0
|
60
|
-
###
|
61
47
|
class TinySplitter < Splitter
|
62
48
|
attr_accessor :tiny
|
63
49
|
|
@@ -74,10 +60,6 @@ module NHKore
|
|
74
60
|
end
|
75
61
|
end
|
76
62
|
|
77
|
-
###
|
78
|
-
# @author Jonathan Bradley Whited
|
79
|
-
# @since 0.2.0
|
80
|
-
###
|
81
63
|
class BestSplitter < BimyouSplitter
|
82
64
|
end
|
83
65
|
end
|
data/lib/nhkore/user_agents.rb
CHANGED
@@ -38,9 +38,6 @@ module NHKore
|
|
38
38
|
#
|
39
39
|
# The gem is really old and had a lot of warnings, so decided to make this class.
|
40
40
|
# Maybe I'll fork the gem and maintain a new version in the future...
|
41
|
-
#
|
42
|
-
# @author Jonathan Bradley Whited
|
43
|
-
# @since 0.2.1
|
44
41
|
###
|
45
42
|
class UserAgents
|
46
43
|
attr_accessor :data
|
@@ -53,7 +50,7 @@ module NHKore
|
|
53
50
|
# because we don't need all of the data in memory after getting just 1
|
54
51
|
# sample, even though it's slower.
|
55
52
|
def initialize
|
56
|
-
super
|
53
|
+
super
|
57
54
|
|
58
55
|
# rubocop:disable all
|
59
56
|
@data = [
|
data/lib/nhkore/util.rb
CHANGED
data/lib/nhkore/variator.rb
CHANGED
@@ -10,10 +10,6 @@
|
|
10
10
|
|
11
11
|
|
12
12
|
module NHKore
|
13
|
-
###
|
14
|
-
# @author Jonathan Bradley Whited
|
15
|
-
# @since 0.2.0
|
16
|
-
###
|
17
13
|
class Variator
|
18
14
|
def begin_variate(str)
|
19
15
|
return str
|
@@ -27,10 +23,6 @@ module NHKore
|
|
27
23
|
end
|
28
24
|
end
|
29
25
|
|
30
|
-
###
|
31
|
-
# @author Jonathan Bradley Whited
|
32
|
-
# @since 0.2.0
|
33
|
-
###
|
34
26
|
class BasicVariator < Variator
|
35
27
|
def end_variate(str)
|
36
28
|
return [] # No variations; don't return nil
|
@@ -41,8 +33,6 @@ module NHKore
|
|
41
33
|
# Guesses a word's dictionary/plain form (辞書形).
|
42
34
|
#
|
43
35
|
# It doesn't work very well,but better than nothing...
|
44
|
-
#
|
45
|
-
# @since 0.2.0
|
46
36
|
###
|
47
37
|
class DictFormVariator < Variator
|
48
38
|
attr_accessor :deinflector
|
@@ -66,10 +56,6 @@ module NHKore
|
|
66
56
|
end
|
67
57
|
end
|
68
58
|
|
69
|
-
###
|
70
|
-
# @author Jonathan Bradley Whited
|
71
|
-
# @since 0.2.0
|
72
|
-
###
|
73
59
|
class BestVariator < DictFormVariator
|
74
60
|
end
|
75
61
|
end
|
data/lib/nhkore/version.rb
CHANGED
data/lib/nhkore/word.rb
CHANGED
data/lib/nhkore.rb
CHANGED
data/nhkore.gemspec
CHANGED
@@ -26,48 +26,57 @@ Gem::Specification.new do |spec|
|
|
26
26
|
#'mailing_list_uri' => '',
|
27
27
|
}
|
28
28
|
|
29
|
+
spec.required_ruby_version = '>= 2.5'
|
29
30
|
spec.requirements = [
|
30
31
|
'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html',
|
31
32
|
]
|
32
33
|
|
33
|
-
spec.
|
34
|
-
spec.
|
35
|
-
spec.
|
36
|
-
|
34
|
+
spec.require_paths = ['lib']
|
35
|
+
spec.bindir = 'bin'
|
36
|
+
spec.executables = [spec.name]
|
37
|
+
|
38
|
+
spec.extra_rdoc_files = %w[ CHANGELOG.md LICENSE.txt README.md ]
|
39
|
+
spec.rdoc_options = [
|
40
|
+
'--hyperlink-all','--show-hash',
|
41
|
+
'--title',"NHKore v#{NHKore::VERSION} Doc",
|
42
|
+
'--main','README.md',
|
43
|
+
]
|
37
44
|
|
38
45
|
spec.files = [
|
39
46
|
Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')),
|
40
47
|
Dir.glob(File.join(spec.bindir,'*')),
|
41
48
|
Dir.glob(File.join('{samples,test,yard}','**','*.{erb,rb}')),
|
42
49
|
%W[ Gemfile Gemfile.lock #{spec.name}.gemspec Rakefile .yardopts ],
|
43
|
-
|
50
|
+
spec.extra_rdoc_files,
|
44
51
|
].flatten
|
45
52
|
|
46
|
-
spec.add_runtime_dependency
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
53
|
+
run_dep = spec.method(:add_runtime_dependency)
|
54
|
+
run_dep[ 'attr_bool' ,'~> 0.2' ] # attr_accessor?/attr_reader?.
|
55
|
+
run_dep[ 'bimyou_segmenter' ,'~> 1.2' ] # Splitting Japanese sentences into words.
|
56
|
+
run_dep[ 'cri' ,'~> 2.15' ] # CLI commands/options.
|
57
|
+
run_dep[ 'down' ,'~> 5.4' ] # Downloading files (GetCmd).
|
58
|
+
run_dep[ 'highline' ,'~> 3.1' ] # CLI input/output.
|
59
|
+
run_dep[ 'http-cookie' ,'~> 1.0' ] # Parsing/Setting cookies [(Bing)Scraper].
|
60
|
+
run_dep[ 'japanese_deinflector','~> 0.0' ] # Unconjugating Japanese words (dictionary form).
|
61
|
+
run_dep[ 'nokogiri' ,'~> 1.16' ] # Scraping/Hacking.
|
62
|
+
run_dep[ 'psychgus' ,'~> 1.3' ] # Styling Psych YAML.
|
63
|
+
run_dep[ 'public_suffix' ,'~> 6.0' ] # Parsing URL domain names.
|
64
|
+
run_dep[ 'rainbow' ,'~> 3.1' ] # CLI color output.
|
65
|
+
run_dep[ 'rss' ,'~> 0.3' ] # Scraping [(Bing)Scraper].
|
66
|
+
run_dep[ 'rubyzip' ,'~> 2.3' ] # Extracting Zip files (GetCmd).
|
67
|
+
run_dep[ 'tiny_segmenter' ,'~> 0.0' ] # Splitting Japanese sentences into words.
|
68
|
+
run_dep[ 'tty-progressbar' ,'~> 0.18' ] # CLI progress bars.
|
69
|
+
run_dep[ 'tty-spinner' ,'~> 0.9' ] # CLI spinning progress.
|
62
70
|
|
63
|
-
spec.add_development_dependency
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
+
dev_dep = spec.method(:add_development_dependency)
|
72
|
+
dev_dep[ 'bundler' ,'~> 2.5' ]
|
73
|
+
dev_dep[ 'minitest' ,'~> 5.25' ]
|
74
|
+
dev_dep[ 'rake' ,'~> 13.2' ]
|
75
|
+
dev_dep[ 'raketeer' ,'~> 0.2' ] # Extra Rake tasks.
|
76
|
+
dev_dep[ 'rdoc' ,'~> 6.7' ] # YARDoc RDoc (*.rb).
|
77
|
+
dev_dep[ 'redcarpet' ,'~> 3.6' ] # YARDoc Markdown (*.md).
|
78
|
+
dev_dep[ 'yard' ,'~> 0.9' ] # Doc.
|
79
|
+
dev_dep[ 'yard_ghurt','~> 1.2' ] # Extra YARDoc Rake tasks.
|
71
80
|
|
72
81
|
spec.post_install_message = <<~MSG
|
73
82
|
+=============================================================================+
|
@@ -81,13 +90,7 @@ Gem::Specification.new do |spec|
|
|
81
90
|
| Changelog: #{spec.metadata['changelog_uri']}
|
82
91
|
+=============================================================================+
|
83
92
|
MSG
|
84
|
-
#puts spec.post_install_message.split("\n").map(&:length).max
|
85
|
-
|
86
|
-
spec.extra_rdoc_files = %w[ CHANGELOG.md LICENSE.txt README.md ]
|
87
93
|
|
88
|
-
|
89
|
-
|
90
|
-
'--title',"NHKore v#{NHKore::VERSION} Doc",
|
91
|
-
'--main','README.md',
|
92
|
-
]
|
94
|
+
# Uncomment to see max line length:
|
95
|
+
#puts spec.post_install_message.split("\n").map(&:length).max
|
93
96
|
end
|
data/samples/looper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nhkore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.13
|
4
|
+
version: 0.3.16
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Bradley Whited
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-08-14 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attr_bool
|
@@ -58,28 +58,28 @@ dependencies:
|
|
58
58
|
requirements:
|
59
59
|
- - "~>"
|
60
60
|
- !ruby/object:Gem::Version
|
61
|
-
version: '5.
|
61
|
+
version: '5.4'
|
62
62
|
type: :runtime
|
63
63
|
prerelease: false
|
64
64
|
version_requirements: !ruby/object:Gem::Requirement
|
65
65
|
requirements:
|
66
66
|
- - "~>"
|
67
67
|
- !ruby/object:Gem::Version
|
68
|
-
version: '5.
|
68
|
+
version: '5.4'
|
69
69
|
- !ruby/object:Gem::Dependency
|
70
70
|
name: highline
|
71
71
|
requirement: !ruby/object:Gem::Requirement
|
72
72
|
requirements:
|
73
73
|
- - "~>"
|
74
74
|
- !ruby/object:Gem::Version
|
75
|
-
version: '
|
75
|
+
version: '3.1'
|
76
76
|
type: :runtime
|
77
77
|
prerelease: false
|
78
78
|
version_requirements: !ruby/object:Gem::Requirement
|
79
79
|
requirements:
|
80
80
|
- - "~>"
|
81
81
|
- !ruby/object:Gem::Version
|
82
|
-
version: '
|
82
|
+
version: '3.1'
|
83
83
|
- !ruby/object:Gem::Dependency
|
84
84
|
name: http-cookie
|
85
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -114,14 +114,14 @@ dependencies:
|
|
114
114
|
requirements:
|
115
115
|
- - "~>"
|
116
116
|
- !ruby/object:Gem::Version
|
117
|
-
version: '1.
|
117
|
+
version: '1.16'
|
118
118
|
type: :runtime
|
119
119
|
prerelease: false
|
120
120
|
version_requirements: !ruby/object:Gem::Requirement
|
121
121
|
requirements:
|
122
122
|
- - "~>"
|
123
123
|
- !ruby/object:Gem::Version
|
124
|
-
version: '1.
|
124
|
+
version: '1.16'
|
125
125
|
- !ruby/object:Gem::Dependency
|
126
126
|
name: psychgus
|
127
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -142,14 +142,14 @@ dependencies:
|
|
142
142
|
requirements:
|
143
143
|
- - "~>"
|
144
144
|
- !ruby/object:Gem::Version
|
145
|
-
version: '
|
145
|
+
version: '6.0'
|
146
146
|
type: :runtime
|
147
147
|
prerelease: false
|
148
148
|
version_requirements: !ruby/object:Gem::Requirement
|
149
149
|
requirements:
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
|
-
version: '
|
152
|
+
version: '6.0'
|
153
153
|
- !ruby/object:Gem::Dependency
|
154
154
|
name: rainbow
|
155
155
|
requirement: !ruby/object:Gem::Requirement
|
@@ -170,14 +170,14 @@ dependencies:
|
|
170
170
|
requirements:
|
171
171
|
- - "~>"
|
172
172
|
- !ruby/object:Gem::Version
|
173
|
-
version: '0.
|
173
|
+
version: '0.3'
|
174
174
|
type: :runtime
|
175
175
|
prerelease: false
|
176
176
|
version_requirements: !ruby/object:Gem::Requirement
|
177
177
|
requirements:
|
178
178
|
- - "~>"
|
179
179
|
- !ruby/object:Gem::Version
|
180
|
-
version: '0.
|
180
|
+
version: '0.3'
|
181
181
|
- !ruby/object:Gem::Dependency
|
182
182
|
name: rubyzip
|
183
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -240,42 +240,42 @@ dependencies:
|
|
240
240
|
requirements:
|
241
241
|
- - "~>"
|
242
242
|
- !ruby/object:Gem::Version
|
243
|
-
version: '2.
|
243
|
+
version: '2.5'
|
244
244
|
type: :development
|
245
245
|
prerelease: false
|
246
246
|
version_requirements: !ruby/object:Gem::Requirement
|
247
247
|
requirements:
|
248
248
|
- - "~>"
|
249
249
|
- !ruby/object:Gem::Version
|
250
|
-
version: '2.
|
250
|
+
version: '2.5'
|
251
251
|
- !ruby/object:Gem::Dependency
|
252
252
|
name: minitest
|
253
253
|
requirement: !ruby/object:Gem::Requirement
|
254
254
|
requirements:
|
255
255
|
- - "~>"
|
256
256
|
- !ruby/object:Gem::Version
|
257
|
-
version: '5.
|
257
|
+
version: '5.25'
|
258
258
|
type: :development
|
259
259
|
prerelease: false
|
260
260
|
version_requirements: !ruby/object:Gem::Requirement
|
261
261
|
requirements:
|
262
262
|
- - "~>"
|
263
263
|
- !ruby/object:Gem::Version
|
264
|
-
version: '5.
|
264
|
+
version: '5.25'
|
265
265
|
- !ruby/object:Gem::Dependency
|
266
266
|
name: rake
|
267
267
|
requirement: !ruby/object:Gem::Requirement
|
268
268
|
requirements:
|
269
269
|
- - "~>"
|
270
270
|
- !ruby/object:Gem::Version
|
271
|
-
version: '13.
|
271
|
+
version: '13.2'
|
272
272
|
type: :development
|
273
273
|
prerelease: false
|
274
274
|
version_requirements: !ruby/object:Gem::Requirement
|
275
275
|
requirements:
|
276
276
|
- - "~>"
|
277
277
|
- !ruby/object:Gem::Version
|
278
|
-
version: '13.
|
278
|
+
version: '13.2'
|
279
279
|
- !ruby/object:Gem::Dependency
|
280
280
|
name: raketeer
|
281
281
|
requirement: !ruby/object:Gem::Requirement
|
@@ -296,28 +296,28 @@ dependencies:
|
|
296
296
|
requirements:
|
297
297
|
- - "~>"
|
298
298
|
- !ruby/object:Gem::Version
|
299
|
-
version: '6.
|
299
|
+
version: '6.7'
|
300
300
|
type: :development
|
301
301
|
prerelease: false
|
302
302
|
version_requirements: !ruby/object:Gem::Requirement
|
303
303
|
requirements:
|
304
304
|
- - "~>"
|
305
305
|
- !ruby/object:Gem::Version
|
306
|
-
version: '6.
|
306
|
+
version: '6.7'
|
307
307
|
- !ruby/object:Gem::Dependency
|
308
308
|
name: redcarpet
|
309
309
|
requirement: !ruby/object:Gem::Requirement
|
310
310
|
requirements:
|
311
311
|
- - "~>"
|
312
312
|
- !ruby/object:Gem::Version
|
313
|
-
version: '3.
|
313
|
+
version: '3.6'
|
314
314
|
type: :development
|
315
315
|
prerelease: false
|
316
316
|
version_requirements: !ruby/object:Gem::Requirement
|
317
317
|
requirements:
|
318
318
|
- - "~>"
|
319
319
|
- !ruby/object:Gem::Version
|
320
|
-
version: '3.
|
320
|
+
version: '3.6'
|
321
321
|
- !ruby/object:Gem::Dependency
|
322
322
|
name: yard
|
323
323
|
requirement: !ruby/object:Gem::Requirement
|
@@ -412,7 +412,7 @@ metadata:
|
|
412
412
|
changelog_uri: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md
|
413
413
|
post_install_message: |
|
414
414
|
+=============================================================================+
|
415
|
-
| NHKore v0.3.13
|
415
|
+
| NHKore v0.3.16
|
416
416
|
|
|
417
417
|
| You can now use [nhkore] on the command line.
|
418
418
|
|
|
@@ -425,7 +425,7 @@ rdoc_options:
|
|
425
425
|
- "--hyperlink-all"
|
426
426
|
- "--show-hash"
|
427
427
|
- "--title"
|
428
|
-
- NHKore v0.3.13 Doc
|
428
|
+
- NHKore v0.3.16 Doc
|
429
429
|
- "--main"
|
430
430
|
- README.md
|
431
431
|
require_paths:
|