nhkore 0.3.13 → 0.3.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +10 -1
- data/Gemfile.lock +11 -11
- data/README.md +1 -1
- data/Rakefile +2 -2
- data/lib/nhkore/app.rb +6 -2
- data/lib/nhkore/cli/search_cmd.rb +45 -31
- data/lib/nhkore/dict_scraper.rb +2 -2
- data/lib/nhkore/version.rb +1 -1
- data/nhkore.gemspec +40 -37
- metadata +6 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5416a0276ff4d50ef49bac27caea9fc6c6629f1d7736a4429661f5c2d730d7fd
|
4
|
+
data.tar.gz: a82b809808ca9b0ae44db8718ea1259b261b7e9ad60292acd22697a02d0a6510
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e1702feb47ff9db7654306169b08cbc301b198dd047e88667070dab34f4044e037fca1ca45a8e6e35aa2cb08d90fdfb22bab48456ae3a8edac8878a1fc1d89fa
|
7
|
+
data.tar.gz: 5893f8a805f5134a5a4fc5fd78233ae70e6196799a8278733f05ffa0cba2447cba18123b00e8a14820f30fba44ce481e03a99b0a55e0d225392865ff27008ec2
|
data/CHANGELOG.md
CHANGED
@@ -5,10 +5,19 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
Format is based on [Keep a Changelog v1.0.0](https://keepachangelog.com/en/1.0.0),
|
6
6
|
and this project adheres to [Semantic Versioning v2.0.0](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
-
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.13...HEAD)
|
8
|
+
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.14...HEAD)
|
9
9
|
-
|
10
10
|
|
11
11
|
|
12
|
+
## [v0.3.14] - 2022-07-24
|
13
|
+
|
14
|
+
### Added
|
15
|
+
- `--loop` option to `search` command so can run web search (search engine) multiple times since this usually doesn't get all results if only do once.
|
16
|
+
|
17
|
+
### Fixed
|
18
|
+
- Updated gems (`nokogiri`).
|
19
|
+
|
20
|
+
|
12
21
|
## [v0.3.13] - 2022-04-27
|
13
22
|
|
14
23
|
### Fixed
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
nhkore (0.3.13)
|
4
|
+
nhkore (0.3.14)
|
5
5
|
attr_bool (~> 0.2)
|
6
6
|
bimyou_segmenter (~> 1.2)
|
7
7
|
cri (~> 2.15)
|
@@ -32,15 +32,15 @@ GEM
|
|
32
32
|
down (5.3.1)
|
33
33
|
addressable (~> 2.8)
|
34
34
|
highline (2.0.3)
|
35
|
-
http-cookie (1.0.
|
35
|
+
http-cookie (1.0.5)
|
36
36
|
domain_name (~> 0.5)
|
37
37
|
japanese_deinflector (0.0.2)
|
38
38
|
mini_portile2 (2.8.0)
|
39
|
-
minitest (5.
|
40
|
-
nokogiri (1.13.
|
39
|
+
minitest (5.16.2)
|
40
|
+
nokogiri (1.13.8)
|
41
41
|
mini_portile2 (~> 2.8.0)
|
42
42
|
racc (~> 1.4)
|
43
|
-
psych (4.0.
|
43
|
+
psych (4.0.4)
|
44
44
|
stringio
|
45
45
|
psychgus (1.3.4)
|
46
46
|
psych (>= 3.0)
|
@@ -57,7 +57,7 @@ GEM
|
|
57
57
|
rss (0.2.9)
|
58
58
|
rexml
|
59
59
|
rubyzip (2.3.2)
|
60
|
-
stringio (3.0.
|
60
|
+
stringio (3.0.2)
|
61
61
|
strings-ansi (0.2.0)
|
62
62
|
tiny_segmenter (0.0.6)
|
63
63
|
tty-cursor (0.7.1)
|
@@ -71,10 +71,10 @@ GEM
|
|
71
71
|
tty-cursor (~> 0.7)
|
72
72
|
unf (0.1.4)
|
73
73
|
unf_ext
|
74
|
-
unf_ext (0.0.8.
|
75
|
-
unicode-display_width (2.
|
74
|
+
unf_ext (0.0.8.2)
|
75
|
+
unicode-display_width (2.2.0)
|
76
76
|
webrick (1.7.0)
|
77
|
-
yard (0.9.
|
77
|
+
yard (0.9.28)
|
78
78
|
webrick (~> 1.7.0)
|
79
79
|
yard_ghurt (1.2.1)
|
80
80
|
rake
|
@@ -85,7 +85,7 @@ PLATFORMS
|
|
85
85
|
|
86
86
|
DEPENDENCIES
|
87
87
|
bundler (~> 2.3)
|
88
|
-
minitest (~> 5.
|
88
|
+
minitest (~> 5.16)
|
89
89
|
nhkore!
|
90
90
|
rake (~> 13.0)
|
91
91
|
raketeer (~> 0.2)
|
@@ -95,4 +95,4 @@ DEPENDENCIES
|
|
95
95
|
yard_ghurt (~> 1.2)
|
96
96
|
|
97
97
|
BUNDLED WITH
|
98
|
-
2.3.
|
98
|
+
2.3.18
|
data/README.md
CHANGED
@@ -885,7 +885,7 @@ Releasing new HTML file for website:
|
|
885
885
|
[GNU LGPL v3+](LICENSE.txt)
|
886
886
|
|
887
887
|
> NHKore (<https://github.com/esotericpig/nhkore>)
|
888
|
-
> Copyright (c) 2020-
|
888
|
+
> Copyright (c) 2020-2022 Jonathan Bradley Whited
|
889
889
|
>
|
890
890
|
> NHKore is free software: you can redistribute it and/or modify
|
891
891
|
> it under the terms of the GNU Lesser General Public License as published by
|
data/Rakefile
CHANGED
@@ -56,11 +56,11 @@ task :update_core do |task|
|
|
56
56
|
cmd = ['ruby','-w','./lib/nhkore.rb','-t','300','-m','10']
|
57
57
|
hl = HighLine.new
|
58
58
|
|
59
|
-
next unless sh(*cmd,'se','ez','bing')
|
59
|
+
next unless sh(*cmd,'se','-l','10','ez','bing')
|
60
60
|
next unless hl.agree(continue_msg)
|
61
61
|
puts
|
62
62
|
|
63
|
-
next unless sh(*cmd,'news','-s','
|
63
|
+
next unless sh(*cmd,'news','-s','1000','ez')
|
64
64
|
next unless hl.agree(continue_msg)
|
65
65
|
puts
|
66
66
|
|
data/lib/nhkore/app.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020-
|
6
|
+
# Copyright (c) 2020-2022 Jonathan Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
@@ -537,7 +537,11 @@ module NHKore
|
|
537
537
|
end
|
538
538
|
|
539
539
|
def sleep_scraper
|
540
|
-
|
540
|
+
# Do a range to better emulate being a human.
|
541
|
+
r = rand(@sleep_time..(@sleep_time + 0.1111))
|
542
|
+
s = r.round(3) # Within 1000ms (0.000 - 0.999).
|
543
|
+
|
544
|
+
sleep(s)
|
541
545
|
end
|
542
546
|
|
543
547
|
def start_spin(title,detail: '')
|
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020-
|
6
|
+
# Copyright (c) 2020-2022 Jonathan Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
@@ -42,6 +42,12 @@ module CLI
|
|
42
42
|
DESC
|
43
43
|
app.check_empty_opt(:in,value)
|
44
44
|
}
|
45
|
+
option :l,:loop,'number of times to repeat the search to ensure results',argument: :required,
|
46
|
+
transform: lambda { |value|
|
47
|
+
value = value.to_i
|
48
|
+
value = 1 if value < 1
|
49
|
+
value
|
50
|
+
}
|
45
51
|
option :o,:out,<<-DESC,argument: :required,transform: lambda { |value|
|
46
52
|
'directory/file' to save links to; if you only specify a directory or a file, it will attach the
|
47
53
|
appropriate default directory/file name
|
@@ -164,6 +170,8 @@ module CLI
|
|
164
170
|
|
165
171
|
dry_run = @cmd_opts[:dry_run]
|
166
172
|
in_file = @cmd_opts[:in]
|
173
|
+
loop_times = @cmd_opts[:loop]
|
174
|
+
loop_times = 1 if loop_times.nil? || loop_times < 1
|
167
175
|
out_file = @cmd_opts[:out]
|
168
176
|
result_count = @cmd_opts[:results]
|
169
177
|
result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?
|
@@ -174,9 +182,6 @@ module CLI
|
|
174
182
|
is_file = !in_file.nil?
|
175
183
|
links = nil
|
176
184
|
new_links = [] # For --dry-run
|
177
|
-
next_page = NextPage.new
|
178
|
-
page_count = 0
|
179
|
-
page_num = 1
|
180
185
|
url = in_file # nil will use default URL, else a file
|
181
186
|
|
182
187
|
# Load previous links for 'scraped?' vars.
|
@@ -196,43 +201,52 @@ module CLI
|
|
196
201
|
end
|
197
202
|
|
198
203
|
puts "#{scraped_count} of #{links_count} links scraped."
|
199
|
-
|
200
204
|
return
|
201
205
|
end
|
202
206
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
207
|
+
1.upto(loop_times) do |loop_i|
|
208
|
+
page_range = (0..10_000) # Do a range to prevent an infinite loop; ichiman!
|
209
|
+
|
210
|
+
next_page = NextPage.new
|
211
|
+
page_count = 0
|
212
|
+
page_num = 1
|
213
|
+
|
214
|
+
case search_type
|
215
|
+
# Anything that extends SearchScraper.
|
216
|
+
when :bing
|
217
|
+
page_range.each do
|
218
|
+
scraper = nil
|
219
|
+
|
220
|
+
case search_type
|
221
|
+
when :bing
|
222
|
+
scraper = BingScraper.new(
|
223
|
+
nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs
|
224
|
+
)
|
225
|
+
else
|
226
|
+
raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
|
227
|
+
end
|
217
228
|
|
218
|
-
|
229
|
+
next_page = scraper.scrape(links,next_page)
|
219
230
|
|
220
|
-
|
221
|
-
|
222
|
-
|
231
|
+
new_links.concat(links.links.values[links_count..])
|
232
|
+
links_count = links.length
|
233
|
+
page_count = next_page.count if next_page.count > 0
|
223
234
|
|
224
|
-
|
225
|
-
|
235
|
+
update_spin_detail(
|
236
|
+
format(' (%d/%d, page=%d, count=%d, links=%d, new_links=%d)',
|
237
|
+
loop_i,loop_times,page_num,page_count,links.length,new_links.length)
|
238
|
+
)
|
226
239
|
|
227
|
-
|
240
|
+
break if next_page.empty?
|
228
241
|
|
229
|
-
|
230
|
-
|
242
|
+
page_num += 1
|
243
|
+
url = next_page.url
|
231
244
|
|
232
|
-
|
245
|
+
sleep_scraper
|
246
|
+
end
|
247
|
+
else
|
248
|
+
raise ArgumentError,"invalid search_type[#{search_type}]"
|
233
249
|
end
|
234
|
-
else
|
235
|
-
raise ArgumentError,"invalid search_type[#{search_type}]"
|
236
250
|
end
|
237
251
|
|
238
252
|
stop_spin
|
data/lib/nhkore/dict_scraper.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020-
|
6
|
+
# Copyright (c) 2020-2022 Jonathan Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
@@ -39,7 +39,7 @@ module NHKore
|
|
39
39
|
i = url.rindex(%r{[/\\]}) # Can be a URL or a file
|
40
40
|
i = i.nil? ? 0 : (i + 1) # If no match found, no path
|
41
41
|
|
42
|
-
basename = File.basename(url[i
|
42
|
+
basename = File.basename(url[i..],'.*') if basename.nil?
|
43
43
|
path = url[0...i]
|
44
44
|
|
45
45
|
return "#{path}#{basename}.out.dic"
|
data/lib/nhkore/version.rb
CHANGED
data/nhkore.gemspec
CHANGED
@@ -26,48 +26,57 @@ Gem::Specification.new do |spec|
|
|
26
26
|
#'mailing_list_uri' => '',
|
27
27
|
}
|
28
28
|
|
29
|
+
spec.required_ruby_version = '>= 2.5'
|
29
30
|
spec.requirements = [
|
30
31
|
'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html',
|
31
32
|
]
|
32
33
|
|
33
|
-
spec.
|
34
|
-
spec.
|
35
|
-
spec.
|
36
|
-
|
34
|
+
spec.require_paths = ['lib']
|
35
|
+
spec.bindir = 'bin'
|
36
|
+
spec.executables = [spec.name]
|
37
|
+
|
38
|
+
spec.extra_rdoc_files = %w[ CHANGELOG.md LICENSE.txt README.md ]
|
39
|
+
spec.rdoc_options = [
|
40
|
+
'--hyperlink-all','--show-hash',
|
41
|
+
'--title',"NHKore v#{NHKore::VERSION} Doc",
|
42
|
+
'--main','README.md',
|
43
|
+
]
|
37
44
|
|
38
45
|
spec.files = [
|
39
46
|
Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')),
|
40
47
|
Dir.glob(File.join(spec.bindir,'*')),
|
41
48
|
Dir.glob(File.join('{samples,test,yard}','**','*.{erb,rb}')),
|
42
49
|
%W[ Gemfile Gemfile.lock #{spec.name}.gemspec Rakefile .yardopts ],
|
43
|
-
|
50
|
+
spec.extra_rdoc_files,
|
44
51
|
].flatten
|
45
52
|
|
46
|
-
spec.add_runtime_dependency
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
53
|
+
run_dep = spec.method(:add_runtime_dependency)
|
54
|
+
run_dep[ 'attr_bool' ,'~> 0.2' ] # attr_accessor?/attr_reader?.
|
55
|
+
run_dep[ 'bimyou_segmenter' ,'~> 1.2' ] # Splitting Japanese sentences into words.
|
56
|
+
run_dep[ 'cri' ,'~> 2.15' ] # CLI commands/options.
|
57
|
+
run_dep[ 'down' ,'~> 5.3' ] # Downloading files (GetCmd).
|
58
|
+
run_dep[ 'highline' ,'~> 2.0' ] # CLI input/output.
|
59
|
+
run_dep[ 'http-cookie' ,'~> 1.0' ] # Parsing/Setting cookies [(Bing)Scraper].
|
60
|
+
run_dep[ 'japanese_deinflector','~> 0.0' ] # Unconjugating Japanese words (dictionary form).
|
61
|
+
run_dep[ 'nokogiri' ,'~> 1.13' ] # Scraping/Hacking.
|
62
|
+
run_dep[ 'psychgus' ,'~> 1.3' ] # Styling Psych YAML.
|
63
|
+
run_dep[ 'public_suffix' ,'~> 4.0' ] # Parsing URL domain names.
|
64
|
+
run_dep[ 'rainbow' ,'~> 3.1' ] # CLI color output.
|
65
|
+
run_dep[ 'rss' ,'~> 0.2' ] # Scraping [(Bing)Scraper].
|
66
|
+
run_dep[ 'rubyzip' ,'~> 2.3' ] # Extracting Zip files (GetCmd).
|
67
|
+
run_dep[ 'tiny_segmenter' ,'~> 0.0' ] # Splitting Japanese sentences into words.
|
68
|
+
run_dep[ 'tty-progressbar' ,'~> 0.18' ] # CLI progress bars.
|
69
|
+
run_dep[ 'tty-spinner' ,'~> 0.9' ] # CLI spinning progress.
|
62
70
|
|
63
|
-
spec.add_development_dependency
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
+
dev_dep = spec.method(:add_development_dependency)
|
72
|
+
dev_dep[ 'bundler' ,'~> 2.3' ]
|
73
|
+
dev_dep[ 'minitest' ,'~> 5.16' ]
|
74
|
+
dev_dep[ 'rake' ,'~> 13.0' ]
|
75
|
+
dev_dep[ 'raketeer' ,'~> 0.2' ] # Extra Rake tasks.
|
76
|
+
dev_dep[ 'rdoc' ,'~> 6.4' ] # YARDoc RDoc (*.rb).
|
77
|
+
dev_dep[ 'redcarpet' ,'~> 3.5' ] # YARDoc Markdown (*.md).
|
78
|
+
dev_dep[ 'yard' ,'~> 0.9' ] # Doc.
|
79
|
+
dev_dep[ 'yard_ghurt','~> 1.2' ] # Extra YARDoc Rake tasks.
|
71
80
|
|
72
81
|
spec.post_install_message = <<~MSG
|
73
82
|
+=============================================================================+
|
@@ -81,13 +90,7 @@ Gem::Specification.new do |spec|
|
|
81
90
|
| Changelog: #{spec.metadata['changelog_uri']}
|
82
91
|
+=============================================================================+
|
83
92
|
MSG
|
84
|
-
#puts spec.post_install_message.split("\n").map(&:length).max
|
85
|
-
|
86
|
-
spec.extra_rdoc_files = %w[ CHANGELOG.md LICENSE.txt README.md ]
|
87
93
|
|
88
|
-
|
89
|
-
|
90
|
-
'--title',"NHKore v#{NHKore::VERSION} Doc",
|
91
|
-
'--main','README.md',
|
92
|
-
]
|
94
|
+
# Uncomment to see max line length:
|
95
|
+
#puts spec.post_install_message.split("\n").map(&:length).max
|
93
96
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nhkore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.13
|
4
|
+
version: 0.3.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Bradley Whited
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-25 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attr_bool
|
@@ -254,14 +254,14 @@ dependencies:
|
|
254
254
|
requirements:
|
255
255
|
- - "~>"
|
256
256
|
- !ruby/object:Gem::Version
|
257
|
-
version: '5.
|
257
|
+
version: '5.16'
|
258
258
|
type: :development
|
259
259
|
prerelease: false
|
260
260
|
version_requirements: !ruby/object:Gem::Requirement
|
261
261
|
requirements:
|
262
262
|
- - "~>"
|
263
263
|
- !ruby/object:Gem::Version
|
264
|
-
version: '5.
|
264
|
+
version: '5.16'
|
265
265
|
- !ruby/object:Gem::Dependency
|
266
266
|
name: rake
|
267
267
|
requirement: !ruby/object:Gem::Requirement
|
@@ -412,7 +412,7 @@ metadata:
|
|
412
412
|
changelog_uri: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md
|
413
413
|
post_install_message: |
|
414
414
|
+=============================================================================+
|
415
|
-
| NHKore v0.3.13
|
415
|
+
| NHKore v0.3.14
|
416
416
|
|
|
417
417
|
| You can now use [nhkore] on the command line.
|
418
418
|
|
|
@@ -425,7 +425,7 @@ rdoc_options:
|
|
425
425
|
- "--hyperlink-all"
|
426
426
|
- "--show-hash"
|
427
427
|
- "--title"
|
428
|
-
- NHKore v0.3.13 Doc
|
428
|
+
- NHKore v0.3.14 Doc
|
429
429
|
- "--main"
|
430
430
|
- README.md
|
431
431
|
require_paths:
|