nhkore 0.3.13 → 0.3.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 21663a8ce4850b7f03361289832cfdb6caa5bc64e62af1ff7cd4b91b7fc2329b
4
- data.tar.gz: de45874fc3834c492ea74ad26067504629185db39fff2f42148a24b4be453cc9
3
+ metadata.gz: 5416a0276ff4d50ef49bac27caea9fc6c6629f1d7736a4429661f5c2d730d7fd
4
+ data.tar.gz: a82b809808ca9b0ae44db8718ea1259b261b7e9ad60292acd22697a02d0a6510
5
5
  SHA512:
6
- metadata.gz: 0ea9413c534cb11d60764e6dd95473b65e6a76418b28ca69441c18741cd6920992f0289a6d9f5c39ed0124023296992f7a60696099ce056b009cadcf8e863867
7
- data.tar.gz: a223dd3e9416b5487274e3218fe55e6c936019a4faf58343a6c4fc9a3bc69c55805c08f42411337ce672766cd72934c423aff734bce766273e9a5707d356eceb
6
+ metadata.gz: e1702feb47ff9db7654306169b08cbc301b198dd047e88667070dab34f4044e037fca1ca45a8e6e35aa2cb08d90fdfb22bab48456ae3a8edac8878a1fc1d89fa
7
+ data.tar.gz: 5893f8a805f5134a5a4fc5fd78233ae70e6196799a8278733f05ffa0cba2447cba18123b00e8a14820f30fba44ce481e03a99b0a55e0d225392865ff27008ec2
data/CHANGELOG.md CHANGED
@@ -5,10 +5,19 @@ All notable changes to this project will be documented in this file.
5
5
  Format is based on [Keep a Changelog v1.0.0](https://keepachangelog.com/en/1.0.0),
6
6
  and this project adheres to [Semantic Versioning v2.0.0](https://semver.org/spec/v2.0.0.html).
7
7
 
8
- ## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.13...HEAD)
8
+ ## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.14...HEAD)
9
9
  -
10
10
 
11
11
 
12
+ ## [v0.3.14] - 2022-07-24
13
+
14
+ ### Added
15
+ - `--loop` option to `search` command so can run web search (search engine) multiple times since this usually doesn't get all results if only do once.
16
+
17
+ ### Fixed
18
+ - Updated gems (`nokogiri`).
19
+
20
+
12
21
  ## [v0.3.13] - 2022-04-27
13
22
 
14
23
  ### Fixed
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- nhkore (0.3.13)
4
+ nhkore (0.3.14)
5
5
  attr_bool (~> 0.2)
6
6
  bimyou_segmenter (~> 1.2)
7
7
  cri (~> 2.15)
@@ -32,15 +32,15 @@ GEM
32
32
  down (5.3.1)
33
33
  addressable (~> 2.8)
34
34
  highline (2.0.3)
35
- http-cookie (1.0.4)
35
+ http-cookie (1.0.5)
36
36
  domain_name (~> 0.5)
37
37
  japanese_deinflector (0.0.2)
38
38
  mini_portile2 (2.8.0)
39
- minitest (5.15.0)
40
- nokogiri (1.13.4)
39
+ minitest (5.16.2)
40
+ nokogiri (1.13.8)
41
41
  mini_portile2 (~> 2.8.0)
42
42
  racc (~> 1.4)
43
- psych (4.0.3)
43
+ psych (4.0.4)
44
44
  stringio
45
45
  psychgus (1.3.4)
46
46
  psych (>= 3.0)
@@ -57,7 +57,7 @@ GEM
57
57
  rss (0.2.9)
58
58
  rexml
59
59
  rubyzip (2.3.2)
60
- stringio (3.0.1)
60
+ stringio (3.0.2)
61
61
  strings-ansi (0.2.0)
62
62
  tiny_segmenter (0.0.6)
63
63
  tty-cursor (0.7.1)
@@ -71,10 +71,10 @@ GEM
71
71
  tty-cursor (~> 0.7)
72
72
  unf (0.1.4)
73
73
  unf_ext
74
- unf_ext (0.0.8.1)
75
- unicode-display_width (2.1.0)
74
+ unf_ext (0.0.8.2)
75
+ unicode-display_width (2.2.0)
76
76
  webrick (1.7.0)
77
- yard (0.9.27)
77
+ yard (0.9.28)
78
78
  webrick (~> 1.7.0)
79
79
  yard_ghurt (1.2.1)
80
80
  rake
@@ -85,7 +85,7 @@ PLATFORMS
85
85
 
86
86
  DEPENDENCIES
87
87
  bundler (~> 2.3)
88
- minitest (~> 5.15)
88
+ minitest (~> 5.16)
89
89
  nhkore!
90
90
  rake (~> 13.0)
91
91
  raketeer (~> 0.2)
@@ -95,4 +95,4 @@ DEPENDENCIES
95
95
  yard_ghurt (~> 1.2)
96
96
 
97
97
  BUNDLED WITH
98
- 2.3.12
98
+ 2.3.18
data/README.md CHANGED
@@ -885,7 +885,7 @@ Releasing new HTML file for website:
885
885
  [GNU LGPL v3+](LICENSE.txt)
886
886
 
887
887
  > NHKore (<https://github.com/esotericpig/nhkore>)
888
- > Copyright (c) 2020-2021 Jonathan Bradley Whited
888
+ > Copyright (c) 2020-2022 Jonathan Bradley Whited
889
889
  >
890
890
  > NHKore is free software: you can redistribute it and/or modify
891
891
  > it under the terms of the GNU Lesser General Public License as published by
data/Rakefile CHANGED
@@ -56,11 +56,11 @@ task :update_core do |task|
56
56
  cmd = ['ruby','-w','./lib/nhkore.rb','-t','300','-m','10']
57
57
  hl = HighLine.new
58
58
 
59
- next unless sh(*cmd,'se','ez','bing')
59
+ next unless sh(*cmd,'se','-l','10','ez','bing')
60
60
  next unless hl.agree(continue_msg)
61
61
  puts
62
62
 
63
- next unless sh(*cmd,'news','-s','500','ez')
63
+ next unless sh(*cmd,'news','-s','1000','ez')
64
64
  next unless hl.agree(continue_msg)
65
65
  puts
66
66
 
data/lib/nhkore/app.rb CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020-2022 Jonathan Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
@@ -537,7 +537,11 @@ module NHKore
537
537
  end
538
538
 
539
539
  def sleep_scraper
540
- sleep(@sleep_time)
540
+ # Do a range to better emulate being a human.
541
+ r = rand(@sleep_time..(@sleep_time + 0.1111))
542
+ s = r.round(3) # Within 1000ms (0.000 - 0.999).
543
+
544
+ sleep(s)
541
545
  end
542
546
 
543
547
  def start_spin(title,detail: '')
@@ -3,7 +3,7 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020-2022 Jonathan Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
@@ -42,6 +42,12 @@ module CLI
42
42
  DESC
43
43
  app.check_empty_opt(:in,value)
44
44
  }
45
+ option :l,:loop,'number of times to repeat the search to ensure results',argument: :required,
46
+ transform: lambda { |value|
47
+ value = value.to_i
48
+ value = 1 if value < 1
49
+ value
50
+ }
45
51
  option :o,:out,<<-DESC,argument: :required,transform: lambda { |value|
46
52
  'directory/file' to save links to; if you only specify a directory or a file, it will attach the
47
53
  appropriate default directory/file name
@@ -164,6 +170,8 @@ module CLI
164
170
 
165
171
  dry_run = @cmd_opts[:dry_run]
166
172
  in_file = @cmd_opts[:in]
173
+ loop_times = @cmd_opts[:loop]
174
+ loop_times = 1 if loop_times.nil? || loop_times < 1
167
175
  out_file = @cmd_opts[:out]
168
176
  result_count = @cmd_opts[:results]
169
177
  result_count = SearchScraper::DEFAULT_RESULT_COUNT if result_count.nil?
@@ -174,9 +182,6 @@ module CLI
174
182
  is_file = !in_file.nil?
175
183
  links = nil
176
184
  new_links = [] # For --dry-run
177
- next_page = NextPage.new
178
- page_count = 0
179
- page_num = 1
180
185
  url = in_file # nil will use default URL, else a file
181
186
 
182
187
  # Load previous links for 'scraped?' vars.
@@ -196,43 +201,52 @@ module CLI
196
201
  end
197
202
 
198
203
  puts "#{scraped_count} of #{links_count} links scraped."
199
-
200
204
  return
201
205
  end
202
206
 
203
- range = (0..10_000) # Do a range to prevent an infinite loop; ichiman!
204
-
205
- case search_type
206
- # Anything that extends SearchScraper.
207
- when :bing
208
- range.each do
209
- scraper = nil
210
-
211
- case search_type
212
- when :bing
213
- scraper = BingScraper.new(nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs)
214
- else
215
- raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
216
- end
207
+ 1.upto(loop_times) do |loop_i|
208
+ page_range = (0..10_000) # Do a range to prevent an infinite loop; ichiman!
209
+
210
+ next_page = NextPage.new
211
+ page_count = 0
212
+ page_num = 1
213
+
214
+ case search_type
215
+ # Anything that extends SearchScraper.
216
+ when :bing
217
+ page_range.each do
218
+ scraper = nil
219
+
220
+ case search_type
221
+ when :bing
222
+ scraper = BingScraper.new(
223
+ nhk_type,count: result_count,is_file: is_file,url: url,**@scraper_kargs
224
+ )
225
+ else
226
+ raise NHKore::Error,"internal code broken; add missing search_type[#{search_type}]"
227
+ end
217
228
 
218
- next_page = scraper.scrape(links,next_page)
229
+ next_page = scraper.scrape(links,next_page)
219
230
 
220
- new_links.concat(links.links.values[links_count..-1])
221
- links_count = links.length
222
- page_count = next_page.count if next_page.count > 0
231
+ new_links.concat(links.links.values[links_count..])
232
+ links_count = links.length
233
+ page_count = next_page.count if next_page.count > 0
223
234
 
224
- update_spin_detail(" (page=#{page_num}, count=#{page_count}, links=#{links.length}," \
225
- " new_links=#{new_links.length})")
235
+ update_spin_detail(
236
+ format(' (%d/%d, page=%d, count=%d, links=%d, new_links=%d)',
237
+ loop_i,loop_times,page_num,page_count,links.length,new_links.length)
238
+ )
226
239
 
227
- break if next_page.empty?
240
+ break if next_page.empty?
228
241
 
229
- page_num += 1
230
- url = next_page.url
242
+ page_num += 1
243
+ url = next_page.url
231
244
 
232
- sleep_scraper
245
+ sleep_scraper
246
+ end
247
+ else
248
+ raise ArgumentError,"invalid search_type[#{search_type}]"
233
249
  end
234
- else
235
- raise ArgumentError,"invalid search_type[#{search_type}]"
236
250
  end
237
251
 
238
252
  stop_spin
@@ -3,7 +3,7 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020-2022 Jonathan Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
@@ -39,7 +39,7 @@ module NHKore
39
39
  i = url.rindex(%r{[/\\]}) # Can be a URL or a file
40
40
  i = i.nil? ? 0 : (i + 1) # If no match found, no path
41
41
 
42
- basename = File.basename(url[i..-1],'.*') if basename.nil?
42
+ basename = File.basename(url[i..],'.*') if basename.nil?
43
43
  path = url[0...i]
44
44
 
45
45
  return "#{path}#{basename}.out.dic"
@@ -10,5 +10,5 @@
10
10
 
11
11
 
12
12
  module NHKore
13
- VERSION = '0.3.13'
13
+ VERSION = '0.3.14'
14
14
  end
data/nhkore.gemspec CHANGED
@@ -26,48 +26,57 @@ Gem::Specification.new do |spec|
26
26
  #'mailing_list_uri' => '',
27
27
  }
28
28
 
29
+ spec.required_ruby_version = '>= 2.5'
29
30
  spec.requirements = [
30
31
  'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html',
31
32
  ]
32
33
 
33
- spec.required_ruby_version = '>= 2.5'
34
- spec.require_paths = ['lib']
35
- spec.bindir = 'bin'
36
- spec.executables = [spec.name]
34
+ spec.require_paths = ['lib']
35
+ spec.bindir = 'bin'
36
+ spec.executables = [spec.name]
37
+
38
+ spec.extra_rdoc_files = %w[ CHANGELOG.md LICENSE.txt README.md ]
39
+ spec.rdoc_options = [
40
+ '--hyperlink-all','--show-hash',
41
+ '--title',"NHKore v#{NHKore::VERSION} Doc",
42
+ '--main','README.md',
43
+ ]
37
44
 
38
45
  spec.files = [
39
46
  Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')),
40
47
  Dir.glob(File.join(spec.bindir,'*')),
41
48
  Dir.glob(File.join('{samples,test,yard}','**','*.{erb,rb}')),
42
49
  %W[ Gemfile Gemfile.lock #{spec.name}.gemspec Rakefile .yardopts ],
43
- %w[ CHANGELOG.md LICENSE.txt README.md ],
50
+ spec.extra_rdoc_files,
44
51
  ].flatten
45
52
 
46
- spec.add_runtime_dependency 'attr_bool' ,'~> 0.2' # For attr_accessor?/attr_reader?
47
- spec.add_runtime_dependency 'bimyou_segmenter' ,'~> 1.2' # For splitting Japanese sentences into words
48
- spec.add_runtime_dependency 'cri' ,'~> 2.15' # For CLI commands/options
49
- spec.add_runtime_dependency 'down' ,'~> 5.3' # For downloading files (GetCmd)
50
- spec.add_runtime_dependency 'highline' ,'~> 2.0' # For CLI input/output
51
- spec.add_runtime_dependency 'http-cookie' ,'~> 1.0' # For parsing/setting cookies (BingScraper/Scraper)
52
- spec.add_runtime_dependency 'japanese_deinflector' ,'~> 0.0' # For unconjugating Japanese words (plain/dictionary form)
53
- spec.add_runtime_dependency 'nokogiri' ,'~> 1.13' # For scraping/hacking
54
- spec.add_runtime_dependency 'psychgus' ,'~> 1.3' # For styling Psych YAML
55
- spec.add_runtime_dependency 'public_suffix' ,'~> 4.0' # For parsing URL domain names
56
- spec.add_runtime_dependency 'rainbow' ,'~> 3.1' # For CLI color output
57
- spec.add_runtime_dependency 'rss' ,'~> 0.2' # For scraping (BingScraper/Scraper)
58
- spec.add_runtime_dependency 'rubyzip' ,'~> 2.3' # For extracting Zip files (GetCmd)
59
- spec.add_runtime_dependency 'tiny_segmenter' ,'~> 0.0' # For splitting Japanese sentences into words
60
- spec.add_runtime_dependency 'tty-progressbar' ,'~> 0.18' # For CLI progress bars
61
- spec.add_runtime_dependency 'tty-spinner' ,'~> 0.9' # For CLI spinning progress
53
+ run_dep = spec.method(:add_runtime_dependency)
54
+ run_dep[ 'attr_bool' ,'~> 0.2' ] # attr_accessor?/attr_reader?.
55
+ run_dep[ 'bimyou_segmenter' ,'~> 1.2' ] # Splitting Japanese sentences into words.
56
+ run_dep[ 'cri' ,'~> 2.15' ] # CLI commands/options.
57
+ run_dep[ 'down' ,'~> 5.3' ] # Downloading files (GetCmd).
58
+ run_dep[ 'highline' ,'~> 2.0' ] # CLI input/output.
59
+ run_dep[ 'http-cookie' ,'~> 1.0' ] # Parsing/Setting cookies [(Bing)Scraper].
60
+ run_dep[ 'japanese_deinflector','~> 0.0' ] # Unconjugating Japanese words (dictionary form).
61
+ run_dep[ 'nokogiri' ,'~> 1.13' ] # Scraping/Hacking.
62
+ run_dep[ 'psychgus' ,'~> 1.3' ] # Styling Psych YAML.
63
+ run_dep[ 'public_suffix' ,'~> 4.0' ] # Parsing URL domain names.
64
+ run_dep[ 'rainbow' ,'~> 3.1' ] # CLI color output.
65
+ run_dep[ 'rss' ,'~> 0.2' ] # Scraping [(Bing)Scraper].
66
+ run_dep[ 'rubyzip' ,'~> 2.3' ] # Extracting Zip files (GetCmd).
67
+ run_dep[ 'tiny_segmenter' ,'~> 0.0' ] # Splitting Japanese sentences into words.
68
+ run_dep[ 'tty-progressbar' ,'~> 0.18' ] # CLI progress bars.
69
+ run_dep[ 'tty-spinner' ,'~> 0.9' ] # CLI spinning progress.
62
70
 
63
- spec.add_development_dependency 'bundler' ,'~> 2.3'
64
- spec.add_development_dependency 'minitest' ,'~> 5.15'
65
- spec.add_development_dependency 'rake' ,'~> 13.0'
66
- spec.add_development_dependency 'raketeer' ,'~> 0.2' # For extra Rake tasks
67
- spec.add_development_dependency 'rdoc' ,'~> 6.4' # For YARDoc RDoc (*.rb)
68
- spec.add_development_dependency 'redcarpet' ,'~> 3.5' # For YARDoc Markdown (*.md)
69
- spec.add_development_dependency 'yard' ,'~> 0.9' # For documentation
70
- spec.add_development_dependency 'yard_ghurt','~> 1.2' # For extra YARDoc Rake tasks
71
+ dev_dep = spec.method(:add_development_dependency)
72
+ dev_dep[ 'bundler' ,'~> 2.3' ]
73
+ dev_dep[ 'minitest' ,'~> 5.16' ]
74
+ dev_dep[ 'rake' ,'~> 13.0' ]
75
+ dev_dep[ 'raketeer' ,'~> 0.2' ] # Extra Rake tasks.
76
+ dev_dep[ 'rdoc' ,'~> 6.4' ] # YARDoc RDoc (*.rb).
77
+ dev_dep[ 'redcarpet' ,'~> 3.5' ] # YARDoc Markdown (*.md).
78
+ dev_dep[ 'yard' ,'~> 0.9' ] # Doc.
79
+ dev_dep[ 'yard_ghurt','~> 1.2' ] # Extra YARDoc Rake tasks.
71
80
 
72
81
  spec.post_install_message = <<~MSG
73
82
  +=============================================================================+
@@ -81,13 +90,7 @@ Gem::Specification.new do |spec|
81
90
  | Changelog: #{spec.metadata['changelog_uri']}
82
91
  +=============================================================================+
83
92
  MSG
84
- #puts spec.post_install_message.split("\n").map(&:length).max
85
-
86
- spec.extra_rdoc_files = %w[ CHANGELOG.md LICENSE.txt README.md ]
87
93
 
88
- spec.rdoc_options = [
89
- '--hyperlink-all','--show-hash',
90
- '--title',"NHKore v#{NHKore::VERSION} Doc",
91
- '--main','README.md',
92
- ]
94
+ # Uncomment to see max line length:
95
+ #puts spec.post_install_message.split("\n").map(&:length).max
93
96
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nhkore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.13
4
+ version: 0.3.14
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Bradley Whited
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2022-04-27 00:00:00.000000000 Z
11
+ date: 2022-07-25 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: attr_bool
@@ -254,14 +254,14 @@ dependencies:
254
254
  requirements:
255
255
  - - "~>"
256
256
  - !ruby/object:Gem::Version
257
- version: '5.15'
257
+ version: '5.16'
258
258
  type: :development
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
262
  - - "~>"
263
263
  - !ruby/object:Gem::Version
264
- version: '5.15'
264
+ version: '5.16'
265
265
  - !ruby/object:Gem::Dependency
266
266
  name: rake
267
267
  requirement: !ruby/object:Gem::Requirement
@@ -412,7 +412,7 @@ metadata:
412
412
  changelog_uri: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md
413
413
  post_install_message: |
414
414
  +=============================================================================+
415
- | NHKore v0.3.13
415
+ | NHKore v0.3.14
416
416
  |
417
417
  | You can now use [nhkore] on the command line.
418
418
  |
@@ -425,7 +425,7 @@ rdoc_options:
425
425
  - "--hyperlink-all"
426
426
  - "--show-hash"
427
427
  - "--title"
428
- - NHKore v0.3.13 Doc
428
+ - NHKore v0.3.14 Doc
429
429
  - "--main"
430
430
  - README.md
431
431
  require_paths: