nhkore 0.3.17 → 0.3.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.yardopts +1 -1
- data/CHANGELOG.md +24 -1
- data/Gemfile +14 -1
- data/Gemfile.lock +29 -29
- data/README.md +2 -7
- data/Rakefile +19 -52
- data/bin/nhkore +1 -3
- data/lib/nhkore/app.rb +41 -46
- data/lib/nhkore/article.rb +9 -11
- data/lib/nhkore/article_scraper.rb +30 -29
- data/lib/nhkore/cleaner.rb +1 -3
- data/lib/nhkore/cli/fx_cmd.rb +17 -22
- data/lib/nhkore/cli/get_cmd.rb +5 -7
- data/lib/nhkore/cli/news_cmd.rb +14 -19
- data/lib/nhkore/cli/search_cmd.rb +11 -14
- data/lib/nhkore/cli/sift_cmd.rb +13 -15
- data/lib/nhkore/datetime_parser.rb +35 -37
- data/lib/nhkore/defn.rb +2 -4
- data/lib/nhkore/dict.rb +1 -3
- data/lib/nhkore/dict_scraper.rb +1 -3
- data/lib/nhkore/entry.rb +1 -3
- data/lib/nhkore/error.rb +1 -2
- data/lib/nhkore/fileable.rb +1 -2
- data/lib/nhkore/lib.rb +5 -12
- data/lib/nhkore/missingno.rb +1 -3
- data/lib/nhkore/news.rb +7 -10
- data/lib/nhkore/polisher.rb +1 -3
- data/lib/nhkore/scraper.rb +23 -13
- data/lib/nhkore/search_link.rb +11 -13
- data/lib/nhkore/search_scraper.rb +26 -15
- data/lib/nhkore/sifter.rb +7 -9
- data/lib/nhkore/splitter.rb +1 -3
- data/lib/nhkore/util.rb +8 -8
- data/lib/nhkore/variator.rb +3 -4
- data/lib/nhkore/version.rb +2 -3
- data/lib/nhkore/word.rb +8 -10
- data/lib/nhkore.rb +3 -11
- data/nhkore.gemspec +41 -47
- data/samples/looper.rb +1 -2
- data/test/nhkore/test_helper.rb +1 -8
- data/test/nhkore_test.rb +5 -9
- metadata +55 -139
- data/lib/nhkore/user_agents.rb +0 -1172
- data/yard/templates/default/layout/html/footer.erb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a63aff9e86de6a678a4b4ad3defa9bf7d28577dabad0ef0a69747ddce1224219
|
4
|
+
data.tar.gz: 12668a0e95694198c928644d51be450c485c0947f5741dd01ab2ff8e20b0e760
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c7981a84189176febf8156251b850e604ef1c447e1c78d0a8af5a4a509b68c0cf564b54e7cfac4582d1c572a50da0f05f9e268b19e4ac9d170ae0806688f68d
|
7
|
+
data.tar.gz: fd540fd9952b5377fcf4db72e4a5e71724d0a8d20ae256ecc4bb8293c672cd0fea1284e2639444fe8fbd892859fbac85ebc66fbabe96c51fdd60322e96f8419a
|
data/.yardopts
CHANGED
data/CHANGELOG.md
CHANGED
@@ -5,10 +5,33 @@ All notable changes to this project will be documented in this file.
|
|
5
5
|
Format is based on [Keep a Changelog v1.0.0](https://keepachangelog.com/en/1.0.0),
|
6
6
|
and this project adheres to [Semantic Versioning v2.0.0](https://semver.org/spec/v2.0.0.html).
|
7
7
|
|
8
|
-
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.
|
8
|
+
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.19...HEAD)
|
9
9
|
-
|
10
10
|
|
11
11
|
|
12
|
+
## [v0.3.19] - 2025-04-28
|
13
|
+
|
14
|
+
### Fixed
|
15
|
+
- Fixed to include `fileutils` for `news` cmd when directory doesn't exist.
|
16
|
+
|
17
|
+
### Changed
|
18
|
+
- Removed `UserAgents`. Replaced with Gem `ronin-web-user_agents`.
|
19
|
+
- Made some changes to `BingScraper`. Ultimately, it just doesn't work anymore, as Bing has become too strict. In the future, need to use a different search engine or a different way. Leaving for now as a zombie, and unnecessary for most NHKore functionality anyway.
|
20
|
+
|
21
|
+
|
22
|
+
## [v0.3.18] - 2025-04-24
|
23
|
+
|
24
|
+
### Fixed
|
25
|
+
- Bing no longer allows `count`, so removed it. No workaround/fix for now....
|
26
|
+
- New NHK Easy pages no longer have a dictionary, so changed it to only warn instead of an exception.
|
27
|
+
|
28
|
+
### Changed
|
29
|
+
- Changed Nokogiri gem version to `~> 1`, instead of `~> 1.xx`, as I got tired of the security alerts. Now, it will always be up-to-date and secure.
|
30
|
+
- Updated gems.
|
31
|
+
- Removed `yard_ghurt` gem.
|
32
|
+
- Applied new RuboCop suggestions.
|
33
|
+
|
34
|
+
|
12
35
|
## [v0.3.17] - 2024-09-03
|
13
36
|
|
14
37
|
### Changed
|
data/Gemfile
CHANGED
@@ -1,7 +1,20 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
5
4
|
source 'https://rubygems.org'
|
6
5
|
|
7
6
|
gemspec
|
7
|
+
|
8
|
+
group :development,:test do
|
9
|
+
gem 'bundler' ,'~> 2.6'
|
10
|
+
gem 'rake' ,'~> 13.2'
|
11
|
+
gem 'raketeer' ,'~> 0.2' # Extra Rake tasks.
|
12
|
+
# Doc.
|
13
|
+
gem 'rdoc' ,'~> 6.13' # YARDoc RDoc (*.rb).
|
14
|
+
gem 'redcarpet','~> 3.6' # YARDoc Markdown (*.md).
|
15
|
+
gem 'yard' ,'~> 0.9' # YARDoc doc.
|
16
|
+
end
|
17
|
+
|
18
|
+
group :test do
|
19
|
+
gem 'minitest','~> 5.25'
|
20
|
+
end
|
data/Gemfile.lock
CHANGED
@@ -1,20 +1,22 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
nhkore (0.3.
|
4
|
+
nhkore (0.3.19)
|
5
5
|
attr_bool (~> 0.2)
|
6
6
|
bimyou_segmenter (~> 1.2)
|
7
7
|
cri (~> 2.15)
|
8
|
+
csv (~> 3.3)
|
8
9
|
down (~> 5.4)
|
9
10
|
highline (~> 3.1)
|
10
11
|
http-cookie (~> 1.0)
|
11
12
|
japanese_deinflector (~> 0.0)
|
12
|
-
nokogiri (~> 1
|
13
|
+
nokogiri (~> 1)
|
13
14
|
psychgus (~> 1.3)
|
14
15
|
public_suffix (~> 6.0)
|
15
16
|
rainbow (~> 3.1)
|
17
|
+
ronin-web-user_agents (~> 0.1)
|
16
18
|
rss (~> 0.3)
|
17
|
-
rubyzip (~> 2.
|
19
|
+
rubyzip (~> 2.4)
|
18
20
|
tiny_segmenter (~> 0.0)
|
19
21
|
tty-progressbar (~> 0.18)
|
20
22
|
tty-spinner (~> 0.9)
|
@@ -27,23 +29,26 @@ GEM
|
|
27
29
|
attr_bool (0.2.2)
|
28
30
|
bimyou_segmenter (1.2.0)
|
29
31
|
cri (2.15.12)
|
32
|
+
csv (3.3.4)
|
33
|
+
date (3.4.1)
|
30
34
|
domain_name (0.6.20240107)
|
31
35
|
down (5.4.2)
|
32
36
|
addressable (~> 2.8)
|
33
|
-
highline (3.1.
|
37
|
+
highline (3.1.2)
|
34
38
|
reline
|
35
|
-
http-cookie (1.0.
|
39
|
+
http-cookie (1.0.8)
|
36
40
|
domain_name (~> 0.5)
|
37
|
-
io-console (0.
|
41
|
+
io-console (0.8.0)
|
38
42
|
japanese_deinflector (0.0.2)
|
39
|
-
mini_portile2 (2.8.
|
40
|
-
minitest (5.25.
|
41
|
-
nokogiri (1.
|
43
|
+
mini_portile2 (2.8.8)
|
44
|
+
minitest (5.25.5)
|
45
|
+
nokogiri (1.18.8)
|
42
46
|
mini_portile2 (~> 2.8.2)
|
43
47
|
racc (~> 1.4)
|
44
|
-
psych (5.
|
48
|
+
psych (5.2.3)
|
49
|
+
date
|
45
50
|
stringio
|
46
|
-
psychgus (1.3.
|
51
|
+
psychgus (1.3.5)
|
47
52
|
psych (>= 3.0)
|
48
53
|
public_suffix (6.0.1)
|
49
54
|
racc (1.8.1)
|
@@ -51,22 +56,21 @@ GEM
|
|
51
56
|
rake (13.2.1)
|
52
57
|
raketeer (0.2.13)
|
53
58
|
rake
|
54
|
-
rdoc (6.
|
59
|
+
rdoc (6.13.1)
|
55
60
|
psych (>= 4.0.0)
|
56
|
-
redcarpet (3.6.
|
57
|
-
reline (0.
|
61
|
+
redcarpet (3.6.1)
|
62
|
+
reline (0.6.1)
|
58
63
|
io-console (~> 0.5)
|
59
|
-
rexml (3.
|
60
|
-
|
64
|
+
rexml (3.4.1)
|
65
|
+
ronin-web-user_agents (0.1.1)
|
61
66
|
rss (0.3.1)
|
62
67
|
rexml
|
63
|
-
rubyzip (2.
|
64
|
-
stringio (3.1.
|
68
|
+
rubyzip (2.4.1)
|
69
|
+
stringio (3.1.7)
|
65
70
|
strings-ansi (0.2.0)
|
66
|
-
strscan (3.1.0)
|
67
71
|
tiny_segmenter (0.0.6)
|
68
72
|
tty-cursor (0.7.1)
|
69
|
-
tty-progressbar (0.18.
|
73
|
+
tty-progressbar (0.18.3)
|
70
74
|
strings-ansi (~> 0.2)
|
71
75
|
tty-cursor (~> 0.7)
|
72
76
|
tty-screen (~> 0.8)
|
@@ -74,25 +78,21 @@ GEM
|
|
74
78
|
tty-screen (0.8.2)
|
75
79
|
tty-spinner (0.9.3)
|
76
80
|
tty-cursor (~> 0.7)
|
77
|
-
unicode-display_width (2.
|
78
|
-
yard (0.9.
|
79
|
-
yard_ghurt (1.2.1)
|
80
|
-
rake
|
81
|
-
yard
|
81
|
+
unicode-display_width (2.6.0)
|
82
|
+
yard (0.9.37)
|
82
83
|
|
83
84
|
PLATFORMS
|
84
85
|
ruby
|
85
86
|
|
86
87
|
DEPENDENCIES
|
87
|
-
bundler (~> 2.
|
88
|
+
bundler (~> 2.6)
|
88
89
|
minitest (~> 5.25)
|
89
90
|
nhkore!
|
90
91
|
rake (~> 13.2)
|
91
92
|
raketeer (~> 0.2)
|
92
|
-
rdoc (~> 6.
|
93
|
+
rdoc (~> 6.13)
|
93
94
|
redcarpet (~> 3.6)
|
94
95
|
yard (~> 0.9)
|
95
|
-
yard_ghurt (~> 1.2)
|
96
96
|
|
97
97
|
BUNDLED WITH
|
98
|
-
2.
|
98
|
+
2.6.8
|
data/README.md
CHANGED
@@ -732,7 +732,7 @@ if !File.exist?(file)
|
|
732
732
|
end
|
733
733
|
```
|
734
734
|
|
735
|
-
### Util
|
735
|
+
### Util & DatetimeParser
|
736
736
|
|
737
737
|
These provide a variety of useful methods/constants.
|
738
738
|
|
@@ -740,7 +740,6 @@ Here are some of the most useful ones:
|
|
740
740
|
|
741
741
|
```Ruby
|
742
742
|
require 'nhkore/datetime_parser'
|
743
|
-
require 'nhkore/user_agents'
|
744
743
|
require 'nhkore/util'
|
745
744
|
|
746
745
|
include NHKore
|
@@ -748,10 +747,6 @@ include NHKore
|
|
748
747
|
puts '======='
|
749
748
|
puts '[ Net ]'
|
750
749
|
puts '======='
|
751
|
-
# Get a random User Agent for HTTP header field 'User-Agent'.
|
752
|
-
# - This is used by default in Scraper/SearchScraper.
|
753
|
-
puts "User-Agent: #{UserAgents.sample()}"
|
754
|
-
|
755
750
|
uri = URI('https://www.bing.com/search?q=nhk')
|
756
751
|
Util.replace_uri_query!(uri,q: 'banana')
|
757
752
|
|
@@ -884,7 +879,7 @@ Releasing new HTML file for website:
|
|
884
879
|
[GNU LGPL v3+](LICENSE.txt)
|
885
880
|
|
886
881
|
> NHKore (<https://github.com/esotericpig/nhkore>)
|
887
|
-
> Copyright (c) 2020-
|
882
|
+
> Copyright (c) 2020-2025 Bradley Whited
|
888
883
|
>
|
889
884
|
> NHKore is free software: you can redistribute it and/or modify
|
890
885
|
> it under the terms of the GNU Lesser General Public License as published by
|
data/Rakefile
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
|
4
|
-
|
5
4
|
require 'bundler/gem_tasks'
|
6
5
|
|
7
6
|
require 'rake/clean'
|
@@ -9,46 +8,41 @@ require 'rake/testtask'
|
|
9
8
|
require 'raketeer/irb'
|
10
9
|
require 'raketeer/nokogiri_installs'
|
11
10
|
require 'yard'
|
12
|
-
require 'yard_ghurt'
|
13
11
|
|
14
12
|
require 'nhkore/util'
|
15
13
|
require 'nhkore/version'
|
16
14
|
|
17
|
-
|
18
15
|
PKG_DIR = 'pkg'
|
19
16
|
|
20
17
|
CLEAN.exclude('{.git,core,stock}/**/*')
|
21
|
-
CLOBBER.include('doc/',
|
22
|
-
|
18
|
+
CLOBBER.include('doc/',"#{PKG_DIR}/")
|
23
19
|
|
24
|
-
task default: [
|
20
|
+
task default: %i[test]
|
25
21
|
|
26
22
|
desc 'Generate documentation (YARDoc)'
|
27
|
-
task doc: %i[yard
|
28
|
-
end
|
23
|
+
task doc: %i[yard]
|
29
24
|
|
30
|
-
desc "Package '#{
|
31
|
-
task :pkg_core do |
|
25
|
+
desc "Package '#{NHKore::Util::CORE_DIR}/' data as a Zip file into '#{PKG_DIR}/'"
|
26
|
+
task :pkg_core do |_task|
|
32
27
|
mkdir_p PKG_DIR
|
33
28
|
|
34
|
-
pattern =
|
29
|
+
pattern = "#{NHKore::Util::CORE_DIR}/*.{csv,html,json,yml}"
|
35
30
|
zip_file = File.join(PKG_DIR,'nhkore-core.zip')
|
36
31
|
|
37
|
-
sh 'zip','-9rv',zip_file,*Dir.glob(pattern)
|
32
|
+
sh 'zip','-9rv',zip_file,*Dir.glob(pattern)
|
38
33
|
end
|
39
34
|
|
40
35
|
Rake::TestTask.new do |task|
|
41
|
-
task.libs = [
|
42
|
-
task.pattern =
|
43
|
-
task.description += ": '#{task.pattern}'"
|
36
|
+
task.libs = %w[lib test]
|
37
|
+
task.pattern = 'test/**/*_test.rb'
|
44
38
|
task.verbose = false
|
45
39
|
task.warning = true
|
46
40
|
end
|
47
41
|
|
48
42
|
# If you need to run a part after the 1st part,
|
49
43
|
# just type 'n' to not overwrite the file and then 'y' for continue.
|
50
|
-
desc "Update '#{
|
51
|
-
task :update_core do |
|
44
|
+
desc "Update '#{NHKore::Util::CORE_DIR}/' files for release"
|
45
|
+
task :update_core do |_task|
|
52
46
|
require 'highline'
|
53
47
|
|
54
48
|
continue_msg = "\nContinue (y/n)? "
|
@@ -63,7 +57,7 @@ task :update_core do |task|
|
|
63
57
|
next unless hl.agree(continue_msg)
|
64
58
|
puts
|
65
59
|
|
66
|
-
next unless sh(*cmd,'news','-s','1000','ez')
|
60
|
+
next unless sh(*cmd,'news','-s','1000','ez','--lenient')
|
67
61
|
next unless hl.agree(continue_msg)
|
68
62
|
puts
|
69
63
|
|
@@ -78,7 +72,7 @@ task :update_core do |task|
|
|
78
72
|
end
|
79
73
|
|
80
74
|
desc 'Update showcase file for release'
|
81
|
-
task :update_showcase do |
|
75
|
+
task :update_showcase do |_task|
|
82
76
|
require 'highline'
|
83
77
|
|
84
78
|
showcase_file = File.join('.','nhkore-ez.html')
|
@@ -86,43 +80,16 @@ task :update_showcase do |task|
|
|
86
80
|
hl = HighLine.new
|
87
81
|
|
88
82
|
next unless sh('ruby','-w','./lib/nhkore.rb',
|
89
|
-
|
90
|
-
|
91
|
-
|
83
|
+
'sift','ez','--no-eng',
|
84
|
+
'--out',showcase_file)
|
85
|
+
|
86
|
+
dest_dir = File.join('..','esotericpig.github.io','showcase','')
|
92
87
|
|
93
|
-
next unless hl.agree("\nMove the file (y/n)? ")
|
88
|
+
next unless hl.agree("\nMove the file to '#{dest_dir}' (y/n)? ")
|
94
89
|
puts
|
95
|
-
next unless sh('mv','-iv',showcase_file,
|
96
|
-
File.join('..','esotericpig.github.io','showcase',''),
|
97
|
-
)
|
90
|
+
next unless sh('mv','-iv',showcase_file,dest_dir)
|
98
91
|
end
|
99
92
|
|
100
93
|
YARD::Rake::YardocTask.new do |task|
|
101
|
-
task.options += ['--template-path',File.join('yard','templates')]
|
102
94
|
task.options += ['--title',"NHKore v#{NHKore::VERSION} Doc"]
|
103
95
|
end
|
104
|
-
|
105
|
-
# Execute "rake yard_gfm_fix" for production.
|
106
|
-
# Execute "rake yard_gfm_fix[true]" for testing locally.
|
107
|
-
YardGhurt::GFMFixTask.new do |task|
|
108
|
-
task.arg_names = [:dev]
|
109
|
-
task.dry_run = false
|
110
|
-
task.fix_code_langs = true
|
111
|
-
task.md_files = ['index.html']
|
112
|
-
|
113
|
-
task.before = proc do |t,args|
|
114
|
-
# Delete this file as it's never used (index.html is an exact copy).
|
115
|
-
YardGhurt::Util.rm_exist(File.join(t.doc_dir,'file.README.html'))
|
116
|
-
|
117
|
-
# Root dir of my GitHub Page for CSS/JS.
|
118
|
-
ghp_root = YardGhurt::Util.to_bool(args.dev) ? '../../esotericpig.github.io' : '../../..'
|
119
|
-
|
120
|
-
t.css_styles << %Q(<link rel="stylesheet" type="text/css" href="#{ghp_root}/css/prism.css" />)
|
121
|
-
t.js_scripts << %Q(<script src="#{ghp_root}/js/prism.js"></script>)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
# Probably not useful for others.
|
126
|
-
YardGhurt::GHPSyncTask.new do |task|
|
127
|
-
task.ghp_dir = '../esotericpig.github.io/docs/nhkore/yardoc'
|
128
|
-
end
|
data/bin/nhkore
CHANGED
data/lib/nhkore/app.rb
CHANGED
@@ -3,12 +3,11 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'cri'
|
13
12
|
require 'highline'
|
14
13
|
require 'rainbow'
|
@@ -25,11 +24,7 @@ require 'nhkore/cli/news_cmd'
|
|
25
24
|
require 'nhkore/cli/search_cmd'
|
26
25
|
require 'nhkore/cli/sift_cmd'
|
27
26
|
|
28
|
-
|
29
27
|
module NHKore
|
30
|
-
module CLI
|
31
|
-
end
|
32
|
-
|
33
28
|
###
|
34
29
|
# For disabling/enabling color output.
|
35
30
|
###
|
@@ -40,7 +35,7 @@ module NHKore
|
|
40
35
|
@color = color
|
41
36
|
end
|
42
37
|
|
43
|
-
def color?(
|
38
|
+
def color?(_io)
|
44
39
|
return @color
|
45
40
|
end
|
46
41
|
end
|
@@ -73,7 +68,7 @@ module NHKore
|
|
73
68
|
attr_accessor :sleep_time
|
74
69
|
attr_accessor :spinner
|
75
70
|
|
76
|
-
def initialize(args=ARGV)
|
71
|
+
def initialize(args = ARGV)
|
77
72
|
super()
|
78
73
|
|
79
74
|
@args = args
|
@@ -150,13 +145,13 @@ module NHKore
|
|
150
145
|
This is similar to a core word/vocabulary list.
|
151
146
|
DESC
|
152
147
|
|
153
|
-
flag :s,:'classic-fx',<<-DESC do |
|
148
|
+
flag :s,:'classic-fx',<<-DESC do |_value,_cmd|
|
154
149
|
use classic spinner/progress special effects (in case of no Unicode support) when running long tasks
|
155
150
|
DESC
|
156
151
|
app.progress_bar = :classic
|
157
152
|
app.spinner = CLASSIC_SPINNER
|
158
153
|
end
|
159
|
-
flag COLOR_OPTS[0],COLOR_OPTS[1],"force color output (for commands like '| less -R')" do |
|
154
|
+
flag COLOR_OPTS[0],COLOR_OPTS[1],"force color output (for commands like '| less -R')" do |_value,_cmd|
|
160
155
|
app.enable_color(true)
|
161
156
|
end
|
162
157
|
flag :n,:'dry-run',<<-DESC
|
@@ -164,11 +159,11 @@ module NHKore
|
|
164
159
|
DESC
|
165
160
|
# Big F because dangerous.
|
166
161
|
flag :F,:force,"force overwriting files, creating directories, etc. (don't prompt); dangerous!"
|
167
|
-
flag :h,:help,'show this help' do |
|
162
|
+
flag :h,:help,'show this help' do |_value,cmd|
|
168
163
|
puts cmd.help
|
169
164
|
exit
|
170
165
|
end
|
171
|
-
option :m,:'max-retry',<<-DESC,argument: :required,default: 3 do |value,
|
166
|
+
option :m,:'max-retry',<<-DESC,argument: :required,default: 3 do |value,_cmd|
|
172
167
|
maximum number of times to retry URLs (-1 or integer >= 0)
|
173
168
|
DESC
|
174
169
|
value = value.to_i
|
@@ -176,14 +171,14 @@ module NHKore
|
|
176
171
|
|
177
172
|
app.scraper_kargs[:max_retries] = value
|
178
173
|
end
|
179
|
-
flag NO_COLOR_OPTS[0],NO_COLOR_OPTS[1],'disable color output' do |
|
174
|
+
flag NO_COLOR_OPTS[0],NO_COLOR_OPTS[1],'disable color output' do |_value,_cmd|
|
180
175
|
app.enable_color(false)
|
181
176
|
end
|
182
|
-
flag :X,:'no-fx','disable spinner/progress special effects when running long tasks' do |
|
177
|
+
flag :X,:'no-fx','disable spinner/progress special effects when running long tasks' do |_value,_cmd|
|
183
178
|
app.progress_bar = :no
|
184
179
|
app.spinner = {} # Still outputs status & stores tokens
|
185
180
|
end
|
186
|
-
option :o,:'open-timeout',<<-DESC,argument: :required do |value,
|
181
|
+
option :o,:'open-timeout',<<-DESC,argument: :required do |value,_cmd|
|
187
182
|
seconds for URL open timeouts (-1 or decimal >= 0)
|
188
183
|
DESC
|
189
184
|
value = value.to_f
|
@@ -191,7 +186,7 @@ module NHKore
|
|
191
186
|
|
192
187
|
app.scraper_kargs[:open_timeout] = value
|
193
188
|
end
|
194
|
-
option :r,:'read-timeout',<<-DESC,argument: :required do |value,
|
189
|
+
option :r,:'read-timeout',<<-DESC,argument: :required do |value,_cmd|
|
195
190
|
seconds for URL read timeouts (-1 or decimal >= 0)
|
196
191
|
DESC
|
197
192
|
value = value.to_f
|
@@ -199,13 +194,13 @@ module NHKore
|
|
199
194
|
|
200
195
|
app.scraper_kargs[:read_timeout] = value
|
201
196
|
end
|
202
|
-
option :z,:sleep,<<-DESC,argument: :required,default: DEFAULT_SLEEP_TIME do |value,
|
197
|
+
option :z,:sleep,<<-DESC,argument: :required,default: DEFAULT_SLEEP_TIME do |value,_cmd|
|
203
198
|
seconds to sleep per scrape (i.e., per page/article) so don't get banned (i.e., fake being human)
|
204
199
|
DESC
|
205
200
|
app.sleep_time = value.to_f
|
206
201
|
app.sleep_time = 0.0 if app.sleep_time < 0.0
|
207
202
|
end
|
208
|
-
option :t,:timeout,<<-DESC,argument: :required do |value,
|
203
|
+
option :t,:timeout,<<-DESC,argument: :required do |value,_cmd|
|
209
204
|
seconds for all URL timeouts: [open, read] (-1 or decimal >= 0)
|
210
205
|
DESC
|
211
206
|
value = value.to_f
|
@@ -214,7 +209,7 @@ module NHKore
|
|
214
209
|
app.scraper_kargs[:open_timeout] = value
|
215
210
|
app.scraper_kargs[:read_timeout] = value
|
216
211
|
end
|
217
|
-
option :u,:'user-agent',<<-DESC,argument: :required do |value,
|
212
|
+
option :u,:'user-agent',<<-DESC,argument: :required do |value,_cmd|
|
218
213
|
HTTP header field 'User-Agent' to use instead of a random one
|
219
214
|
DESC
|
220
215
|
value = app.check_empty_opt(:'user-agent',value)
|
@@ -222,12 +217,12 @@ module NHKore
|
|
222
217
|
app.scraper_kargs[:header] ||= {}
|
223
218
|
app.scraper_kargs[:header]['user-agent'] = value
|
224
219
|
end
|
225
|
-
flag :v,:version,'show the version and exit' do |
|
220
|
+
flag :v,:version,'show the version and exit' do |_value,_cmd|
|
226
221
|
app.show_version
|
227
222
|
exit
|
228
223
|
end
|
229
224
|
|
230
|
-
run do |
|
225
|
+
run do |_opts,_args,cmd|
|
231
226
|
puts cmd.help
|
232
227
|
end
|
233
228
|
end
|
@@ -253,21 +248,20 @@ module NHKore
|
|
253
248
|
file = Util.strip_web_str(@cmd_opts[opt_key].to_s)
|
254
249
|
|
255
250
|
if file.empty?
|
256
|
-
# Do not check default_dir.empty?()
|
257
|
-
if default_filename.empty?
|
258
|
-
|
259
|
-
|
260
|
-
|
261
|
-
|
251
|
+
# Do not check `default_dir.empty?()`.
|
252
|
+
file = if default_filename.empty?
|
253
|
+
nil # NOTE: nil is very important for BingScraper.init()!
|
254
|
+
else
|
255
|
+
File.join(default_dir,default_filename)
|
256
|
+
end
|
257
|
+
# Directory?
|
258
|
+
elsif File.directory?(file) || Util.dir_str?(file)
|
259
|
+
file = File.join(file,default_filename)
|
260
|
+
# File name only? (no directory)
|
261
|
+
elsif Util.filename_str?(file)
|
262
|
+
file = File.join(default_dir,file)
|
262
263
|
else
|
263
|
-
#
|
264
|
-
if File.directory?(file) || Util.dir_str?(file)
|
265
|
-
file = File.join(file,default_filename)
|
266
|
-
# File name only? (no directory)
|
267
|
-
elsif Util.filename_str?(file)
|
268
|
-
file = File.join(default_dir,file)
|
269
|
-
end
|
270
|
-
# Else, passed in both: 'directory/file'
|
264
|
+
# Passed in both: 'directory/file'
|
271
265
|
end
|
272
266
|
|
273
267
|
# '~' will expand to home, etc.
|
@@ -307,7 +301,7 @@ module NHKore
|
|
307
301
|
config.head = 'o'
|
308
302
|
end
|
309
303
|
|
310
|
-
#config.frequency = 5 # For a big download, set this
|
304
|
+
# config.frequency = 5 # For a big download, set this
|
311
305
|
config.interval = 1 if download
|
312
306
|
end
|
313
307
|
end
|
@@ -325,7 +319,7 @@ module NHKore
|
|
325
319
|
aliases :v
|
326
320
|
summary "Show the version and exit (aliases: #{app.color_alias('v')})"
|
327
321
|
|
328
|
-
run do |
|
322
|
+
run do |_opts,_args,_cmd|
|
329
323
|
app.show_version
|
330
324
|
end
|
331
325
|
end
|
@@ -501,14 +495,15 @@ module NHKore
|
|
501
495
|
# this due to relying on @cmd_opts[:ext] to be nil.
|
502
496
|
# It's easy to change this one instance, but I'm not sure
|
503
497
|
# at the moment where else might be affected
|
504
|
-
## Cri has a default proc for default values
|
505
|
-
## that doesn't store the keys.
|
506
|
-
#new_opts.default_proc = proc do |hash,key|
|
507
|
-
# # :max_retry => %s(max-retry)
|
508
|
-
# key = key.to_s.gsub('_','-').to_sym
|
509
498
|
#
|
510
|
-
#
|
511
|
-
#
|
499
|
+
# # Cri has a default proc for default values
|
500
|
+
# # that doesn't store the keys.
|
501
|
+
# new_opts.default_proc = proc do |hash,key|
|
502
|
+
# # :max_retry => %s(max-retry)
|
503
|
+
# key = key.to_s.gsub('_','-').to_sym
|
504
|
+
#
|
505
|
+
# opts.default_proc.call(hash,key)
|
506
|
+
# end
|
512
507
|
|
513
508
|
@cmd = cmd
|
514
509
|
@cmd_args = args
|
@@ -547,7 +542,7 @@ module NHKore
|
|
547
542
|
|
548
543
|
def stop_spin
|
549
544
|
if @spinner.is_a?(Hash)
|
550
|
-
puts
|
545
|
+
puts "#{NO_SPINNER_MSG % @spinner} done!"
|
551
546
|
else
|
552
547
|
@spinner.reset
|
553
548
|
@spinner.stop('done!')
|
@@ -586,7 +581,7 @@ module NHKore
|
|
586
581
|
@tokens[:progress] = 0
|
587
582
|
end
|
588
583
|
|
589
|
-
def advance(progress=1)
|
584
|
+
def advance(progress = 1)
|
590
585
|
total = @tokens[:total]
|
591
586
|
progress = @tokens[:progress] + progress
|
592
587
|
progress = total if progress > total
|
data/lib/nhkore/article.rb
CHANGED
@@ -3,18 +3,16 @@
|
|
3
3
|
|
4
4
|
#--
|
5
5
|
# This file is part of NHKore.
|
6
|
-
# Copyright (c) 2020
|
6
|
+
# Copyright (c) 2020 Bradley Whited
|
7
7
|
#
|
8
8
|
# SPDX-License-Identifier: LGPL-3.0-or-later
|
9
9
|
#++
|
10
10
|
|
11
|
-
|
12
11
|
require 'time'
|
13
12
|
|
14
13
|
require 'nhkore/util'
|
15
14
|
require 'nhkore/word'
|
16
15
|
|
17
|
-
|
18
16
|
module NHKore
|
19
17
|
class Article
|
20
18
|
attr_reader :datetime
|
@@ -69,7 +67,7 @@ module NHKore
|
|
69
67
|
coder[:words] = @words
|
70
68
|
end
|
71
69
|
|
72
|
-
def self.load_data(
|
70
|
+
def self.load_data(_key,hash)
|
73
71
|
words = hash[:words]
|
74
72
|
|
75
73
|
article = Article.new
|
@@ -80,7 +78,7 @@ module NHKore
|
|
80
78
|
article.title = hash[:title]
|
81
79
|
article.url = hash[:url]
|
82
80
|
|
83
|
-
words&.each
|
81
|
+
words&.each do |k,h|
|
84
82
|
k = k.to_s # Change from a symbol
|
85
83
|
article.words[k] = Word.load_data(k,h)
|
86
84
|
end
|
@@ -89,11 +87,11 @@ module NHKore
|
|
89
87
|
end
|
90
88
|
|
91
89
|
def datetime=(value)
|
92
|
-
if value.is_a?(Time)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
90
|
+
@datetime = if value.is_a?(Time)
|
91
|
+
value
|
92
|
+
else
|
93
|
+
Util.empty_web_str?(value) ? nil : Time.iso8601(value)
|
94
|
+
end
|
97
95
|
end
|
98
96
|
|
99
97
|
def futsuurl=(value)
|
@@ -118,7 +116,7 @@ module NHKore
|
|
118
116
|
|
119
117
|
if !mini
|
120
118
|
s << "\n words:"
|
121
|
-
@words.each do |
|
119
|
+
@words.each do |_key,word|
|
122
120
|
s << "\n #{word}"
|
123
121
|
end
|
124
122
|
end
|