nhkore 0.3.5 → 0.3.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/README.md +4 -0
- data/Rakefile +22 -7
- data/lib/nhkore/article_scraper.rb +10 -5
- data/lib/nhkore/search_scraper.rb +1 -0
- data/lib/nhkore/version.rb +1 -1
- data/nhkore.gemspec +10 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 445adf6e8abd4da9fd6dd25e9632d5f477b467f6ce8c3dcecae87e3f61305d98
|
4
|
+
data.tar.gz: ca812639ff1edd8da835f5bbb2cde403c9cb63e17568fb3ec367eec00605ec17
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 392607205c53aa2a5dfcde244e5fa6137483d216dc27becf06c76798209d2dcf328f17abee2026d795207d4e783a23fd108e615525445f52ca6442560600cd42
|
7
|
+
data.tar.gz: 7a1219623b6645bbc633ba9c94e767dcf86be8852a7228c1d5ddd3936f61b884897f680369d4c9d9db5aba8ab4561048d59aed15cecf7ba05695c1957f31b0ea
|
data/CHANGELOG.md
CHANGED
@@ -2,7 +2,22 @@
|
|
2
2
|
|
3
3
|
Format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
4
4
|
|
5
|
-
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.
|
5
|
+
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.6...master)
|
6
|
+
|
7
|
+
## [v0.3.6] - 2020-08-18
|
8
|
+
|
9
|
+
### Added
|
10
|
+
- `update_showcase` Rake task for development & personal site (GitHub Page)
|
11
|
+
- `$ bundle exec rake update_showcase`
|
12
|
+
|
13
|
+
### Changed
|
14
|
+
- Updated Gems
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
- ArticleScraper for title for specific site
|
18
|
+
- https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
|
19
|
+
- Ignored `/cgi2.*enqform/` URLs from SearchScraper (Bing)
|
20
|
+
- Added more detail to dictionary error in ArticleScraper
|
6
21
|
|
7
22
|
## [v0.3.5] - 2020-05-04
|
8
23
|
|
data/README.md
CHANGED
@@ -870,6 +870,10 @@ This will update *core/* for you:
|
|
870
870
|
- *Raketary*: `$ raketary github_pkg`
|
871
871
|
7. Run: `$ bundle exec rake release`
|
872
872
|
|
873
|
+
Releasing new HTML file for website:
|
874
|
+
|
875
|
+
1. `$ bundle exec rake update_showcase`
|
876
|
+
|
873
877
|
## License [^](#contents)
|
874
878
|
|
875
879
|
[GNU LGPL v3+](LICENSE.txt)
|
data/Rakefile
CHANGED
@@ -35,7 +35,7 @@ require 'nhkore/version'
|
|
35
35
|
|
36
36
|
PKG_DIR = 'pkg'
|
37
37
|
|
38
|
-
CLEAN.exclude('.git
|
38
|
+
CLEAN.exclude('{.git,core,stock}/**/*')
|
39
39
|
CLOBBER.include('doc/',File.join(PKG_DIR,''))
|
40
40
|
|
41
41
|
|
@@ -59,7 +59,7 @@ Rake::TestTask.new() do |task|
|
|
59
59
|
task.libs = ['lib','test']
|
60
60
|
task.pattern = File.join('test','**','*_test.rb')
|
61
61
|
task.description += ": '#{task.pattern}'"
|
62
|
-
task.verbose =
|
62
|
+
task.verbose = false
|
63
63
|
task.warning = true
|
64
64
|
end
|
65
65
|
|
@@ -88,13 +88,28 @@ task :update_core do |task|
|
|
88
88
|
next unless sh(*cmd,'sift','-e','yml' ,'ez')
|
89
89
|
end
|
90
90
|
|
91
|
-
|
92
|
-
|
91
|
+
# @since 0.3.6
|
92
|
+
desc 'Update showcase file for release'
|
93
|
+
task :update_showcase do |task|
|
94
|
+
require 'highline'
|
95
|
+
|
96
|
+
SHOWCASE_FILE = File.join('.','nhkore-ez.html')
|
97
|
+
|
98
|
+
hl = HighLine.new()
|
93
99
|
|
94
|
-
|
95
|
-
|
100
|
+
next unless sh('ruby','-w','./lib/nhkore.rb',
|
101
|
+
'sift','ez','--no-eng',
|
102
|
+
'--out',SHOWCASE_FILE,
|
103
|
+
)
|
96
104
|
|
97
|
-
|
105
|
+
next unless hl.agree("\nMove the file (y/n)? ")
|
106
|
+
puts
|
107
|
+
next unless sh('mv','-iv',SHOWCASE_FILE,
|
108
|
+
File.join('..','esotericpig.github.io','showcase',''),
|
109
|
+
)
|
110
|
+
end
|
111
|
+
|
112
|
+
YARD::Rake::YardocTask.new() do |task|
|
98
113
|
task.options += ['--template-path',File.join('yard','templates')]
|
99
114
|
task.options += ['--title',"NHKore v#{NHKore::VERSION} Doc"]
|
100
115
|
end
|
@@ -291,7 +291,7 @@ module NHKore
|
|
291
291
|
|
292
292
|
retry
|
293
293
|
else
|
294
|
-
raise e.exception("could not scrape dictionary at URL[#{
|
294
|
+
raise e.exception("could not scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
|
295
295
|
end
|
296
296
|
end
|
297
297
|
|
@@ -481,19 +481,24 @@ module NHKore
|
|
481
481
|
|
482
482
|
def scrape_title(doc,article)
|
483
483
|
tag = doc.css('h1.article-main__title')
|
484
|
+
tag_name = nil
|
485
|
+
|
486
|
+
if tag.length < 1
|
487
|
+
# - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
|
488
|
+
tag_name = 'h1.article-eq__title'
|
489
|
+
tag = doc.css(tag_name)
|
490
|
+
end
|
484
491
|
|
485
492
|
if tag.length < 1 && !@strict
|
486
493
|
# This shouldn't be used except for select sites.
|
487
494
|
# - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
|
488
|
-
|
489
495
|
tag_name = 'div#main h2'
|
490
|
-
|
491
|
-
Util.warn("using [#{tag_name}] for title at URL[#{@url}]")
|
492
|
-
|
493
496
|
tag = doc.css(tag_name)
|
494
497
|
end
|
495
498
|
|
496
499
|
if tag.length > 0
|
500
|
+
Util.warn("using [#{tag_name}] for title at URL[#{@url}]") unless tag_name.nil?()
|
501
|
+
|
497
502
|
result = scrape_and_add_words(tag,article)
|
498
503
|
title = result.text
|
499
504
|
|
@@ -61,6 +61,7 @@ module NHKore
|
|
61
61
|
return true if link =~ /\/movieplayer\.html?/ # https://www3.nhk.or.jp/news/easy/movieplayer.html?id=k10038422811_1207251719_1207251728.mp4&teacuprbbs=4feb73432045dbb97c283d64d459f7cf
|
62
62
|
return true if link =~ /\/audio\.html?/ # https://www3.nhk.or.jp/news/easy/player/audio.html?id=k10011555691000
|
63
63
|
return true if link =~ /\/news\/easy\/index\.html?/ # http://www3.nhk.or.jp/news/easy/index.html
|
64
|
+
return true if link =~ /cgi2.*enqform/ # https://cgi2.nhk.or.jp/news/easy/easy_enq/bin/form/enqform.html?id=k10011916321000&title=日本の会社が作った鉄道の車両「あずま」がイギリスで走る
|
64
65
|
|
65
66
|
return false
|
66
67
|
end
|
data/lib/nhkore/version.rb
CHANGED
data/nhkore.gemspec
CHANGED
@@ -49,15 +49,19 @@ Gem::Specification.new() do |spec|
|
|
49
49
|
spec.bindir = 'bin'
|
50
50
|
spec.executables = [spec.name]
|
51
51
|
|
52
|
-
spec.files =
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
spec.files = [
|
53
|
+
Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')),
|
54
|
+
Dir.glob(File.join(spec.bindir,'*')),
|
55
|
+
Dir.glob(File.join('{test,yard}','**','*.{erb,rb}')),
|
56
|
+
%W( Gemfile #{spec.name}.gemspec Rakefile ),
|
57
|
+
%w( CHANGELOG.md LICENSE.txt README.md ),
|
58
|
+
].flatten()
|
57
59
|
|
58
60
|
spec.required_ruby_version = '>= 2.4'
|
59
61
|
|
60
|
-
spec.requirements
|
62
|
+
spec.requirements = [
|
63
|
+
'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html',
|
64
|
+
]
|
61
65
|
|
62
66
|
spec.add_runtime_dependency 'attr_bool' ,'~> 0.1' # For attr_accessor?/attr_reader?
|
63
67
|
spec.add_runtime_dependency 'bimyou_segmenter' ,'~> 1.2' # For splitting Japanese sentences into words
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nhkore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Bradley Whited (@esotericpig)
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attr_bool
|
@@ -390,7 +390,7 @@ metadata:
|
|
390
390
|
changelog_uri: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md
|
391
391
|
homepage_uri: https://github.com/esotericpig/nhkore
|
392
392
|
source_code_uri: https://github.com/esotericpig/nhkore
|
393
|
-
post_install_message: " \n NHKore v0.3.
|
393
|
+
post_install_message: " \n NHKore v0.3.6\n \n You can now use [nhkore] on the
|
394
394
|
command line.\n \n Homepage: https://github.com/esotericpig/nhkore\n \n Code:
|
395
395
|
\ https://github.com/esotericpig/nhkore\n Changelog: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md\n
|
396
396
|
\ Bugs: https://github.com/esotericpig/nhkore/issues\n \n"
|