nhkore 0.3.5 → 0.3.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +16 -1
- data/README.md +4 -0
- data/Rakefile +22 -7
- data/lib/nhkore/article_scraper.rb +10 -5
- data/lib/nhkore/search_scraper.rb +1 -0
- data/lib/nhkore/version.rb +1 -1
- data/nhkore.gemspec +10 -6
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 445adf6e8abd4da9fd6dd25e9632d5f477b467f6ce8c3dcecae87e3f61305d98
|
4
|
+
data.tar.gz: ca812639ff1edd8da835f5bbb2cde403c9cb63e17568fb3ec367eec00605ec17
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 392607205c53aa2a5dfcde244e5fa6137483d216dc27becf06c76798209d2dcf328f17abee2026d795207d4e783a23fd108e615525445f52ca6442560600cd42
|
7
|
+
data.tar.gz: 7a1219623b6645bbc633ba9c94e767dcf86be8852a7228c1d5ddd3936f61b884897f680369d4c9d9db5aba8ab4561048d59aed15cecf7ba05695c1957f31b0ea
|
data/CHANGELOG.md
CHANGED
@@ -2,7 +2,22 @@
|
|
2
2
|
|
3
3
|
Format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
|
4
4
|
|
5
|
-
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.5...master)
|
5
|
+
## [[Unreleased]](https://github.com/esotericpig/nhkore/compare/v0.3.6...master)
|
6
|
+
|
7
|
+
## [v0.3.6] - 2020-08-18
|
8
|
+
|
9
|
+
### Added
|
10
|
+
- `update_showcase` Rake task for development & personal site (GitHub Page)
|
11
|
+
- `$ bundle exec rake update_showcase`
|
12
|
+
|
13
|
+
### Changed
|
14
|
+
- Updated Gems
|
15
|
+
|
16
|
+
### Fixed
|
17
|
+
- ArticleScraper for title for specific site
|
18
|
+
- https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
|
19
|
+
- Ignored `/cgi2.*enqform/` URLs from SearchScraper (Bing)
|
20
|
+
- Added more detail to dictionary error in ArticleScraper
|
6
21
|
|
7
22
|
## [v0.3.5] - 2020-05-04
|
8
23
|
|
data/README.md
CHANGED
@@ -870,6 +870,10 @@ This will update *core/* for you:
|
|
870
870
|
- *Raketary*: `$ raketary github_pkg`
|
871
871
|
7. Run: `$ bundle exec rake release`
|
872
872
|
|
873
|
+
Releasing new HTML file for website:
|
874
|
+
|
875
|
+
1. `$ bundle exec rake update_showcase`
|
876
|
+
|
873
877
|
## License [^](#contents)
|
874
878
|
|
875
879
|
[GNU LGPL v3+](LICENSE.txt)
|
data/Rakefile
CHANGED
@@ -35,7 +35,7 @@ require 'nhkore/version'
|
|
35
35
|
|
36
36
|
PKG_DIR = 'pkg'
|
37
37
|
|
38
|
-
CLEAN.exclude('.git
|
38
|
+
CLEAN.exclude('{.git,core,stock}/**/*')
|
39
39
|
CLOBBER.include('doc/',File.join(PKG_DIR,''))
|
40
40
|
|
41
41
|
|
@@ -59,7 +59,7 @@ Rake::TestTask.new() do |task|
|
|
59
59
|
task.libs = ['lib','test']
|
60
60
|
task.pattern = File.join('test','**','*_test.rb')
|
61
61
|
task.description += ": '#{task.pattern}'"
|
62
|
-
task.verbose =
|
62
|
+
task.verbose = false
|
63
63
|
task.warning = true
|
64
64
|
end
|
65
65
|
|
@@ -88,13 +88,28 @@ task :update_core do |task|
|
|
88
88
|
next unless sh(*cmd,'sift','-e','yml' ,'ez')
|
89
89
|
end
|
90
90
|
|
91
|
-
|
92
|
-
|
91
|
+
# @since 0.3.6
|
92
|
+
desc 'Update showcase file for release'
|
93
|
+
task :update_showcase do |task|
|
94
|
+
require 'highline'
|
95
|
+
|
96
|
+
SHOWCASE_FILE = File.join('.','nhkore-ez.html')
|
97
|
+
|
98
|
+
hl = HighLine.new()
|
93
99
|
|
94
|
-
|
95
|
-
|
100
|
+
next unless sh('ruby','-w','./lib/nhkore.rb',
|
101
|
+
'sift','ez','--no-eng',
|
102
|
+
'--out',SHOWCASE_FILE,
|
103
|
+
)
|
96
104
|
|
97
|
-
|
105
|
+
next unless hl.agree("\nMove the file (y/n)? ")
|
106
|
+
puts
|
107
|
+
next unless sh('mv','-iv',SHOWCASE_FILE,
|
108
|
+
File.join('..','esotericpig.github.io','showcase',''),
|
109
|
+
)
|
110
|
+
end
|
111
|
+
|
112
|
+
YARD::Rake::YardocTask.new() do |task|
|
98
113
|
task.options += ['--template-path',File.join('yard','templates')]
|
99
114
|
task.options += ['--title',"NHKore v#{NHKore::VERSION} Doc"]
|
100
115
|
end
|
@@ -291,7 +291,7 @@ module NHKore
|
|
291
291
|
|
292
292
|
retry
|
293
293
|
else
|
294
|
-
raise e.exception("could not scrape dictionary at URL[#{
|
294
|
+
raise e.exception("could not scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
|
295
295
|
end
|
296
296
|
end
|
297
297
|
|
@@ -481,19 +481,24 @@ module NHKore
|
|
481
481
|
|
482
482
|
def scrape_title(doc,article)
|
483
483
|
tag = doc.css('h1.article-main__title')
|
484
|
+
tag_name = nil
|
485
|
+
|
486
|
+
if tag.length < 1
|
487
|
+
# - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
|
488
|
+
tag_name = 'h1.article-eq__title'
|
489
|
+
tag = doc.css(tag_name)
|
490
|
+
end
|
484
491
|
|
485
492
|
if tag.length < 1 && !@strict
|
486
493
|
# This shouldn't be used except for select sites.
|
487
494
|
# - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
|
488
|
-
|
489
495
|
tag_name = 'div#main h2'
|
490
|
-
|
491
|
-
Util.warn("using [#{tag_name}] for title at URL[#{@url}]")
|
492
|
-
|
493
496
|
tag = doc.css(tag_name)
|
494
497
|
end
|
495
498
|
|
496
499
|
if tag.length > 0
|
500
|
+
Util.warn("using [#{tag_name}] for title at URL[#{@url}]") unless tag_name.nil?()
|
501
|
+
|
497
502
|
result = scrape_and_add_words(tag,article)
|
498
503
|
title = result.text
|
499
504
|
|
@@ -61,6 +61,7 @@ module NHKore
|
|
61
61
|
return true if link =~ /\/movieplayer\.html?/ # https://www3.nhk.or.jp/news/easy/movieplayer.html?id=k10038422811_1207251719_1207251728.mp4&teacuprbbs=4feb73432045dbb97c283d64d459f7cf
|
62
62
|
return true if link =~ /\/audio\.html?/ # https://www3.nhk.or.jp/news/easy/player/audio.html?id=k10011555691000
|
63
63
|
return true if link =~ /\/news\/easy\/index\.html?/ # http://www3.nhk.or.jp/news/easy/index.html
|
64
|
+
return true if link =~ /cgi2.*enqform/ # https://cgi2.nhk.or.jp/news/easy/easy_enq/bin/form/enqform.html?id=k10011916321000&title=日本の会社が作った鉄道の車両「あずま」がイギリスで走る
|
64
65
|
|
65
66
|
return false
|
66
67
|
end
|
data/lib/nhkore/version.rb
CHANGED
data/nhkore.gemspec
CHANGED
@@ -49,15 +49,19 @@ Gem::Specification.new() do |spec|
|
|
49
49
|
spec.bindir = 'bin'
|
50
50
|
spec.executables = [spec.name]
|
51
51
|
|
52
|
-
spec.files =
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
spec.files = [
|
53
|
+
Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')),
|
54
|
+
Dir.glob(File.join(spec.bindir,'*')),
|
55
|
+
Dir.glob(File.join('{test,yard}','**','*.{erb,rb}')),
|
56
|
+
%W( Gemfile #{spec.name}.gemspec Rakefile ),
|
57
|
+
%w( CHANGELOG.md LICENSE.txt README.md ),
|
58
|
+
].flatten()
|
57
59
|
|
58
60
|
spec.required_ruby_version = '>= 2.4'
|
59
61
|
|
60
|
-
spec.requirements
|
62
|
+
spec.requirements = [
|
63
|
+
'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html',
|
64
|
+
]
|
61
65
|
|
62
66
|
spec.add_runtime_dependency 'attr_bool' ,'~> 0.1' # For attr_accessor?/attr_reader?
|
63
67
|
spec.add_runtime_dependency 'bimyou_segmenter' ,'~> 1.2' # For splitting Japanese sentences into words
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: nhkore
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.3.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jonathan Bradley Whited (@esotericpig)
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-08-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: attr_bool
|
@@ -390,7 +390,7 @@ metadata:
|
|
390
390
|
changelog_uri: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md
|
391
391
|
homepage_uri: https://github.com/esotericpig/nhkore
|
392
392
|
source_code_uri: https://github.com/esotericpig/nhkore
|
393
|
-
post_install_message: " \n NHKore v0.3.5\n \n You can now use [nhkore] on the
|
393
|
+
post_install_message: " \n NHKore v0.3.6\n \n You can now use [nhkore] on the
|
394
394
|
command line.\n \n Homepage: https://github.com/esotericpig/nhkore\n \n Code:
|
395
395
|
\ https://github.com/esotericpig/nhkore\n Changelog: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md\n
|
396
396
|
\ Bugs: https://github.com/esotericpig/nhkore/issues\n \n"
|