nhkore 0.3.17 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,19 +3,17 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'attr_bool'
13
12
  require 'date'
14
13
  require 'time'
15
14
 
16
15
  require 'nhkore/util'
17
16
 
18
-
19
17
  module NHKore
20
18
  class DatetimeParser
21
19
  extend AttrBool::Ext
@@ -55,10 +53,10 @@ module NHKore
55
53
  # Assume this millennium.
56
54
  # So if the current year is 2200, and year is 150,
57
55
  # then it will be 2000 + 150 = 2150.
58
- else
56
+ elsif millennium >= 1000
59
57
  # Assume previous millennium (2000 -> 1000),
60
58
  # so year 999 will become 1999.
61
- millennium -= 1000 if millennium >= 1000
59
+ millennium -= 1000
62
60
  end
63
61
 
64
62
  year = millennium + year
@@ -155,7 +153,7 @@ module NHKore
155
153
 
156
154
  attr_reader? :min_or_max
157
155
 
158
- def initialize(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
156
+ def initialize(year = nil,month = nil,day = nil,hour = nil,min = nil,sec = nil)
159
157
  super()
160
158
 
161
159
  set!(year,month,day,hour,min,sec)
@@ -185,66 +183,66 @@ module NHKore
185
183
  @sec = other.sec unless @has_sec
186
184
  has_small = true
187
185
  else
188
- if has_small
189
- @sec = jst_now.sec
190
- else
191
- @sec = is_from ? 0 : 59
192
- end
186
+ @sec = if has_small
187
+ jst_now.sec
188
+ else
189
+ is_from ? 0 : 59
190
+ end
193
191
  end
194
192
 
195
193
  if @has_min || other.has_min?
196
194
  @min = other.min unless @has_min
197
195
  has_small = true
198
196
  else
199
- if has_small
200
- @min = jst_now.min
201
- else
202
- @min = is_from ? 0 : 59
203
- end
197
+ @min = if has_small
198
+ jst_now.min
199
+ else
200
+ is_from ? 0 : 59
201
+ end
204
202
  end
205
203
 
206
204
  if @has_hour || other.has_hour?
207
205
  @hour = other.hour unless @has_hour
208
206
  has_small = true
209
207
  else
210
- if has_small
211
- @hour = jst_now.hour
212
- else
213
- @hour = is_from ? 0 : 23
214
- end
208
+ @hour = if has_small
209
+ jst_now.hour
210
+ else
211
+ is_from ? 0 : 23
212
+ end
215
213
  end
216
214
 
217
215
  if @has_day || other.has_day?
218
216
  @day = other.day unless @has_day
219
217
  has_small = true
220
218
  else
221
- if has_small
222
- @day = jst_now.day
223
- else
224
- @day = is_from ? 1 : :last_day
225
- end
219
+ @day = if has_small
220
+ jst_now.day
221
+ else
222
+ is_from ? 1 : :last_day
223
+ end
226
224
  end
227
225
 
228
226
  if @has_month || other.has_month?
229
227
  @month = other.month unless @has_month
230
228
  has_small = true
231
229
  else
232
- if has_small
233
- @month = jst_now.month
234
- else
235
- @month = is_from ? 1 : 12
236
- end
230
+ @month = if has_small
231
+ jst_now.month
232
+ else
233
+ is_from ? 1 : 12
234
+ end
237
235
  end
238
236
 
239
237
  if @has_year || other.has_year?
240
238
  @year = other.year unless @has_year
241
239
  has_small = true # rubocop:disable Lint/UselessAssignment
242
240
  else
243
- if has_small
244
- @year = jst_now.year
245
- else
246
- @year = is_from ? Util::MIN_SANE_YEAR : jst_now.year
247
- end
241
+ @year = if has_small
242
+ jst_now.year
243
+ else
244
+ is_from ? Util::MIN_SANE_YEAR : jst_now.year
245
+ end
248
246
  end
249
247
 
250
248
  # Must be after setting @year & @month.
@@ -289,7 +287,7 @@ module NHKore
289
287
  return self
290
288
  end
291
289
 
292
- def set!(year=nil,month=nil,day=nil,hour=nil,min=nil,sec=nil)
290
+ def set!(year = nil,month = nil,day = nil,hour = nil,min = nil,sec = nil)
293
291
  @year = year
294
292
  @month = month
295
293
  @day = day
data/lib/nhkore/defn.rb CHANGED
@@ -3,18 +3,16 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nokogiri'
13
12
 
14
13
  require 'nhkore/util'
15
14
  require 'nhkore/word'
16
15
 
17
-
18
16
  module NHKore
19
17
  class Defn
20
18
  attr_reader :hyoukis
@@ -35,7 +33,7 @@ module NHKore
35
33
 
36
34
  hyoukis = hash['hyouki']
37
35
 
38
- hyoukis&.each() do |hyouki|
36
+ hyoukis&.each do |hyouki|
39
37
  next if hyouki.nil?
40
38
  next if (hyouki = Util.strip_web_str(hyouki)).empty?
41
39
 
data/lib/nhkore/dict.rb CHANGED
@@ -3,16 +3,14 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/entry'
13
12
  require 'nhkore/error'
14
13
 
15
-
16
14
  module NHKore
17
15
  class Dict
18
16
  attr_reader :entries
@@ -3,18 +3,16 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2022 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/dict'
13
12
  require 'nhkore/error'
14
13
  require 'nhkore/scraper'
15
14
  require 'nhkore/util'
16
15
 
17
-
18
16
  module NHKore
19
17
  class DictScraper < Scraper
20
18
  attr_accessor :missingno
data/lib/nhkore/entry.rb CHANGED
@@ -3,16 +3,14 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/defn'
13
12
  require 'nhkore/util'
14
13
 
15
-
16
14
  module NHKore
17
15
  class Entry
18
16
  HYOUKI_SEP = '・'
data/lib/nhkore/error.rb CHANGED
@@ -3,12 +3,11 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
12
  class Error < ::StandardError; end
14
13
 
@@ -3,12 +3,11 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
12
  module Fileable
14
13
  def self.included(mod)
data/lib/nhkore/lib.rb CHANGED
@@ -3,11 +3,15 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
+ ###
12
+ # Include this file to only require the files needed to use this
13
+ # Gem as a library (i.e., don't include CLI-related files).
14
+ ###
11
15
 
12
16
  require 'nhkore/article'
13
17
  require 'nhkore/article_scraper'
@@ -27,18 +31,7 @@ require 'nhkore/search_link'
27
31
  require 'nhkore/search_scraper'
28
32
  require 'nhkore/sifter'
29
33
  require 'nhkore/splitter'
30
- require 'nhkore/user_agents'
31
34
  require 'nhkore/util'
32
35
  require 'nhkore/variator'
33
36
  require 'nhkore/version'
34
37
  require 'nhkore/word'
35
-
36
-
37
- module NHKore
38
- ###
39
- # Include this file to only require the files needed to use this
40
- # Gem as a library (i.e., don't include CLI-related files).
41
- ###
42
- module Lib
43
- end
44
- end
@@ -3,15 +3,13 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/util'
13
12
 
14
-
15
13
  module NHKore
16
14
  class Missingno
17
15
  attr_reader :kanas
data/lib/nhkore/news.rb CHANGED
@@ -3,18 +3,16 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/article'
13
12
  require 'nhkore/error'
14
13
  require 'nhkore/fileable'
15
14
  require 'nhkore/util'
16
15
 
17
-
18
16
  module NHKore
19
17
  class News
20
18
  include Fileable
@@ -60,14 +58,14 @@ module NHKore
60
58
  coder[:articles] = @articles
61
59
  end
62
60
 
63
- def self.load_data(data,article_class: Article,file: nil,news_class: News,overwrite: false,**kargs)
61
+ def self.load_data(data,article_class: Article,file: nil,news_class: News,overwrite: false,**_kargs)
64
62
  data = Util.load_yaml(data,file: file)
65
63
 
66
64
  articles = data[:articles]
67
65
 
68
66
  news = news_class.new
69
67
 
70
- articles&.each() do |key,hash|
68
+ articles&.each do |key,hash|
71
69
  key = key.to_s # Change from a symbol
72
70
  news.add_article(article_class.load_data(key,hash),key: key,overwrite: overwrite)
73
71
  end
@@ -99,7 +97,6 @@ module NHKore
99
97
  @articles.each_value do |a|
100
98
  if a.sha256 == sha256
101
99
  article = a
102
-
103
100
  break
104
101
  end
105
102
  end
@@ -131,11 +128,11 @@ module NHKore
131
128
  return News.load_data(data,article_class: Article,news_class: FutsuuNews,**kargs)
132
129
  end
133
130
 
134
- def self.load_file(file=DEFAULT_FILE,**kargs)
131
+ def self.load_file(file = DEFAULT_FILE,**kargs)
135
132
  return News.load_file(file,article_class: Article,news_class: FutsuuNews,**kargs)
136
133
  end
137
134
 
138
- def save_file(file=DEFAULT_FILE,**kargs)
135
+ def save_file(file = DEFAULT_FILE,**kargs)
139
136
  super
140
137
  end
141
138
  end
@@ -148,11 +145,11 @@ module NHKore
148
145
  return News.load_data(data,article_class: Article,news_class: YasashiiNews,**kargs)
149
146
  end
150
147
 
151
- def self.load_file(file=DEFAULT_FILE,**kargs)
148
+ def self.load_file(file = DEFAULT_FILE,**kargs)
152
149
  return News.load_file(file,article_class: Article,news_class: YasashiiNews,**kargs)
153
150
  end
154
151
 
155
- def save_file(file=DEFAULT_FILE,**kargs)
152
+ def save_file(file = DEFAULT_FILE,**kargs)
156
153
  super
157
154
  end
158
155
  end
@@ -3,15 +3,13 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/word'
13
12
 
14
-
15
13
  module NHKore
16
14
  class Polisher
17
15
  def begin_polish(str)
@@ -3,30 +3,36 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'attr_bool'
13
12
  require 'nokogiri'
14
13
  require 'open-uri'
14
+ require 'ronin/web/user_agents'
15
15
 
16
16
  require 'nhkore/error'
17
- require 'nhkore/user_agents'
18
17
  require 'nhkore/util'
19
18
 
20
-
21
19
  module NHKore
22
20
  class Scraper
23
21
  extend AttrBool::Ext
24
22
 
25
23
  DEFAULT_HEADER = {
26
- 'user-agent' => UserAgents.sample,
27
- 'accept' => 'text/html,application/xhtml+xml,application/xml,application/rss+xml,text/xml;image/webp' \
28
- ',image/apng,*/*;application/signed-exchange',
24
+ # See for better ones:
25
+ # - https://www.useragentstring.com/pages/Chrome/
26
+ 'user-agent' => Ronin::Web::UserAgents.random,
27
+
28
+ 'accept' => 'text/html,application/xhtml+xml,application/xml,application/rss+xml,text/xml;' \
29
+ 'q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
30
+ 'accept-language' => 'en;q=0.9,ja-JP;q=0.8,ja',
31
+ 'cache-control' => 'max-age=0',
29
32
  'dnt' => '1',
33
+ 'ect' => '4g',
34
+ 'priority' => 'u=0, i',
35
+ 'upgrade-insecure-requests' => '1',
30
36
  }.freeze
31
37
 
32
38
  attr_accessor? :eat_cookie
@@ -48,7 +54,7 @@ module NHKore
48
54
  # (time-consuming) operation since it opens the URL again, but necessary for some URLs.
49
55
  # @param redirect_rule [nil,:lenient,:strict]
50
56
  def initialize(url,eat_cookie: false,header: nil,is_file: false,max_redirects: 3,max_retries: 3,
51
- redirect_rule: :strict,str_or_io: nil,**kargs)
57
+ redirect_rule: :strict,str_or_io: nil,**kargs)
52
58
  super()
53
59
 
54
60
  if !header.nil? && !is_file
@@ -106,7 +112,7 @@ module NHKore
106
112
  return URI.join(@url,relative_url)
107
113
  end
108
114
 
109
- def open(url,str_or_io=nil,is_file: @is_file)
115
+ def open(url,str_or_io = nil,is_file: @is_file)
110
116
  @is_file = is_file
111
117
  @str_or_io = str_or_io
112
118
  @url = url
@@ -155,16 +161,20 @@ module NHKore
155
161
  case @redirect_rule
156
162
  when :lenient,:strict
157
163
  if redirect_uri.scheme != top_uri.scheme
158
- raise redirect.exception("redirect scheme[#{redirect_uri.scheme}] does not match original " \
159
- "scheme[#{top_uri.scheme}] at redirect URL[#{redirect_uri}]: #{redirect}")
164
+ raise redirect.exception(
165
+ "redirect scheme[#{redirect_uri.scheme}] does not match original " \
166
+ "scheme[#{top_uri.scheme}] at redirect URL[#{redirect_uri}]: #{redirect}"
167
+ )
160
168
  end
161
169
 
162
170
  if @redirect_rule == :strict
163
171
  redirect_domain = Util.domain(redirect_uri.host)
164
172
 
165
173
  if redirect_domain != top_domain
166
- raise redirect.exception("redirect domain[#{redirect_domain}] does not match original " \
167
- "domain[#{top_domain}] at redirect URL[#{redirect_uri}]: #{redirect}")
174
+ raise redirect.exception(
175
+ "redirect domain[#{redirect_domain}] does not match original " \
176
+ "domain[#{top_domain}] at redirect URL[#{redirect_uri}]: #{redirect}"
177
+ )
168
178
  end
169
179
  end
170
180
  end
@@ -3,19 +3,17 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'attr_bool'
13
12
  require 'time'
14
13
 
15
14
  require 'nhkore/fileable'
16
15
  require 'nhkore/util'
17
16
 
18
-
19
17
  module NHKore
20
18
  class SearchLink
21
19
  extend AttrBool::Ext
@@ -49,7 +47,7 @@ module NHKore
49
47
  coder[:sha256] = @sha256
50
48
  end
51
49
 
52
- def self.load_data(key,hash)
50
+ def self.load_data(_key,hash)
53
51
  slink = SearchLink.new(
54
52
  hash[:url],
55
53
  scraped: hash[:scraped],
@@ -74,11 +72,11 @@ module NHKore
74
72
  end
75
73
 
76
74
  def datetime=(value)
77
- if value.is_a?(Time)
78
- @datetime = value
79
- else
80
- @datetime = Util.empty_web_str?(value) ? nil : Time.iso8601(value)
81
- end
75
+ @datetime = if value.is_a?(Time)
76
+ value
77
+ else
78
+ Util.empty_web_str?(value) ? nil : Time.iso8601(value)
79
+ end
82
80
  end
83
81
 
84
82
  def futsuurl=(value)
@@ -143,8 +141,8 @@ module NHKore
143
141
  return self
144
142
  end
145
143
 
146
- def each(&block)
147
- return @links.each(&block)
144
+ def each(&)
145
+ return @links.each(&)
148
146
  end
149
147
 
150
148
  def encode_with(coder)
@@ -153,14 +151,14 @@ module NHKore
153
151
  coder[:links] = @links
154
152
  end
155
153
 
156
- def self.load_data(data,file: nil,**kargs)
154
+ def self.load_data(data,file: nil,**_kargs)
157
155
  data = Util.load_yaml(data,file: file)
158
156
 
159
157
  links = data[:links]
160
158
 
161
159
  slinks = SearchLinks.new
162
160
 
163
- links&.each() do |key,hash|
161
+ links&.each do |key,hash|
164
162
  key = key.to_s unless key.nil?
165
163
  slinks.links[key] = SearchLink.load_data(key,hash)
166
164
  end
@@ -3,12 +3,11 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'net/http'
13
12
  require 'uri'
14
13
 
@@ -17,7 +16,6 @@ require 'nhkore/scraper'
17
16
  require 'nhkore/search_link'
18
17
  require 'nhkore/util'
19
18
 
20
-
21
19
  module NHKore
22
20
  class SearchScraper < Scraper
23
21
  DEFAULT_RESULT_COUNT = 100
@@ -60,8 +58,10 @@ module NHKore
60
58
 
61
59
  # Example: https://www3.nhk.or.jp/news/easy/k10014150691000/k10014150691000.html
62
60
  def fetch_valid_link?(link)
63
- uri = begin
64
- URI(link)
61
+ uri = nil
62
+
63
+ begin
64
+ uri = URI(link)
65
65
  rescue StandardError
66
66
  return false # Bad URL.
67
67
  end
@@ -111,29 +111,40 @@ module NHKore
111
111
  super(url,**kargs)
112
112
  end
113
113
 
114
- def self.build_url(site,count: DEFAULT_RESULT_COUNT,**kargs)
114
+ # FIXME: Bing no longer allows `count`.
115
+ # rubocop:disable Lint/UnusedMethodArgument
116
+ def self.build_url(site,count: DEFAULT_RESULT_COUNT,**_kargs)
115
117
  url = ''.dup
116
118
 
117
119
  url << 'https://www.bing.com/search?'
118
120
  url << URI.encode_www_form(
119
121
  q: "site:#{site}",
120
- count: count
122
+ qs: 'n',
123
+ sp: '-1',
124
+ lq: '0',
125
+ pq: "site:#{site}",
126
+ sc: '1-25',
127
+ sk: '',
128
+ first: '1',
129
+ FORM: 'PERE',
121
130
  )
122
131
 
123
132
  return url
124
133
  end
134
+ # rubocop:enable Lint/UnusedMethodArgument
125
135
 
126
- def scrape(slinks,page=NextPage.new())
127
- next_page,link_count = scrape_html(slinks,page)
136
+ def scrape(slinks,page = NextPage.new())
137
+ next_page,_link_count = scrape_html(slinks,page)
128
138
 
129
- if link_count <= 0
130
- scrape_rss(slinks,page,next_page)
131
- end
139
+ # FIXME: Bing no longer allows RSS pages after the first page.
140
+ # if link_count <= 0
141
+ # scrape_rss(slinks,page,next_page)
142
+ # end
132
143
 
133
144
  return next_page
134
145
  end
135
146
 
136
- def scrape_html(slinks,page,next_page=NextPage.new())
147
+ def scrape_html(slinks,page,next_page = NextPage.new())
137
148
  doc = html_doc
138
149
  link_count = 0
139
150
 
@@ -145,7 +156,7 @@ module NHKore
145
156
 
146
157
  next if ignore_link?(href)
147
158
 
148
- if (md = href.match(/first=(\d+)/))
159
+ if (md = href.match(/first=(\d+)/i)) && href =~ /FORM=PERE/i
149
160
  count = md[1].to_i
150
161
 
151
162
  if count > page.count && (next_page.count < 0 || count < next_page.count)
@@ -161,7 +172,7 @@ module NHKore
161
172
  return [next_page,link_count]
162
173
  end
163
174
 
164
- def scrape_rss(slinks,page,next_page=NextPage.new())
175
+ def scrape_rss(slinks,page,next_page = NextPage.new())
165
176
  link_count = 0
166
177
 
167
178
  if !@is_file