nhkore 0.3.16 → 0.3.18

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
data/lib/nhkore/entry.rb CHANGED
@@ -8,11 +8,9 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/defn'
13
12
  require 'nhkore/util'
14
13
 
15
-
16
14
  module NHKore
17
15
  class Entry
18
16
  HYOUKI_SEP = '・'
data/lib/nhkore/error.rb CHANGED
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
12
  class Error < ::StandardError; end
14
13
 
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
12
  module Fileable
14
13
  def self.included(mod)
data/lib/nhkore/lib.rb CHANGED
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/article'
13
12
  require 'nhkore/article_scraper'
14
13
  require 'nhkore/cleaner'
@@ -33,7 +32,6 @@ require 'nhkore/variator'
33
32
  require 'nhkore/version'
34
33
  require 'nhkore/word'
35
34
 
36
-
37
35
  module NHKore
38
36
  ###
39
37
  # Include this file to only require the files needed to use this
@@ -8,10 +8,8 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/util'
13
12
 
14
-
15
13
  module NHKore
16
14
  class Missingno
17
15
  attr_reader :kanas
data/lib/nhkore/news.rb CHANGED
@@ -8,13 +8,11 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/article'
13
12
  require 'nhkore/error'
14
13
  require 'nhkore/fileable'
15
14
  require 'nhkore/util'
16
15
 
17
-
18
16
  module NHKore
19
17
  class News
20
18
  include Fileable
@@ -60,14 +58,14 @@ module NHKore
60
58
  coder[:articles] = @articles
61
59
  end
62
60
 
63
- def self.load_data(data,article_class: Article,file: nil,news_class: News,overwrite: false,**kargs)
61
+ def self.load_data(data,article_class: Article,file: nil,news_class: News,overwrite: false,**_kargs)
64
62
  data = Util.load_yaml(data,file: file)
65
63
 
66
64
  articles = data[:articles]
67
65
 
68
66
  news = news_class.new
69
67
 
70
- articles&.each() do |key,hash|
68
+ articles&.each do |key,hash|
71
69
  key = key.to_s # Change from a symbol
72
70
  news.add_article(article_class.load_data(key,hash),key: key,overwrite: overwrite)
73
71
  end
@@ -99,7 +97,6 @@ module NHKore
99
97
  @articles.each_value do |a|
100
98
  if a.sha256 == sha256
101
99
  article = a
102
-
103
100
  break
104
101
  end
105
102
  end
@@ -131,11 +128,11 @@ module NHKore
131
128
  return News.load_data(data,article_class: Article,news_class: FutsuuNews,**kargs)
132
129
  end
133
130
 
134
- def self.load_file(file=DEFAULT_FILE,**kargs)
131
+ def self.load_file(file = DEFAULT_FILE,**kargs)
135
132
  return News.load_file(file,article_class: Article,news_class: FutsuuNews,**kargs)
136
133
  end
137
134
 
138
- def save_file(file=DEFAULT_FILE,**kargs)
135
+ def save_file(file = DEFAULT_FILE,**kargs)
139
136
  super
140
137
  end
141
138
  end
@@ -148,11 +145,11 @@ module NHKore
148
145
  return News.load_data(data,article_class: Article,news_class: YasashiiNews,**kargs)
149
146
  end
150
147
 
151
- def self.load_file(file=DEFAULT_FILE,**kargs)
148
+ def self.load_file(file = DEFAULT_FILE,**kargs)
152
149
  return News.load_file(file,article_class: Article,news_class: YasashiiNews,**kargs)
153
150
  end
154
151
 
155
- def save_file(file=DEFAULT_FILE,**kargs)
152
+ def save_file(file = DEFAULT_FILE,**kargs)
156
153
  super
157
154
  end
158
155
  end
@@ -8,10 +8,8 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/word'
13
12
 
14
-
15
13
  module NHKore
16
14
  class Polisher
17
15
  def begin_polish(str)
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'attr_bool'
13
12
  require 'nokogiri'
14
13
  require 'open-uri'
@@ -17,7 +16,6 @@ require 'nhkore/error'
17
16
  require 'nhkore/user_agents'
18
17
  require 'nhkore/util'
19
18
 
20
-
21
19
  module NHKore
22
20
  class Scraper
23
21
  extend AttrBool::Ext
@@ -48,7 +46,7 @@ module NHKore
48
46
  # (time-consuming) operation since it opens the URL again, but necessary for some URLs.
49
47
  # @param redirect_rule [nil,:lenient,:strict]
50
48
  def initialize(url,eat_cookie: false,header: nil,is_file: false,max_redirects: 3,max_retries: 3,
51
- redirect_rule: :strict,str_or_io: nil,**kargs)
49
+ redirect_rule: :strict,str_or_io: nil,**kargs)
52
50
  super()
53
51
 
54
52
  if !header.nil? && !is_file
@@ -106,7 +104,7 @@ module NHKore
106
104
  return URI.join(@url,relative_url)
107
105
  end
108
106
 
109
- def open(url,str_or_io=nil,is_file: @is_file)
107
+ def open(url,str_or_io = nil,is_file: @is_file)
110
108
  @is_file = is_file
111
109
  @str_or_io = str_or_io
112
110
  @url = url
@@ -155,16 +153,20 @@ module NHKore
155
153
  case @redirect_rule
156
154
  when :lenient,:strict
157
155
  if redirect_uri.scheme != top_uri.scheme
158
- raise redirect.exception("redirect scheme[#{redirect_uri.scheme}] does not match original " \
159
- "scheme[#{top_uri.scheme}] at redirect URL[#{redirect_uri}]: #{redirect}")
156
+ raise redirect.exception(
157
+ "redirect scheme[#{redirect_uri.scheme}] does not match original " \
158
+ "scheme[#{top_uri.scheme}] at redirect URL[#{redirect_uri}]: #{redirect}"
159
+ )
160
160
  end
161
161
 
162
162
  if @redirect_rule == :strict
163
163
  redirect_domain = Util.domain(redirect_uri.host)
164
164
 
165
165
  if redirect_domain != top_domain
166
- raise redirect.exception("redirect domain[#{redirect_domain}] does not match original " \
167
- "domain[#{top_domain}] at redirect URL[#{redirect_uri}]: #{redirect}")
166
+ raise redirect.exception(
167
+ "redirect domain[#{redirect_domain}] does not match original " \
168
+ "domain[#{top_domain}] at redirect URL[#{redirect_uri}]: #{redirect}"
169
+ )
168
170
  end
169
171
  end
170
172
  end
@@ -8,14 +8,12 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'attr_bool'
13
12
  require 'time'
14
13
 
15
14
  require 'nhkore/fileable'
16
15
  require 'nhkore/util'
17
16
 
18
-
19
17
  module NHKore
20
18
  class SearchLink
21
19
  extend AttrBool::Ext
@@ -49,7 +47,7 @@ module NHKore
49
47
  coder[:sha256] = @sha256
50
48
  end
51
49
 
52
- def self.load_data(key,hash)
50
+ def self.load_data(_key,hash)
53
51
  slink = SearchLink.new(
54
52
  hash[:url],
55
53
  scraped: hash[:scraped],
@@ -74,11 +72,11 @@ module NHKore
74
72
  end
75
73
 
76
74
  def datetime=(value)
77
- if value.is_a?(Time)
78
- @datetime = value
79
- else
80
- @datetime = Util.empty_web_str?(value) ? nil : Time.iso8601(value)
81
- end
75
+ @datetime = if value.is_a?(Time)
76
+ value
77
+ else
78
+ Util.empty_web_str?(value) ? nil : Time.iso8601(value)
79
+ end
82
80
  end
83
81
 
84
82
  def futsuurl=(value)
@@ -143,8 +141,8 @@ module NHKore
143
141
  return self
144
142
  end
145
143
 
146
- def each(&block)
147
- return @links.each(&block)
144
+ def each(&)
145
+ return @links.each(&)
148
146
  end
149
147
 
150
148
  def encode_with(coder)
@@ -153,14 +151,14 @@ module NHKore
153
151
  coder[:links] = @links
154
152
  end
155
153
 
156
- def self.load_data(data,file: nil,**kargs)
154
+ def self.load_data(data,file: nil,**_kargs)
157
155
  data = Util.load_yaml(data,file: file)
158
156
 
159
157
  links = data[:links]
160
158
 
161
159
  slinks = SearchLinks.new
162
160
 
163
- links&.each() do |key,hash|
161
+ links&.each do |key,hash|
164
162
  key = key.to_s unless key.nil?
165
163
  slinks.links[key] = SearchLink.load_data(key,hash)
166
164
  end
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'net/http'
13
12
  require 'uri'
14
13
 
@@ -17,7 +16,6 @@ require 'nhkore/scraper'
17
16
  require 'nhkore/search_link'
18
17
  require 'nhkore/util'
19
18
 
20
-
21
19
  module NHKore
22
20
  class SearchScraper < Scraper
23
21
  DEFAULT_RESULT_COUNT = 100
@@ -60,8 +58,10 @@ module NHKore
60
58
 
61
59
  # Example: https://www3.nhk.or.jp/news/easy/k10014150691000/k10014150691000.html
62
60
  def fetch_valid_link?(link)
63
- uri = begin
64
- URI(link)
61
+ uri = nil
62
+
63
+ begin
64
+ uri = URI(link)
65
65
  rescue StandardError
66
66
  return false # Bad URL.
67
67
  end
@@ -111,19 +111,21 @@ module NHKore
111
111
  super(url,**kargs)
112
112
  end
113
113
 
114
- def self.build_url(site,count: DEFAULT_RESULT_COUNT,**kargs)
114
+ # rubocop:disable Lint/UnusedMethodArgument
115
+ def self.build_url(site,count: DEFAULT_RESULT_COUNT,**_kargs)
115
116
  url = ''.dup
116
117
 
117
118
  url << 'https://www.bing.com/search?'
118
119
  url << URI.encode_www_form(
119
120
  q: "site:#{site}",
120
- count: count
121
+ # count: count # FIXME: `count` is no longer allowed on Bing.
121
122
  )
122
123
 
123
124
  return url
124
125
  end
126
+ # rubocop:enable Lint/UnusedMethodArgument
125
127
 
126
- def scrape(slinks,page=NextPage.new())
128
+ def scrape(slinks,page = NextPage.new())
127
129
  next_page,link_count = scrape_html(slinks,page)
128
130
 
129
131
  if link_count <= 0
@@ -133,7 +135,7 @@ module NHKore
133
135
  return next_page
134
136
  end
135
137
 
136
- def scrape_html(slinks,page,next_page=NextPage.new())
138
+ def scrape_html(slinks,page,next_page = NextPage.new())
137
139
  doc = html_doc
138
140
  link_count = 0
139
141
 
@@ -161,7 +163,7 @@ module NHKore
161
163
  return [next_page,link_count]
162
164
  end
163
165
 
164
- def scrape_rss(slinks,page,next_page=NextPage.new())
166
+ def scrape_rss(slinks,page,next_page = NextPage.new())
165
167
  link_count = 0
166
168
 
167
169
  if !@is_file
data/lib/nhkore/sifter.rb CHANGED
@@ -8,12 +8,10 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/article'
13
12
  require 'nhkore/fileable'
14
13
  require 'nhkore/util'
15
14
 
16
-
17
15
  module NHKore
18
16
  class Sifter
19
17
  include Fileable
@@ -87,7 +85,7 @@ module NHKore
87
85
  datetime = article.datetime
88
86
 
89
87
  return true if datetime.nil? ||
90
- datetime < datetime_filter[:from] || datetime > datetime_filter[:to]
88
+ datetime < datetime_filter[:from] || datetime > datetime_filter[:to]
91
89
  end
92
90
 
93
91
  if !title_filter.nil?
@@ -109,7 +107,7 @@ module NHKore
109
107
  return false
110
108
  end
111
109
 
112
- def filter_by_datetime(datetime_filter=nil,from: nil,to: nil)
110
+ def filter_by_datetime(datetime_filter = nil,from: nil,to: nil)
113
111
  if !datetime_filter.nil?
114
112
  if datetime_filter.respond_to?(:[])
115
113
  # If out-of-bounds, just nil.
@@ -234,10 +232,10 @@ module NHKore
234
232
  HTML
235
233
 
236
234
  # If have too few or too many '<col>', invalid HTML.
237
- @output << %Q(<col style="width:6em;">\n) unless @ignores[:freq]
238
- @output << %Q(<col style="width:17em;">\n) unless @ignores[:word]
239
- @output << %Q(<col style="width:17em;">\n) unless @ignores[:kana]
240
- @output << %Q(<col style="width:5em;">\n) unless @ignores[:eng]
235
+ @output << %(<col style="width:6em;">\n) unless @ignores[:freq]
236
+ @output << %(<col style="width:17em;">\n) unless @ignores[:word]
237
+ @output << %(<col style="width:17em;">\n) unless @ignores[:kana]
238
+ @output << %(<col style="width:5em;">\n) unless @ignores[:eng]
241
239
  @output << "<col>\n" unless @ignores[:defn] # No width for defn, fills rest of page
242
240
 
243
241
  @output << '<tr>'
@@ -8,10 +8,8 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/util'
13
12
 
14
-
15
13
  module NHKore
16
14
  class Splitter
17
15
  def begin_split(str)
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  if $PROGRAM_NAME == __FILE__
13
12
  require 'bundler/inline'
14
13
 
data/lib/nhkore/util.rb CHANGED
@@ -8,13 +8,11 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'cgi'
13
12
  require 'set'
14
13
  require 'time'
15
14
  require 'uri'
16
15
 
17
-
18
16
  module NHKore
19
17
  module Util
20
18
  CORE_DIR = 'core'
@@ -64,7 +62,8 @@ module NHKore
64
62
 
65
63
  stylers = Array(stylers)
66
64
 
67
- return Psychgus.dump(obj,
65
+ return Psychgus.dump(
66
+ obj,
68
67
  deref_aliases: true, # Dereference aliases for load_yaml()
69
68
  header: true, # %YAML [version]
70
69
  line_width: 10_000, # Try not to wrap; ichiman!
@@ -117,10 +116,11 @@ module NHKore
117
116
  def self.load_yaml(data,file: nil,**kargs)
118
117
  require 'psychgus'
119
118
 
120
- return Psych.safe_load(data,
119
+ return Psych.safe_load(
120
+ data,
121
121
  aliases: false,
122
122
  filename: file,
123
- #freeze: true, # Not in this current version of Psych
123
+ # freeze: true, # Not in this current version of Psych
124
124
  permitted_classes: [Symbol],
125
125
  symbolize_names: true,
126
126
  **kargs,
@@ -180,8 +180,8 @@ module NHKore
180
180
  # String's normal strip() method doesn't work with special Unicode/HTML white space.
181
181
  def self.strip_web_str(str)
182
182
  # After testing with Benchmark, this is slower than one regex.
183
- #str = str.gsub(/\A[[:space:]]+/,'')
184
- #str = str.gsub(/[[:space:]]+\z/,'')
183
+ # str = str.gsub(/\A[[:space:]]+/,'')
184
+ # str = str.gsub(/[[:space:]]+\z/,'')
185
185
 
186
186
  str = str.gsub(STRIP_WEB_STR_REGEX,'')
187
187
 
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
12
  class Variator
14
13
  def begin_variate(str)
@@ -24,7 +23,7 @@ module NHKore
24
23
  end
25
24
 
26
25
  class BasicVariator < Variator
27
- def end_variate(str)
26
+ def end_variate(_str)
28
27
  return [] # No variations; don't return nil
29
28
  end
30
29
  end
@@ -49,7 +48,7 @@ module NHKore
49
48
  def end_variate(str)
50
49
  guess = @deinflector.deinflect(str)
51
50
 
52
- return [] if guess.length < 1
51
+ return [] if guess.empty?
53
52
  return [] if (guess = guess[0])[:weight] < 0.5
54
53
 
55
54
  return [guess[:word]]
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
- VERSION = '0.3.16'
12
+ VERSION = '0.3.18'
14
13
  end
data/lib/nhkore/word.rb CHANGED
@@ -8,13 +8,11 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nokogiri'
13
12
 
14
13
  require 'nhkore/error'
15
14
  require 'nhkore/util'
16
15
 
17
-
18
16
  module NHKore
19
17
  class Word
20
18
  attr_accessor :defn
@@ -24,7 +22,7 @@ module NHKore
24
22
  attr_reader :kanji
25
23
  attr_reader :key
26
24
 
27
- def initialize(defn: nil,eng: nil,freq: 1,kana: nil,kanji: nil,unknown: nil,word: nil,**kargs)
25
+ def initialize(defn: nil,eng: nil,freq: 1,kana: nil,kanji: nil,unknown: nil,word: nil,**_kargs)
28
26
  super()
29
27
 
30
28
  if !word.nil?
@@ -110,14 +108,14 @@ module NHKore
110
108
  # First, try <rb> tags.
111
109
  kanjis = tag.css('rb')
112
110
  # Second, try text nodes.
113
- kanjis = tag.search('./text()') if kanjis.length < 1
111
+ kanjis = tag.search('./text()') if kanjis.empty?
114
112
  # Third, try non-<rt> tags, in case of being surrounded by <span>, <b>, etc.
115
- kanjis = tag.search("./*[not(name()='rt')]") if kanjis.length < 1
113
+ kanjis = tag.search("./*[not(name()='rt')]") if kanjis.empty?
116
114
 
117
115
  kanas = tag.css('rt')
118
116
 
119
- raise ScrapeError,"no kanji at URL[#{url}] in tag[#{tag}]" if kanjis.length < 1
120
- raise ScrapeError,"no kana at URL[#{url}] in tag[#{tag}]" if kanas.length < 1
117
+ raise ScrapeError,"no kanji at URL[#{url}] in tag[#{tag}]" if kanjis.empty?
118
+ raise ScrapeError,"no kana at URL[#{url}] in tag[#{tag}]" if kanas.empty?
121
119
 
122
120
  if kanjis.length != kanas.length
123
121
  raise ScrapeError,"number of kanji & kana mismatch at URL[#{url}] in tag[#{tag}]"
@@ -130,7 +128,7 @@ module NHKore
130
128
  kana = kanas[i].text
131
129
 
132
130
  # Uncomment for debugging; really need a logger.
133
- #puts "Word[#{i}]: #{kanji} => #{kana}"
131
+ # puts "Word[#{i}]: #{kanji} => #{kana}"
134
132
 
135
133
  if !missingno.nil?
136
134
  # Check kana first, since this is the typical scenario.
@@ -162,7 +160,7 @@ module NHKore
162
160
 
163
161
  # Do not clean and/or strip spaces, as the raw text is important for
164
162
  # Defn and ArticleScraper.
165
- def self.scrape_text_node(tag,url: nil)
163
+ def self.scrape_text_node(tag,url: nil) # rubocop:disable Lint/UnusedMethodArgument
166
164
  text = tag.text
167
165
 
168
166
  # No error; empty text is fine (not strictly kanji/kana only).
data/lib/nhkore.rb CHANGED
@@ -8,7 +8,6 @@
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  TESTING = ($PROGRAM_NAME == __FILE__)
13
12
 
14
13
  if TESTING
@@ -25,9 +24,8 @@ require 'nhkore/cli/news_cmd'
25
24
  require 'nhkore/cli/search_cmd'
26
25
  require 'nhkore/cli/sift_cmd'
27
26
 
28
-
29
27
  module NHKore
30
- def self.run(args=ARGV)
28
+ def self.run(args = ARGV)
31
29
  app = App.new(args)
32
30
 
33
31
  begin