nhkore 0.3.4 → 0.3.9

--- lib/nhkore/article.rb
+++ lib/nhkore/article.rb
@@ -1,23 +1,11 @@
- #!/usr/bin/env ruby
  # encoding: UTF-8
  # frozen_string_literal: true

  #--
  # This file is part of NHKore.
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
- #
- # NHKore is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # NHKore is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public License
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
+ #
+ # SPDX-License-Identifier: LGPL-3.0-or-later
  #++


@@ -29,20 +17,20 @@ require 'nhkore/word'

  module NHKore
  ###
- # @author Jonathan Bradley Whited (@esotericpig)
+ # @author Jonathan Bradley Whited
  # @since 0.2.0
  ###
  class Article
- attr_accessor :datetime
- attr_accessor :futsuurl
+ attr_reader :datetime
+ attr_reader :futsuurl
  attr_accessor :sha256
  attr_accessor :title
- attr_accessor :url
+ attr_reader :url
  attr_reader :words
-
- def initialize()
+
+ def initialize
  super()
-
+
  @datetime = nil
  @futsuurl = nil
  @sha256 = nil
@@ -50,7 +38,7 @@ module NHKore
  @url = nil
  @words = {}
  end
-
+
  # Why does this not look up the kanji/kana only and then update the other
  # kana/kanji part appropriately?
  # - There are some words like +行って+. Without the kana, it's difficult to
@@ -60,70 +48,85 @@ module NHKore
  # try to populate the other value.
  def add_word(word,use_freq: false)
  curr_word = words[word.key]
-
- if curr_word.nil?()
+
+ if curr_word.nil?
  words[word.key] = word
  curr_word = word
  else
  curr_word.freq += (use_freq ? word.freq : 1)
-
- curr_word.defn = word.defn if word.defn.to_s().length > curr_word.defn.to_s().length
- curr_word.eng = word.eng if word.eng.to_s().length > curr_word.eng.to_s().length
+
+ curr_word.defn = word.defn if word.defn.to_s.length > curr_word.defn.to_s.length
+ curr_word.eng = word.eng if word.eng.to_s.length > curr_word.eng.to_s.length
  end
-
+
  return curr_word
  end
-
+
  def encode_with(coder)
  # Order matters.
-
- coder[:datetime] = @datetime.nil?() ? @datetime : @datetime.iso8601()
+
+ coder[:datetime] = @datetime.nil? ? @datetime : @datetime.iso8601
  coder[:title] = @title
- coder[:url] = @url
- coder[:futsuurl] = @futsuurl
+ coder[:url] = @url.nil? ? nil : @url.to_s
+ coder[:futsuurl] = @futsuurl.nil? ? nil : @futsuurl.to_s
  coder[:sha256] = @sha256
  coder[:words] = @words
  end
-
+
  def self.load_data(key,hash)
- datetime = hash[:datetime]
  words = hash[:words]
-
- article = Article.new()
-
- article.datetime = Util.empty_web_str?(datetime) ? nil : Time.iso8601(datetime)
+
+ article = Article.new
+
+ article.datetime = hash[:datetime]
  article.futsuurl = hash[:futsuurl]
  article.sha256 = hash[:sha256]
  article.title = hash[:title]
  article.url = hash[:url]
-
- if !words.nil?()
- words.each() do |k,h|
- k = k.to_s() # Change from a symbol
- article.words[k] = Word.load_data(k,h)
- end
+
+ words&.each() do |k,h|
+ k = k.to_s # Change from a symbol
+ article.words[k] = Word.load_data(k,h)
  end
-
+
  return article
  end
-
+
+ def datetime=(value)
+ if value.is_a?(Time)
+ @datetime = value
+ else
+ @datetime = Util.empty_web_str?(value) ? nil : Time.iso8601(value)
+ end
+ end
+
+ def futsuurl=(value)
+ # Don't store URI, store String.
+ @futsuurl = value.nil? ? nil : value.to_s
+ end
+
+ def url=(value)
+ # Don't store URI, store String.
+ @url = value.nil? ? nil : value.to_s
+ end
+
  def to_s(mini: false)
- s = ''.dup()
-
+ s = ''.dup
+
  s << "'#{@url}':"
  s << "\n datetime: '#{@datetime}'"
  s << "\n title: '#{@title}'"
  s << "\n url: '#{@url}'"
  s << "\n futsuurl: '#{@futsuurl}'"
  s << "\n sha256: '#{@sha256}'"
-
+
  if !mini
  s << "\n words:"
- @words.each() do |key,word|
+ @words.each do |key,word|
  s << "\n #{word}"
  end
  end
-
+
  return s
  end
  end
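Note on the Article diff above: datetime, url, and futsuurl move from attr_accessor to attr_reader with custom setters, so values are normalized at assignment time. datetime= accepts either a Time or an ISO-8601 string (parsed via Time.iso8601), while url= and futsuurl= store plain Strings instead of URI objects; this is also why load_data no longer parses hash[:datetime] itself. A minimal sketch of the new setter behavior (the sample values are illustrative, not taken from the diff):

    require 'nhkore/article'
    require 'uri'

    article = NHKore::Article.new

    # A Time value passes through unchanged; a String is parsed with
    # Time.iso8601; an empty/nil web string becomes nil.
    article.datetime = '2020-04-01T09:30:00+09:00'
    article.datetime.class # => Time

    # URI objects (or anything else) are stored as Strings via to_s.
    article.url = URI('https://www3.nhk.or.jp/news/easy/')
    article.url.class # => String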
--- lib/nhkore/article_scraper.rb
+++ lib/nhkore/article_scraper.rb
@@ -1,23 +1,11 @@
- #!/usr/bin/env ruby
  # encoding: UTF-8
  # frozen_string_literal: true

  #--
  # This file is part of NHKore.
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
- #
- # NHKore is free software: you can redistribute it and/or modify
- # it under the terms of the GNU Lesser General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # NHKore is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU Lesser General Public License for more details.
- #
- # You should have received a copy of the GNU Lesser General Public License
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
+ #
+ # SPDX-License-Identifier: LGPL-3.0-or-later
  #++


@@ -39,10 +27,12 @@ require 'nhkore/word'

  module NHKore
  ###
- # @author Jonathan Bradley Whited (@esotericpig)
+ # @author Jonathan Bradley Whited
  # @since 0.2.0
  ###
  class ArticleScraper < Scraper
+ extend AttrBool::Ext
+
  attr_reader :cleaners
  attr_accessor :datetime
  attr_accessor :dict
@@ -53,18 +43,20 @@ module NHKore
  attr_accessor? :strict
  attr_reader :variators
  attr_accessor :year
-
+
  # @param dict [Dict,:scrape,nil] the {Dict} (dictionary) to use for {Word#defn} (definitions)
  # [+:scrape+] auto-scrape it using {DictScraper}
  # [+nil+] don't scrape/use it
  # @param missingno [Missingno] data to use as a fallback for Ruby words without kana/kanji,
  # instead of raising an error
  # @param strict [true,false]
- def initialize(url,cleaners: [BestCleaner.new()],datetime: nil,dict: :scrape,missingno: nil,polishers: [BestPolisher.new()],splitter: BestSplitter.new(),strict: true,variators: [BestVariator.new()],year: nil,**kargs)
+ def initialize(url,cleaners: [BestCleaner.new],datetime: nil,dict: :scrape,missingno: nil,
+ polishers: [BestPolisher.new],splitter: BestSplitter.new,strict: true,
+ variators: [BestVariator.new],year: nil,**kargs)
  super(url,**kargs)
-
+
  @cleaners = Array(cleaners)
- @datetime = datetime.nil?() ? nil : Util.jst_time(datetime)
+ @datetime = datetime.nil? ? nil : Util.jst_time(datetime)
  @dict = dict
  @kargs = kargs
  @missingno = missingno
@@ -74,20 +66,20 @@ module NHKore
  @variators = Array(variators)
  @year = year
  end
-
+
  def add_words(article,words,text)
- words.each() do |word|
+ words.each do |word|
  # Words should have already been cleaned.
  # If we don't check this, Word.new() could raise an error in polish().
- next if polish(word.word).empty?()
-
+ next if polish(word.word).empty?
+
  article.add_word(polish(word))
-
- variate(word.word).each() do |v|
+
+ variate(word.word).each do |v|
  v = polish(clean(v))
-
- next if v.empty?()
-
+
+ next if v.empty?
+
  # Do not pass in "word: word". We only want defn & eng.
  # If we pass in kanji/kana & unknown, it will raise an error.
  article.add_word(Word.new(
@@ -97,513 +89,540 @@ module NHKore
  ))
  end
  end
-
- split(text).each() do |t|
+
+ split(text).each do |t|
  t = polish(clean(t))
-
- next if t.empty?()
-
+
+ next if t.empty?
+
  article.add_word(Word.new(unknown: t))
-
- variate(t).each() do |v|
+
+ variate(t).each do |v|
  v = polish(clean(v))
-
- next if v.empty?()
-
+
+ next if v.empty?
+
  article.add_word(Word.new(unknown: v))
  end
  end
  end
-
+
  def clean(obj)
  return Cleaner.clean_any(obj,@cleaners)
  end
-
- def fix_bad_html()
+
+ def fix_bad_html
  # Fixes:
  # - '<「<' without escaping '<' as '&lt;'
  # - https://www3.nhk.or.jp/news/easy/k10012118911000/k10012118911000.html
  # - '</p><br><「<ruby>台風<rt>たいふう</rt></ruby>'
-
- read()
-
- # To add a new one, simply add '|(...)' on a newline and test $#.
+
+ read
+
+ # To add a new one, simply add '|(...)' on a newline and test Regexp.last_match().
  @str_or_io = @str_or_io.gsub(/
- (\<「\<)
+ (?<cane><「<)
  /x) do |match|
- if !$1.nil?()
+ if !Regexp.last_match(:cane).nil?
  match = match.sub('<','&lt;')
  end
-
+
  match
  end
  end
-
+
  def parse_datetime(str,year)
  str = str.gsub(/[\[\][[:space:]]]+/,'') # Remove: [ ] \s
  str = "#{year}年 #{str} #{Util::JST_OFFSET}"
-
+
  return Time.strptime(str,'%Y年 %m月%d日%H時%M分 %:z')
  end
-
+
  def parse_dicwin_id(str)
  str = str.gsub(/\D+/,'')
-
- return nil if str.empty?()
+
+ return nil if str.empty?
  return str
  end
-
+
  def polish(obj)
  return Polisher.polish_any(obj,@polishers)
  end
-
- def scrape()
- scrape_dict()
- fix_bad_html()
-
- article = Article.new()
- doc = html_doc()
-
+
+ def scrape
+ scrape_dict
+ fix_bad_html
+
+ article = Article.new
+ doc = html_doc
+
  article.futsuurl = scrape_futsuurl(doc)
-
+
  article.datetime = scrape_datetime(doc,article.futsuurl)
  article.sha256 = scrape_content(doc,article)
  article.title = scrape_title(doc,article)
  article.url = @url
-
+
  return article
  end
-
- def scrape_and_add_words(tag,article,result: ScrapeWordsResult.new())
+
+ def scrape_and_add_words(tag,article,result: ScrapeWordsResult.new)
  result = scrape_words(tag,result: result)
- result.polish!()
-
+ result.polish!
+
  add_words(article,result.words,result.text)
-
+
  return result
  end
-
+
  def scrape_content(doc,article)
  tag = doc.css('div#js-article-body')
  tag = doc.css('div.article-main__body') if tag.length < 1
  tag = doc.css('div.article-body') if tag.length < 1
-
+
  # - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
  tag = doc.css('div#main') if tag.length < 1 && !@strict
-
+
  if tag.length > 0
- text = Util.unspace_web_str(tag.text.to_s())
-
- if !text.empty?()
+ text = Util.unspace_web_str(tag.text.to_s)
+
+ if !text.empty?
  hexdigest = Digest::SHA256.hexdigest(text)
-
- return hexdigest if article.nil?() # For scrape_sha256_only()
-
+
+ return hexdigest if article.nil? # For scrape_sha256_only()
+
  result = scrape_and_add_words(tag,article)
-
- return hexdigest if result.words?()
+
+ return hexdigest if result.words?
  end
  end
-
+
  raise ScrapeError,"could not scrape content at URL[#{@url}]"
  end
-
+
  def scrape_datetime(doc,futsuurl=nil)
  year = scrape_year(doc,futsuurl)
-
+
  # First, try with the id.
  tag_name = 'p#js-article-date'
  tag = doc.css(tag_name)
-
+
  if tag.length > 0
  tag_text = tag[0].text
-
+
  begin
  datetime = parse_datetime(tag_text,year)
-
+
  return datetime
  rescue ArgumentError => e
  # Ignore; try again below.
  Util.warn("could not parse date time[#{tag_text}] from tag[#{tag_name}] at URL[#{@url}]: #{e}")
  end
  end
-
+
  # Second, try with the class.
  tag_name = 'p.article-main__date'
  tag = doc.css(tag_name)
-
+
  if tag.length > 0
  tag_text = tag[0].text
-
+
  begin
  datetime = parse_datetime(tag_text,year)
-
+
  return datetime
  rescue ArgumentError => e
  # Ignore; try again below.
  Util.warn("could not parse date time[#{tag_text}] from tag[#{tag_name}] at URL[#{@url}]: #{e}")
  end
-
+
  return datetime
  end
-
+
  # Third, try body's id.
  # - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_02.html
  # - 'news20170331_k10010922481000'
  tag = doc.css('body')
-
+
  if tag.length > 0
- tag_id = tag[0]['id'].to_s().split('_',2)
-
+ tag_id = tag[0]['id'].to_s.split('_',2)
+
  if tag_id.length > 0
  tag_id = tag_id[0].gsub(/[^[[:digit:]]]+/,'')
-
+
  if tag_id.length == 8
  datetime = Time.strptime(tag_id,'%Y%m%d')
-
+
  return datetime
  end
  end
  end
-
+
  # As a last resort, use our user-defined fallback (if specified).
- return @datetime unless @datetime.nil?()
-
+ return @datetime unless @datetime.nil?
+
  raise ScrapeError,"could not scrape date time at URL[#{@url}]"
  end
-
- def scrape_dict()
+
+ def scrape_dict
  return if @dict != :scrape
-
+
  dict_url = DictScraper.parse_url(@url)
  retries = 0
-
+
  begin
  scraper = DictScraper.new(dict_url,missingno: @missingno,parse_url: false,**@kargs)
  rescue OpenURI::HTTPError => e
- if retries == 0 && e.to_s().include?('404')
- read()
-
+ if retries == 0 && e.to_s.include?('404')
+ read
+
  scraper = ArticleScraper.new(@url,str_or_io: @str_or_io,**@kargs)
-
- dict_url = scraper.scrape_dict_url_only()
+
+ dict_url = scraper.scrape_dict_url_only
  retries += 1
-
+
  retry
  else
- raise e.exception("could not scrape dictionary at URL[#{dict_url}]: #{e}")
+ raise e.exception("could not scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
  end
  end
-
- @dict = scraper.scrape()
+
+ @dict = scraper.scrape
  end
-
- def scrape_dict_url_only()
- doc = html_doc()
-
+
+ def scrape_dict_url_only
+ doc = html_doc
+
  # - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_02.html
  # - 'news20170331_k10010922481000'
  tag = doc.css('body')
-
+
  if tag.length > 0
- tag_id = tag[0]['id'].to_s().split('_',2)
-
+ tag_id = tag[0]['id'].to_s.split('_',2)
+
  if tag_id.length == 2
  dict_url = Util.strip_web_str(tag_id[1])
-
- if !dict_url.empty?()
+
+ if !dict_url.empty?
  return DictScraper.parse_url(@url,basename: dict_url)
  end
  end
  end
-
+
  raise ScrapeError,"could not scrape dictionary URL at URL[#{@url}]"
  end
-
- def scrape_dicwin_word(tag,id,result: ScrapeWordsResult.new())
+
+ def scrape_dicwin_word(tag,id,result: ScrapeWordsResult.new)
  dicwin_result = scrape_words(tag,dicwin: true)
-
- return nil unless dicwin_result.words?()
-
- kana = ''.dup()
- kanji = ''.dup()
-
- dicwin_result.words.each() do |word|
- kana << word.kana unless word.kana.nil?()
-
- if kanji.empty?()
- kanji << word.kanji unless word.kanji.nil?()
+
+ return nil unless dicwin_result.words?
+
+ kana = ''.dup
+ kanji = ''.dup
+
+ dicwin_result.words.each do |word|
+ kana << word.kana unless word.kana.nil?
+
+ if kanji.empty?
+ kanji << word.kanji unless word.kanji.nil?
  else
  kanji << word.word # Add trailing kana (or kanji) to kanji
  end
  end
-
+
  entry = nil
  kana = clean(kana)
  kanji = clean(kanji)
-
- raise ScrapeError,"empty dicWin word at URL[#{@url}] in tag[#{tag}]" if kana.empty?() && kanji.empty?()
-
- if !@dict.nil?()
+
+ raise ScrapeError,"empty dicWin word at URL[#{@url}] in tag[#{tag}]" if kana.empty? && kanji.empty?
+
+ if !@dict.nil?
  entry = @dict[id]
-
- raise ScrapeError,"no dicWin ID[#{id}] at URL[#{@url}] in dictionary[#{@dict}]" if entry.nil?()
-
- entry = entry.to_s()
+
+ raise ScrapeError,"no dicWin ID[#{id}] at URL[#{@url}] in dictionary[#{@dict}]" if entry.nil?
+
+ entry = entry.to_s
  end
-
+
  word = Word.new(
  defn: entry,
  kana: kana,
  kanji: kanji
  )
-
+
  result.add_text(dicwin_result.text) # Don't call dicwin_result.polish!()
  result.add_word(word)
-
+
  return word
  end
-
+
  def scrape_futsuurl(doc)
  # First, try with the id.
  tag = doc.css('div#js-regular-news-wrapper')
-
+
  if tag.length > 0
  link = scrape_link(tag[0])
-
- return link unless link.nil?()
+
+ return link unless link.nil?
  end
-
+
  # Second, try with the class.
  tag = doc.css('div.link-to-normal')
-
+
  if tag.length > 0
  link = scrape_link(tag[0])
-
- return link unless link.nil?()
+
+ return link unless link.nil?
  end
-
+
  # Some sites don't have a futsuurl and need a lenient mode.
  # - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_02.html
  warn_or_error(ScrapeError,"could not scrape futsuurl at URL[#{@url}]")
-
+
  return nil
  end
-
+
  def scrape_link(tag)
  link = tag.css('a')
-
+
  return nil if link.length < 1
-
- link = Util.unspace_web_str(link[0]['href'].to_s())
-
- return nil if link.empty?()
+
+ link = Util.unspace_web_str(link[0]['href'].to_s)
+
+ return nil if link.empty?
  return link
  end
-
- def scrape_ruby_word(tag,result: ScrapeWordsResult.new())
- word = Word.scrape_ruby_tag(tag,missingno: @missingno,url: @url)
-
- return nil if word.nil?()
-
+
+ # @since 0.3.8
+ # @see https://www3.nhk.or.jp/news/easy/k10012759201000/k10012759201000.html
+ def scrape_ruby_words(tag,result: ScrapeWordsResult.new)
+ words = Word.scrape_ruby_tag(tag,missingno: @missingno,url: @url)
+ final_words = []
+
+ return final_words if words.nil?
+
+ words.each do |word|
+ final_words << scrape_ruby_word(word,result: result)
+ end
+
+ return final_words
+ end
+
+ def scrape_ruby_word(word,result: ScrapeWordsResult.new)
  # No cleaning; raw text.
  # Do not add kana to the text.
  result.add_text(word.kanji)
-
+
  kanji = clean(word.kanji)
  kana = clean(word.kana)
-
- if !@missingno.nil?()
+
+ # Even though Word.scrape_ruby_tag() also does this,
+ # check it again after cleaning above.
+ if !@missingno.nil?
  # Check kana first, since this is the typical scenario.
  # - https://www3.nhk.or.jp/news/easy/k10012331311000/k10012331311000.html
  # - '窓' in '(8)窓を開けて外の空気を入れましょう'
- if kana.empty?()
+ if kana.empty?
  kana = @missingno.kana_from_kanji(kanji)
- kana = kana.nil?() ? '' : clean(kana)
-
- if !kana.empty?()
+ kana = kana.nil? ? '' : clean(kana)
+
+ if !kana.empty?
  Util.warn("using missingno for kana[#{kana}] from kanji[#{kanji}]")
  end
- elsif kanji.empty?()
+ elsif kanji.empty?
  kanji = @missingno.kanji_from_kana(kana)
- kanji = kanji.nil?() ? '' : clean(kanji)
-
- if !kanji.empty?()
+ kanji = kanji.nil? ? '' : clean(kanji)
+
+ if !kanji.empty?
  Util.warn("using missingno for kanji[#{kanji}] from kana[#{kana}]")
  end
  end
  end
-
- raise ScrapeError,"empty kanji at URL[#{@url}] in tag[#{tag}]" if kanji.empty?()
- raise ScrapeError,"empty kana at URL[#{@url}] in tag[#{tag}]" if kana.empty?()
-
+
+ raise ScrapeError,"empty kanji at URL[#{@url}] in tag[#{tag}]" if kanji.empty?
+ raise ScrapeError,"empty kana at URL[#{@url}] in tag[#{tag}]" if kana.empty?
+
  word = Word.new(
  kana: kana,
  kanji: kanji,
  word: word
  )
-
+
  return word
  end
-
- def scrape_sha256_only()
- doc = html_doc()
-
+
+ def scrape_sha256_only
+ doc = html_doc
+
  sha256 = scrape_content(doc,nil)
-
+
  return sha256
  end
-
- def scrape_text_word(tag,result: ScrapeWordsResult.new())
+
+ def scrape_text_word(tag,result: ScrapeWordsResult.new)
  word = Word.scrape_text_node(tag,url: @url)
-
- if word.nil?()
- result.add_text(tag.text.to_s()) # Raw spaces for output
-
+
+ if word.nil?
+ result.add_text(tag.text.to_s) # Raw spaces for output
+
  return nil
  end
-
- text = word.kana # Should be kana only
-
+
+ # Kanji only for:
+ # - https://www3.nhk.or.jp/news/easy/k10012639271000/k10012639271000.html
+ # - '第3のビール'
+ text = word.word # Should usually be kana only
+
  result.add_text(text) # No cleaning; raw text
-
+
  text = clean(text)
-
- return nil if text.empty?() # No error; empty text is fine here
-
+
+ return nil if text.empty? # No error; empty text is fine here
+
  word = Word.new(
- kana: text,
- word: word
+ kana: clean(word.kana),
+ kanji: clean(word.kanji),
+ word: word,
  )
-
+
  return word
  end
-
+
  def scrape_title(doc,article)
  tag = doc.css('h1.article-main__title')
-
+ tag_name = nil
+
+ if tag.length < 1
+ # - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
+ tag_name = 'h1.article-eq__title'
+ tag = doc.css(tag_name)
+ end
+
  if tag.length < 1 && !@strict
  # This shouldn't be used except for select sites.
  # - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
-
  tag_name = 'div#main h2'
-
- Util.warn("using [#{tag_name}] for title at URL[#{@url}]")
-
  tag = doc.css(tag_name)
  end
-
+
  if tag.length > 0
+ Util.warn("using [#{tag_name}] for title at URL[#{@url}]") unless tag_name.nil?
+
  result = scrape_and_add_words(tag,article)
  title = result.text
-
- return title unless title.empty?()
+
+ return title unless title.empty?
  end
-
+
  raise ScrapeError,"could not scrape title at URL[#{@url}]"
  end
-
- def scrape_words(tag,dicwin: false,result: ScrapeWordsResult.new())
- children = tag.children.to_a().reverse() # A faster stack?
-
- while !children.empty?()
- child = children.pop()
+
+ def scrape_words(tag,dicwin: false,result: ScrapeWordsResult.new)
+ children = tag.children.to_a.reverse # A faster stack?
+
+ while !children.empty?
+ child = children.pop
  name = nil
- word = nil
-
- name = Util.unspace_web_str(child.name.to_s()).downcase() if child.respond_to?(:name)
-
+ words = []
+
+ name = Util.unspace_web_str(child.name.to_s).downcase if child.respond_to?(:name)
+
  if name == 'ruby'
- word = scrape_ruby_word(child,result: result)
- elsif child.text?()
- word = scrape_text_word(child,result: result)
+ # Returns an array.
+ words = scrape_ruby_words(child,result: result)
+ elsif child.text?
+ words << scrape_text_word(child,result: result)
  elsif name == 'rt'
  raise ScrapeError,"invalid rt tag[#{child}] without a ruby tag at URL[#{@url}]"
  else
  dicwin_id = nil
-
+
  if name == 'a'
- id = parse_dicwin_id(child['id'].to_s())
- klass = Util.unspace_web_str(child['class'].to_s()).downcase()
-
- if klass == 'dicwin' && !id.nil?()
+ id = parse_dicwin_id(child['id'].to_s)
+ klass = Util.unspace_web_str(child['class'].to_s).downcase
+
+ if klass == 'dicwin' && !id.nil?
  if dicwin
- raise ScrapeError,"invalid dicWin class[#{child}] nested inside another dicWin class at URL[#{@url}]"
+ raise ScrapeError,"invalid dicWin class[#{child}] nested inside another dicWin class at" \
+ " URL[#{@url}]"
  end
-
+
  dicwin_id = id
  end
  end
-
- if dicwin_id.nil?()
- grand_children = child.children.to_a()
-
- (grand_children.length() - 1).downto(0).each() do |i|
- children.push(grand_children[i])
- end
-
+
+ if dicwin_id.nil?
  # I originally didn't use a stack-like Array and did a constant insert,
  # but I think this is slower (moving all elements down every time).
  # However, if it's using C-like code for moving memory, then maybe it
  # is faster?
- #children.insert(i + 1,*child.children.to_a())
+ # Old code:
+ # children.insert(i + 1,*child.children.to_a())
+ grand_children = child.children.to_a
+
+ (grand_children.length - 1).downto(0).each do |i|
+ children.push(grand_children[i])
+ end
  else
- word = scrape_dicwin_word(child,dicwin_id,result: result)
+ words << scrape_dicwin_word(child,dicwin_id,result: result)
  end
  end
-
- result.add_word(word) unless word.nil?()
+
+ words&.each do |word|
+ # All word-scraping methods can return nil.
+ result.add_word(word) unless word.nil?
+ end
  end
-
+
  return result
  end
-
+
  def scrape_year(doc,futsuurl=nil)
  # First, try body's id.
  tag = doc.css('body')
-
+
  if tag.length > 0
- tag_id = tag[0]['id'].to_s().gsub(/[^[[:digit:]]]+/,'')
-
+ tag_id = tag[0]['id'].to_s.gsub(/[^[[:digit:]]]+/,'')
+
  if tag_id.length >= 4
- year = tag_id[0..3].to_i()
-
+ year = tag_id[0..3].to_i
+
  return year if Util.sane_year?(year)
  end
  end
-
+
  # Second, try futsuurl.
- if !futsuurl.nil?()
+ if !futsuurl.nil?
  m = futsuurl.match(/([[:digit:]]{4,})/)
-
- if !m.nil?() && (m = m[0].to_s()).length >= 4
- year = m[0..3].to_i()
-
+
+ if !m.nil? && (m = m[0].to_s).length >= 4
+ year = m[0..3].to_i
+
  return year if Util.sane_year?(year)
  end
  end
-
+
  # As a last resort, use our user-defined fallbacks (if specified).
- return @year.to_i() unless @year.nil?()
- return @datetime.year if !@datetime.nil?() && Util.sane_year?(@datetime.year)
-
+ return @year.to_i unless @year.nil?
+ return @datetime.year if !@datetime.nil? && Util.sane_year?(@datetime.year)
+
  raise ScrapeError,"could not scrape year at URL[#{@url}]"
  end
-
+
  def split(str)
  return @splitter.split(str)
  end
-
+
  def variate(str)
  variations = []
-
- @variators.each() do |variator|
+
+ @variators.each do |variator|
  variations.push(*variator.variate(str))
  end
-
+
  return variations
  end
-
+
  def warn_or_error(klass,msg)
  if @strict
  raise klass,msg
@@ -612,42 +631,42 @@ module NHKore
  end
  end
  end
-
+
  ###
- # @author Jonathan Bradley Whited (@esotericpig)
+ # @author Jonathan Bradley Whited
  # @since 0.2.0
  ###
  class ScrapeWordsResult
  attr_reader :text
  attr_reader :words
-
- def initialize()
+
+ def initialize
  super()
-
- @text = ''.dup()
+
+ @text = ''.dup
  @words = []
  end
-
+
  def add_text(text)
  @text << Util.reduce_jpn_space(text)
-
+
  return self
  end
-
+
  def add_word(word)
  @words << word
-
+
  return self
  end
-
- def polish!()
+
+ def polish!
  @text = Util.strip_web_str(@text)
-
+
  return self
  end
-
- def words?()
- return !@words.empty?()
+
+ def words?
+ return !@words.empty?
  end
  end
  end
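Beyond the cosmetic changes (dropping () on no-argument calls, normalizing blank lines), the ArticleScraper diff makes a few behavioral changes: scrape_ruby_words() (since 0.3.8) handles ruby tags that contain multiple words, scrape_text_word() now keeps kanji-only text like '第3のビール', and scrape_title() tries h1.article-eq__title as a fallback. A rough usage sketch of the public entry point, assuming network access; the URL is the example one cited in the diff:

    require 'nhkore/article_scraper'

    url = 'https://www3.nhk.or.jp/news/easy/k10012759201000/k10012759201000.html'

    # strict: false enables the lenient fallbacks seen above
    # (div#main for content, div#main h2 for the title, missing futsuurl allowed).
    scraper = NHKore::ArticleScraper.new(url, dict: :scrape, strict: false)

    article = scraper.scrape # returns an NHKore::Article
    puts article.to_s(mini: true)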