nhkore 0.3.17 → 0.3.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/nhkore/sifter.rb CHANGED
@@ -3,17 +3,15 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/article'
13
12
  require 'nhkore/fileable'
14
13
  require 'nhkore/util'
15
14
 
16
-
17
15
  module NHKore
18
16
  class Sifter
19
17
  include Fileable
@@ -87,7 +85,7 @@ module NHKore
87
85
  datetime = article.datetime
88
86
 
89
87
  return true if datetime.nil? ||
90
- datetime < datetime_filter[:from] || datetime > datetime_filter[:to]
88
+ datetime < datetime_filter[:from] || datetime > datetime_filter[:to]
91
89
  end
92
90
 
93
91
  if !title_filter.nil?
@@ -109,7 +107,7 @@ module NHKore
109
107
  return false
110
108
  end
111
109
 
112
- def filter_by_datetime(datetime_filter=nil,from: nil,to: nil)
110
+ def filter_by_datetime(datetime_filter = nil,from: nil,to: nil)
113
111
  if !datetime_filter.nil?
114
112
  if datetime_filter.respond_to?(:[])
115
113
  # If out-of-bounds, just nil.
@@ -234,10 +232,10 @@ module NHKore
234
232
  HTML
235
233
 
236
234
  # If have too few or too many '<col>', invalid HTML.
237
- @output << %Q(<col style="width:6em;">\n) unless @ignores[:freq]
238
- @output << %Q(<col style="width:17em;">\n) unless @ignores[:word]
239
- @output << %Q(<col style="width:17em;">\n) unless @ignores[:kana]
240
- @output << %Q(<col style="width:5em;">\n) unless @ignores[:eng]
235
+ @output << %(<col style="width:6em;">\n) unless @ignores[:freq]
236
+ @output << %(<col style="width:17em;">\n) unless @ignores[:word]
237
+ @output << %(<col style="width:17em;">\n) unless @ignores[:kana]
238
+ @output << %(<col style="width:5em;">\n) unless @ignores[:eng]
241
239
  @output << "<col>\n" unless @ignores[:defn] # No width for defn, fills rest of page
242
240
 
243
241
  @output << '<tr>'
@@ -3,15 +3,13 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/util'
13
12
 
14
-
15
13
  module NHKore
16
14
  class Splitter
17
15
  def begin_split(str)
data/lib/nhkore/util.rb CHANGED
@@ -3,18 +3,16 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'cgi'
13
12
  require 'set'
14
13
  require 'time'
15
14
  require 'uri'
16
15
 
17
-
18
16
  module NHKore
19
17
  module Util
20
18
  CORE_DIR = 'core'
@@ -64,7 +62,8 @@ module NHKore
64
62
 
65
63
  stylers = Array(stylers)
66
64
 
67
- return Psychgus.dump(obj,
65
+ return Psychgus.dump(
66
+ obj,
68
67
  deref_aliases: true, # Dereference aliases for load_yaml()
69
68
  header: true, # %YAML [version]
70
69
  line_width: 10_000, # Try not to wrap; ichiman!
@@ -117,10 +116,11 @@ module NHKore
117
116
  def self.load_yaml(data,file: nil,**kargs)
118
117
  require 'psychgus'
119
118
 
120
- return Psych.safe_load(data,
119
+ return Psych.safe_load(
120
+ data,
121
121
  aliases: false,
122
122
  filename: file,
123
- #freeze: true, # Not in this current version of Psych
123
+ # freeze: true, # Not in this current version of Psych
124
124
  permitted_classes: [Symbol],
125
125
  symbolize_names: true,
126
126
  **kargs,
@@ -180,8 +180,8 @@ module NHKore
180
180
  # String's normal strip() method doesn't work with special Unicode/HTML white space.
181
181
  def self.strip_web_str(str)
182
182
  # After testing with Benchmark, this is slower than one regex.
183
- #str = str.gsub(/\A[[:space:]]+/,'')
184
- #str = str.gsub(/[[:space:]]+\z/,'')
183
+ # str = str.gsub(/\A[[:space:]]+/,'')
184
+ # str = str.gsub(/[[:space:]]+\z/,'')
185
185
 
186
186
  str = str.gsub(STRIP_WEB_STR_REGEX,'')
187
187
 
@@ -3,12 +3,11 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
12
  class Variator
14
13
  def begin_variate(str)
@@ -24,7 +23,7 @@ module NHKore
24
23
  end
25
24
 
26
25
  class BasicVariator < Variator
27
- def end_variate(str)
26
+ def end_variate(_str)
28
27
  return [] # No variations; don't return nil
29
28
  end
30
29
  end
@@ -49,7 +48,7 @@ module NHKore
49
48
  def end_variate(str)
50
49
  guess = @deinflector.deinflect(str)
51
50
 
52
- return [] if guess.length < 1
51
+ return [] if guess.empty?
53
52
  return [] if (guess = guess[0])[:weight] < 0.5
54
53
 
55
54
  return [guess[:word]]
@@ -3,12 +3,11 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  module NHKore
13
- VERSION = '0.3.17'
12
+ VERSION = '0.3.19'
14
13
  end
data/lib/nhkore/word.rb CHANGED
@@ -3,18 +3,16 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nokogiri'
13
12
 
14
13
  require 'nhkore/error'
15
14
  require 'nhkore/util'
16
15
 
17
-
18
16
  module NHKore
19
17
  class Word
20
18
  attr_accessor :defn
@@ -24,7 +22,7 @@ module NHKore
24
22
  attr_reader :kanji
25
23
  attr_reader :key
26
24
 
27
- def initialize(defn: nil,eng: nil,freq: 1,kana: nil,kanji: nil,unknown: nil,word: nil,**kargs)
25
+ def initialize(defn: nil,eng: nil,freq: 1,kana: nil,kanji: nil,unknown: nil,word: nil,**_kargs)
28
26
  super()
29
27
 
30
28
  if !word.nil?
@@ -110,14 +108,14 @@ module NHKore
110
108
  # First, try <rb> tags.
111
109
  kanjis = tag.css('rb')
112
110
  # Second, try text nodes.
113
- kanjis = tag.search('./text()') if kanjis.length < 1
111
+ kanjis = tag.search('./text()') if kanjis.empty?
114
112
  # Third, try non-<rt> tags, in case of being surrounded by <span>, <b>, etc.
115
- kanjis = tag.search("./*[not(name()='rt')]") if kanjis.length < 1
113
+ kanjis = tag.search("./*[not(name()='rt')]") if kanjis.empty?
116
114
 
117
115
  kanas = tag.css('rt')
118
116
 
119
- raise ScrapeError,"no kanji at URL[#{url}] in tag[#{tag}]" if kanjis.length < 1
120
- raise ScrapeError,"no kana at URL[#{url}] in tag[#{tag}]" if kanas.length < 1
117
+ raise ScrapeError,"no kanji at URL[#{url}] in tag[#{tag}]" if kanjis.empty?
118
+ raise ScrapeError,"no kana at URL[#{url}] in tag[#{tag}]" if kanas.empty?
121
119
 
122
120
  if kanjis.length != kanas.length
123
121
  raise ScrapeError,"number of kanji & kana mismatch at URL[#{url}] in tag[#{tag}]"
@@ -130,7 +128,7 @@ module NHKore
130
128
  kana = kanas[i].text
131
129
 
132
130
  # Uncomment for debugging; really need a logger.
133
- #puts "Word[#{i}]: #{kanji} => #{kana}"
131
+ # puts "Word[#{i}]: #{kanji} => #{kana}"
134
132
 
135
133
  if !missingno.nil?
136
134
  # Check kana first, since this is the typical scenario.
@@ -162,7 +160,7 @@ module NHKore
162
160
 
163
161
  # Do not clean and/or strip spaces, as the raw text is important for
164
162
  # Defn and ArticleScraper.
165
- def self.scrape_text_node(tag,url: nil)
163
+ def self.scrape_text_node(tag,url: nil) # rubocop:disable Lint/UnusedMethodArgument
166
164
  text = tag.text
167
165
 
168
166
  # No error; empty text is fine (not strictly kanji/kana only).
data/lib/nhkore.rb CHANGED
@@ -3,12 +3,11 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  TESTING = ($PROGRAM_NAME == __FILE__)
13
12
 
14
13
  if TESTING
@@ -16,18 +15,11 @@ if TESTING
16
15
  require 'bundler/setup'
17
16
  end
18
17
 
19
- require 'nhkore/app'
20
18
  require 'nhkore/lib'
21
-
22
- require 'nhkore/cli/fx_cmd'
23
- require 'nhkore/cli/get_cmd'
24
- require 'nhkore/cli/news_cmd'
25
- require 'nhkore/cli/search_cmd'
26
- require 'nhkore/cli/sift_cmd'
27
-
19
+ require 'nhkore/app'
28
20
 
29
21
  module NHKore
30
- def self.run(args=ARGV)
22
+ def self.run(args = ARGV)
31
23
  app = App.new(args)
32
24
 
33
25
  begin
data/nhkore.gemspec CHANGED
@@ -1,29 +1,30 @@
1
1
  # encoding: UTF-8
2
2
  # frozen_string_literal: true
3
3
 
4
-
5
4
  require_relative 'lib/nhkore/version'
6
5
 
7
6
  Gem::Specification.new do |spec|
8
7
  spec.name = 'nhkore'
9
8
  spec.version = NHKore::VERSION
10
- spec.authors = ['Jonathan Bradley Whited']
9
+ spec.authors = ['Bradley Whited']
11
10
  spec.email = ['code@esotericpig.com']
12
11
  spec.licenses = ['LGPL-3.0-or-later']
13
12
  spec.homepage = 'https://github.com/esotericpig/nhkore'
14
13
  spec.summary = 'NHK News Web (Easy) word frequency (core) scraper for Japanese language learners.'
15
- spec.description =
16
- 'Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese language learners.' \
17
- ' Includes a CLI app and a scraper library.'
14
+ spec.description = <<~DESC
15
+ Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese language learners.
16
+ Includes a CLI app and a scraper library.
17
+ DESC
18
18
 
19
19
  spec.metadata = {
20
- 'homepage_uri' => 'https://github.com/esotericpig/nhkore',
21
- 'source_code_uri' => 'https://github.com/esotericpig/nhkore',
22
- 'bug_tracker_uri' => 'https://github.com/esotericpig/nhkore/issues',
23
- 'changelog_uri' => 'https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md',
24
- #'documentation_uri' => '',
25
- #'wiki_uri' => '',
26
- #'mailing_list_uri' => '',
20
+ 'rubygems_mfa_required' => 'true',
21
+ 'homepage_uri' => 'https://github.com/esotericpig/nhkore',
22
+ 'source_code_uri' => 'https://github.com/esotericpig/nhkore',
23
+ 'bug_tracker_uri' => 'https://github.com/esotericpig/nhkore/issues',
24
+ 'changelog_uri' => 'https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md',
25
+ # 'documentation_uri' => '',
26
+ # 'wiki_uri' => '',
27
+ # 'mailing_list_uri' => '',
27
28
  }
28
29
 
29
30
  spec.required_ruby_version = '>= 3.1'
@@ -35,7 +36,7 @@ Gem::Specification.new do |spec|
35
36
  spec.bindir = 'bin'
36
37
  spec.executables = [spec.name]
37
38
 
38
- spec.extra_rdoc_files = %w[ CHANGELOG.md LICENSE.txt README.md ]
39
+ spec.extra_rdoc_files = %w[LICENSE.txt CHANGELOG.md README.md]
39
40
  spec.rdoc_options = [
40
41
  '--hyperlink-all','--show-hash',
41
42
  '--title',"NHKore v#{NHKore::VERSION} Doc",
@@ -43,40 +44,36 @@ Gem::Specification.new do |spec|
43
44
  ]
44
45
 
45
46
  spec.files = [
46
- Dir.glob(File.join("{#{spec.require_paths.join(',')}}",'**','*.{erb,rb}')),
47
- Dir.glob(File.join(spec.bindir,'*')),
48
- Dir.glob(File.join('{samples,test,yard}','**','*.{erb,rb}')),
49
- %W[ Gemfile Gemfile.lock #{spec.name}.gemspec Rakefile .yardopts ],
47
+ Dir.glob("{#{spec.require_paths.join(',')}}/**/*.{erb,rb}"),
48
+ Dir.glob("#{spec.bindir}/{#{spec.executables.join(',')}}"),
49
+ Dir.glob('{samples,spec,test,yard}/**/*.{erb,rb}'),
50
+ %W[Gemfile Gemfile.lock #{spec.name}.gemspec Rakefile .yardopts],
50
51
  spec.extra_rdoc_files,
51
52
  ].flatten
52
53
 
53
- run_dep = spec.method(:add_runtime_dependency)
54
- run_dep[ 'attr_bool' ,'~> 0.2' ] # attr_accessor?/attr_reader?.
55
- run_dep[ 'bimyou_segmenter' ,'~> 1.2' ] # Splitting Japanese sentences into words.
56
- run_dep[ 'cri' ,'~> 2.15' ] # CLI commands/options.
57
- run_dep[ 'down' ,'~> 5.4' ] # Downloading files (GetCmd).
58
- run_dep[ 'highline' ,'~> 3.1' ] # CLI input/output.
59
- run_dep[ 'http-cookie' ,'~> 1.0' ] # Parsing/Setting cookies [(Bing)Scraper].
60
- run_dep[ 'japanese_deinflector','~> 0.0' ] # Unconjugating Japanese words (dictionary form).
61
- run_dep[ 'nokogiri' ,'~> 1.16' ] # Scraping/Hacking.
62
- run_dep[ 'psychgus' ,'~> 1.3' ] # Styling Psych YAML.
63
- run_dep[ 'public_suffix' ,'~> 6.0' ] # Parsing URL domain names.
64
- run_dep[ 'rainbow' ,'~> 3.1' ] # CLI color output.
65
- run_dep[ 'rss' ,'~> 0.3' ] # Scraping [(Bing)Scraper].
66
- run_dep[ 'rubyzip' ,'~> 2.3' ] # Extracting Zip files (GetCmd).
67
- run_dep[ 'tiny_segmenter' ,'~> 0.0' ] # Splitting Japanese sentences into words.
68
- run_dep[ 'tty-progressbar' ,'~> 0.18' ] # CLI progress bars.
69
- run_dep[ 'tty-spinner' ,'~> 0.9' ] # CLI spinning progress.
70
-
71
- dev_dep = spec.method(:add_development_dependency)
72
- dev_dep[ 'bundler' ,'~> 2.5' ]
73
- dev_dep[ 'minitest' ,'~> 5.25' ]
74
- dev_dep[ 'rake' ,'~> 13.2' ]
75
- dev_dep[ 'raketeer' ,'~> 0.2' ] # Extra Rake tasks.
76
- dev_dep[ 'rdoc' ,'~> 6.7' ] # YARDoc RDoc (*.rb).
77
- dev_dep[ 'redcarpet' ,'~> 3.6' ] # YARDoc Markdown (*.md).
78
- dev_dep[ 'yard' ,'~> 0.9' ] # Doc.
79
- dev_dep[ 'yard_ghurt','~> 1.2' ] # Extra YARDoc Rake tasks.
54
+ # Japanese.
55
+ spec.add_dependency 'bimyou_segmenter' ,'~> 1.2' # Splits Japanese sentences into words.
56
+ spec.add_dependency 'japanese_deinflector' ,'~> 0.0' # Un-conjugates Japanese words (dictionary form).
57
+ spec.add_dependency 'tiny_segmenter' ,'~> 0.0' # Splits Japanese sentences into words.
58
+ # Network/Scraping.
59
+ spec.add_dependency 'down' ,'~> 5.4' # Downloads files (GetCmd).
60
+ spec.add_dependency 'http-cookie' ,'~> 1.0' # Parses/Sets cookies [(Bing)Scraper].
61
+ spec.add_dependency 'nokogiri' ,'~> 1' # Parses HTML.
62
+ spec.add_dependency 'public_suffix' ,'~> 6.0' # Parses URL domain names.
63
+ spec.add_dependency 'ronin-web-user_agents','~> 0.1' # Generates a random User-Agent.
64
+ spec.add_dependency 'rss' ,'~> 0.3' # Parses RSS feeds [(Bing)Scraper].
65
+ # Data/Files.
66
+ spec.add_dependency 'csv' ,'~> 3.3' # Outputs CSV.
67
+ spec.add_dependency 'psychgus' ,'~> 1.3' # Styles Psych YAML.
68
+ spec.add_dependency 'rubyzip' ,'~> 2.4' # Extracts Zip files (GetCmd).
69
+ # CLI.
70
+ spec.add_dependency 'cri' ,'~> 2.15' # CLI commands/options.
71
+ spec.add_dependency 'highline' ,'~> 3.1' # CLI IO.
72
+ spec.add_dependency 'rainbow' ,'~> 3.1' # CLI color output.
73
+ spec.add_dependency 'tty-progressbar' ,'~> 0.18' # CLI progress bars.
74
+ spec.add_dependency 'tty-spinner' ,'~> 0.9' # CLI spinning progress.
75
+ # Utils.
76
+ spec.add_dependency 'attr_bool' ,'~> 0.2' # attr_accessor?/attr_reader?.
80
77
 
81
78
  spec.post_install_message = <<~MSG
82
79
  +=============================================================================+
@@ -90,7 +87,4 @@ Gem::Specification.new do |spec|
90
87
  | Changelog: #{spec.metadata['changelog_uri']}
91
88
  +=============================================================================+
92
89
  MSG
93
-
94
- # Uncomment to see max line length:
95
- #puts spec.post_install_message.split("\n").map(&:length).max
96
90
  end
data/samples/looper.rb CHANGED
@@ -4,12 +4,11 @@
4
4
 
5
5
  #--
6
6
  # This file is part of NHKore.
7
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ # Copyright (c) 2020 Bradley Whited
8
8
  #
9
9
  # SPDX-License-Identifier: LGPL-3.0-or-later
10
10
  #++
11
11
 
12
-
13
12
  ###
14
13
  # If you run this script, be aware that it uses the +-F+ force option
15
14
  # (which overwrites files without prompting).
@@ -3,18 +3,11 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'minitest/autorun'
13
12
 
14
13
  require 'nhkore'
15
-
16
-
17
- module NHKore
18
- class TestHelper < Minitest::Test
19
- end
20
- end
data/test/nhkore_test.rb CHANGED
@@ -3,21 +3,17 @@
3
3
 
4
4
  #--
5
5
  # This file is part of NHKore.
6
- # Copyright (c) 2020-2021 Jonathan Bradley Whited
6
+ # Copyright (c) 2020 Bradley Whited
7
7
  #
8
8
  # SPDX-License-Identifier: LGPL-3.0-or-later
9
9
  #++
10
10
 
11
-
12
11
  require 'nhkore/test_helper'
13
12
 
13
+ describe(NHKore) do
14
+ subject { NHKore }
14
15
 
15
- module NHKore
16
- class NHKoreTest < TestHelper
17
- def setup
18
- end
19
-
20
- def test_something
21
- end
16
+ it 'has version' do
17
+ expect(subject::VERSION).wont_be_nil
22
18
  end
23
19
  end