nhkore 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -24,7 +24,9 @@
24
24
  require 'cgi'
25
25
  require 'psychgus'
26
26
  require 'public_suffix'
27
+ require 'set'
27
28
  require 'time'
29
+ require 'uri'
28
30
 
29
31
 
30
32
  module NHKore
@@ -69,7 +71,7 @@ module NHKore
69
71
 
70
72
  def self.domain(host,clean: true)
71
73
  domain = PublicSuffix.domain(host)
72
- domain = unspace_web_str(domain).downcase() if clean
74
+ domain = unspace_web_str(domain).downcase() if !domain.nil?() && clean
73
75
 
74
76
  return domain
75
77
  end
@@ -164,6 +166,39 @@ module NHKore
164
166
  return str.gsub(WEB_SPACES_REGEX,' ')
165
167
  end
166
168
 
169
# Replaces query parameters of +uri+ in place with the given keyword pairs.
#
# Existing parameters whose key matches a key in +new_query+ are removed
# first; keys are compared as strings after being passed through
# unspace_web_str() and downcased. The +new_query+ pairs are then appended
# and the whole query is re-encoded with URI.encode_www_form().
#
# @param uri object responding to +query+ and +query=+
#            (presumably a URI instance -- confirm against callers)
# @param new_query keyword pairs to set; a +nil+ value is written as ''
# @return the same +uri+ object; returned untouched if +new_query+ is empty
def self.replace_uri_query!(uri,**new_query)
  return uri if new_query.empty?()

  normalize = ->(name) { unspace_web_str(name.to_s()).downcase() }

  raw_query = uri.query
  params = raw_query.nil?() ? [] : URI.decode_www_form(raw_query)

  # Drop every existing pair that is about to be replaced.
  unless params.empty?()
    replaced_keys = Set.new(new_query.keys.map() {|name| normalize.call(name) })

    params.reject!() do |pair|
      pair.nil?() || pair.empty?() || replaced_keys.include?(normalize.call(pair[0]))
    end
  end

  # Append the replacements, writing nil values as empty strings.
  new_query.each() do |name,value|
    params.push([name,value.nil?() ? '' : value])
  end

  uri.query = URI.encode_www_form(params)

  return uri
end
201
+
167
202
  def self.sane_year?(year)
168
203
  return year >= MIN_SANE_YEAR && year <= MAX_SANE_YEAR
169
204
  end
@@ -22,5 +22,5 @@
22
22
 
23
23
 
24
24
  module NHKore
25
- VERSION = '0.2.0'
25
+ VERSION = '0.3.0'
26
26
  end
@@ -34,10 +34,9 @@ Gem::Specification.new() do |spec|
34
34
  spec.licenses = ['LGPL-3.0-or-later']
35
35
  spec.homepage = 'https://github.com/esotericpig/nhkore'
36
36
  spec.summary = 'NHK News Web (Easy) word frequency (core) scraper for Japanese language learners.'
37
- spec.description = <<-EOD.gsub(/\s{2,}/,' ').strip()
38
- Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese language learners.
39
- Includes a CLI app and a scraper library.
40
- EOD
37
+ spec.description =
38
+ 'Scrapes NHK News Web (Easy) for the word frequency (core list) for Japanese language learners.' \
39
+ ' Includes a CLI app and a scraper library.'
41
40
 
42
41
  spec.metadata = {
43
42
  'bug_tracker_uri' => 'https://github.com/esotericpig/nhkore/issues',
@@ -60,19 +59,20 @@ Gem::Specification.new() do |spec|
60
59
 
61
60
  spec.requirements << 'Nokogiri: https://www.nokogiri.org/tutorials/installing_nokogiri.html'
62
61
 
63
- spec.add_runtime_dependency 'bimyou_segmenter' ,'~> 1.2' # For splitting Japanese sentences into words
64
- spec.add_runtime_dependency 'cri' ,'~> 2.15' # For CLI commands/options
65
- spec.add_runtime_dependency 'down' ,'~> 5.1' # For downloading files (GetCmd)
66
- spec.add_runtime_dependency 'highline' ,'~> 2.0' # For CLI input/output
67
- spec.add_runtime_dependency 'japanese_deinflector','~> 0.0' # For unconjugating Japanese words (plain/dictionary form)
68
- spec.add_runtime_dependency 'nokogiri' ,'~> 1.10' # For scraping/hacking
69
- spec.add_runtime_dependency 'psychgus' ,'~> 1.2' # For styling Psych YAML
70
- spec.add_runtime_dependency 'public_suffix' ,'~> 4.0' # For parsing URL domain names
71
- spec.add_runtime_dependency 'rainbow' ,'~> 3.0' # For CLI color output
72
- spec.add_runtime_dependency 'rubyzip' ,'~> 2.3' # For extracting Zip files (GetCmd)
73
- spec.add_runtime_dependency 'tiny_segmenter' ,'~> 0.0' # For splitting Japanese sentences into words
74
- spec.add_runtime_dependency 'tty-progressbar' ,'~> 0.17' # For CLI progress bars
75
- spec.add_runtime_dependency 'tty-spinner' ,'~> 0.9' # For CLI spinning progress
62
+ spec.add_runtime_dependency 'bimyou_segmenter' ,'~> 1.2' # For splitting Japanese sentences into words
63
+ spec.add_runtime_dependency 'cri' ,'~> 2.15' # For CLI commands/options
64
+ spec.add_runtime_dependency 'down' ,'~> 5.1' # For downloading files (GetCmd)
65
+ spec.add_runtime_dependency 'highline' ,'~> 2.0' # For CLI input/output
66
+ spec.add_runtime_dependency 'http-cookie' ,'~> 1.0' # For parsing/setting cookies (BingScraper/Scraper)
67
+ spec.add_runtime_dependency 'japanese_deinflector' ,'~> 0.0' # For unconjugating Japanese words (plain/dictionary form)
68
+ spec.add_runtime_dependency 'nokogiri' ,'~> 1.10' # For scraping/hacking
69
+ spec.add_runtime_dependency 'psychgus' ,'~> 1.2' # For styling Psych YAML
70
+ spec.add_runtime_dependency 'public_suffix' ,'~> 4.0' # For parsing URL domain names
71
+ spec.add_runtime_dependency 'rainbow' ,'~> 3.0' # For CLI color output
72
+ spec.add_runtime_dependency 'rubyzip' ,'~> 2.3' # For extracting Zip files (GetCmd)
73
+ spec.add_runtime_dependency 'tiny_segmenter' ,'~> 0.0' # For splitting Japanese sentences into words
74
+ spec.add_runtime_dependency 'tty-progressbar' ,'~> 0.17' # For CLI progress bars
75
+ spec.add_runtime_dependency 'tty-spinner' ,'~> 0.9' # For CLI spinning progress
76
76
 
77
77
  spec.add_development_dependency 'bundler' ,'~> 2.1'
78
78
  spec.add_development_dependency 'minitest' ,'~> 5.14'
@@ -83,5 +83,17 @@ Gem::Specification.new() do |spec|
83
83
  spec.add_development_dependency 'yard' ,'~> 0.9' # For documentation
84
84
  spec.add_development_dependency 'yard_ghurt','~> 1.2' # For extra YARDoc Rake tasks
85
85
 
86
- spec.post_install_message = "You can now use [#{spec.executables.join(', ')}] on the command line."
86
+ spec.post_install_message = <<-EOM
87
+
88
+ NHKore v#{NHKore::VERSION}
89
+
90
+ You can now use [#{spec.executables.join(', ')}] on the command line.
91
+
92
+ Homepage: #{spec.homepage}
93
+
94
+ Code: #{spec.metadata['source_code_uri']}
95
+ Changelog: #{spec.metadata['changelog_uri']}
96
+ Bugs: #{spec.metadata['bug_tracker_uri']}
97
+
98
+ EOM
87
99
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: nhkore
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jonathan Bradley Whited (@esotericpig)
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-03-31 00:00:00.000000000 Z
11
+ date: 2020-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bimyou_segmenter
@@ -66,6 +66,20 @@ dependencies:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
68
  version: '2.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: http-cookie
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: japanese_deinflector
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -324,10 +338,10 @@ files:
324
338
  - lib/nhkore/article.rb
325
339
  - lib/nhkore/article_scraper.rb
326
340
  - lib/nhkore/cleaner.rb
327
- - lib/nhkore/cli/bing_cmd.rb
328
341
  - lib/nhkore/cli/fx_cmd.rb
329
342
  - lib/nhkore/cli/get_cmd.rb
330
343
  - lib/nhkore/cli/news_cmd.rb
344
+ - lib/nhkore/cli/search_cmd.rb
331
345
  - lib/nhkore/cli/sift_cmd.rb
332
346
  - lib/nhkore/defn.rb
333
347
  - lib/nhkore/dict.rb
@@ -343,6 +357,7 @@ files:
343
357
  - lib/nhkore/search_scraper.rb
344
358
  - lib/nhkore/sifter.rb
345
359
  - lib/nhkore/splitter.rb
360
+ - lib/nhkore/user_agents.rb
346
361
  - lib/nhkore/util.rb
347
362
  - lib/nhkore/variator.rb
348
363
  - lib/nhkore/version.rb
@@ -359,7 +374,10 @@ metadata:
359
374
  changelog_uri: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md
360
375
  homepage_uri: https://github.com/esotericpig/nhkore
361
376
  source_code_uri: https://github.com/esotericpig/nhkore
362
- post_install_message: You can now use [nhkore] on the command line.
377
+ post_install_message: " \n NHKore v0.3.0\n \n You can now use [nhkore] on the
378
+ command line.\n \n Homepage: https://github.com/esotericpig/nhkore\n \n Code:
379
+ \ https://github.com/esotericpig/nhkore\n Changelog: https://github.com/esotericpig/nhkore/blob/master/CHANGELOG.md\n
380
+ \ Bugs: https://github.com/esotericpig/nhkore/issues\n \n"
363
381
  rdoc_options: []
364
382
  require_paths:
365
383
  - lib