nhkore 0.3.1 → 0.3.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -35,7 +35,7 @@ require 'nhkore/version'
35
35
 
36
36
  PKG_DIR = 'pkg'
37
37
 
38
- CLEAN.exclude('.git/','stock/')
38
+ CLEAN.exclude('{.git,core,stock}/**/*')
39
39
  CLOBBER.include('doc/',File.join(PKG_DIR,''))
40
40
 
41
41
 
@@ -49,7 +49,7 @@ desc "Package '#{File.join(NHKore::Util::CORE_DIR,'')}' data as a Zip file into
49
49
  task :pkg_core do |task|
50
50
  mkdir_p PKG_DIR
51
51
 
52
- pattern = File.join(NHKore::Util::CORE_DIR,'*.{csv,html,yml}')
52
+ pattern = File.join(NHKore::Util::CORE_DIR,'*.{csv,html,json,yml}')
53
53
  zip_file = File.join(PKG_DIR,'nhkore-core.zip')
54
54
 
55
55
  sh 'zip','-9rv',zip_file,*Dir.glob(pattern).sort()
@@ -59,17 +59,57 @@ Rake::TestTask.new() do |task|
59
59
  task.libs = ['lib','test']
60
60
  task.pattern = File.join('test','**','*_test.rb')
61
61
  task.description += ": '#{task.pattern}'"
62
- task.verbose = true
62
+ task.verbose = false
63
63
  task.warning = true
64
64
  end
65
65
 
66
- YARD::Rake::YardocTask.new() do |task|
67
- task.files = [File.join('lib','**','*.rb')]
66
+ # If you need to run a part after the 1st part,
67
+ # just type 'n' to not overwrite the file and then 'y' for continue.
68
+ desc "Update '#{File.join(NHKore::Util::CORE_DIR,'')}' files for release"
69
+ task :update_core do |task|
70
+ require 'highline'
71
+
72
+ CONTINUE_MSG = "\nContinue (y/n)? "
73
+
74
+ cmd = ['ruby','-w','./lib/nhkore.rb','-t','300','-m','10']
75
+ hl = HighLine.new()
76
+
77
+ next unless sh(*cmd,'se','ez','bing')
78
+ next unless hl.agree(CONTINUE_MSG)
79
+ puts
68
80
 
69
- task.options += ['--files','CHANGELOG.md,LICENSE.txt']
70
- task.options += ['--readme','README.md']
81
+ next unless sh(*cmd,'news','-s','100','ez')
82
+ next unless hl.agree(CONTINUE_MSG)
83
+ puts
84
+
85
+ next unless sh(*cmd,'sift','-e','csv' ,'ez')
86
+ next unless sh(*cmd,'sift','-e','html','ez')
87
+ next unless sh(*cmd,'sift','-e','json','ez')
88
+ next unless sh(*cmd,'sift','-e','yml' ,'ez')
89
+ end
90
+
91
+ # @since 0.3.6
92
+ desc 'Update showcase file for release'
93
+ task :update_showcase do |task|
94
+ require 'highline'
71
95
 
72
- task.options << '--protected' # Show protected methods
96
+ SHOWCASE_FILE = File.join('.','nhkore-ez.html')
97
+
98
+ hl = HighLine.new()
99
+
100
+ next unless sh('ruby','-w','./lib/nhkore.rb',
101
+ 'sift','ez','--no-eng',
102
+ '--out',SHOWCASE_FILE,
103
+ )
104
+
105
+ next unless hl.agree("\nMove the file (y/n)? ")
106
+ puts
107
+ next unless sh('mv','-iv',SHOWCASE_FILE,
108
+ File.join('..','esotericpig.github.io','showcase',''),
109
+ )
110
+ end
111
+
112
+ YARD::Rake::YardocTask.new() do |task|
73
113
  task.options += ['--template-path',File.join('yard','templates')]
74
114
  task.options += ['--title',"NHKore v#{NHKore::VERSION} Doc"]
75
115
  end
@@ -29,28 +29,7 @@ if TESTING
29
29
  end
30
30
 
31
31
  require 'nhkore/app'
32
- require 'nhkore/article'
33
- require 'nhkore/article_scraper'
34
- require 'nhkore/cleaner'
35
- require 'nhkore/defn'
36
- require 'nhkore/dict'
37
- require 'nhkore/dict_scraper'
38
- require 'nhkore/entry'
39
- require 'nhkore/error'
40
- require 'nhkore/fileable'
41
- require 'nhkore/missingno'
42
- require 'nhkore/news'
43
- require 'nhkore/polisher'
44
- require 'nhkore/scraper'
45
- require 'nhkore/search_link'
46
- require 'nhkore/search_scraper'
47
- require 'nhkore/sifter'
48
- require 'nhkore/splitter'
49
- require 'nhkore/user_agents'
50
- require 'nhkore/util'
51
- require 'nhkore/variator'
52
- require 'nhkore/version'
53
- require 'nhkore/word'
32
+ require 'nhkore/lib'
54
33
 
55
34
  require 'nhkore/cli/fx_cmd'
56
35
  require 'nhkore/cli/get_cmd'
@@ -24,6 +24,7 @@
24
24
  require 'cri'
25
25
  require 'highline'
26
26
  require 'rainbow'
27
+ require 'set'
27
28
  require 'tty-spinner'
28
29
 
29
30
  require 'nhkore/error'
@@ -151,7 +152,8 @@ module NHKore
151
152
  end
152
153
 
153
154
  if color.nil?()
154
- color = ($stdout.tty?() && ENV['TERM'] != 'dumb')
155
+ # - https://no-color.org/
156
+ color = ($stdout.tty?() && ENV['TERM'] != 'dumb' && !ENV.key?('NO_COLOR'))
155
157
  end
156
158
 
157
159
  enable_color(color)
@@ -33,11 +33,11 @@ module NHKore
33
33
  # @since 0.2.0
34
34
  ###
35
35
  class Article
36
- attr_accessor :datetime
37
- attr_accessor :futsuurl
36
+ attr_reader :datetime
37
+ attr_reader :futsuurl
38
38
  attr_accessor :sha256
39
39
  attr_accessor :title
40
- attr_accessor :url
40
+ attr_reader :url
41
41
  attr_reader :words
42
42
 
43
43
  def initialize()
@@ -79,19 +79,18 @@ module NHKore
79
79
 
80
80
  coder[:datetime] = @datetime.nil?() ? @datetime : @datetime.iso8601()
81
81
  coder[:title] = @title
82
- coder[:url] = @url
83
- coder[:futsuurl] = @futsuurl
82
+ coder[:url] = @url.nil?() ? nil : @url.to_s()
83
+ coder[:futsuurl] = @futsuurl.nil?() ? nil : @futsuurl.to_s()
84
84
  coder[:sha256] = @sha256
85
85
  coder[:words] = @words
86
86
  end
87
87
 
88
88
  def self.load_data(key,hash)
89
- datetime = hash[:datetime]
90
89
  words = hash[:words]
91
90
 
92
91
  article = Article.new()
93
92
 
94
- article.datetime = Util.empty_web_str?(datetime) ? nil : Time.iso8601(datetime)
93
+ article.datetime = hash[:datetime]
95
94
  article.futsuurl = hash[:futsuurl]
96
95
  article.sha256 = hash[:sha256]
97
96
  article.title = hash[:title]
@@ -107,6 +106,24 @@ module NHKore
107
106
  return article
108
107
  end
109
108
 
109
+ def datetime=(value)
110
+ if value.is_a?(Time)
111
+ @datetime = value
112
+ else
113
+ @datetime = Util.empty_web_str?(value) ? nil : Time.iso8601(value)
114
+ end
115
+ end
116
+
117
+ def futsuurl=(value)
118
+ # Don't store URI, store String.
119
+ @futsuurl = value.nil?() ? nil : value.to_s()
120
+ end
121
+
122
+ def url=(value)
123
+ # Don't store URI, store String.
124
+ @url = value.nil?() ? nil : value.to_s()
125
+ end
126
+
110
127
  def to_s(mini: false)
111
128
  s = ''.dup()
112
129
 
@@ -21,6 +21,7 @@
21
21
  #++
22
22
 
23
23
 
24
+ require 'attr_bool'
24
25
  require 'digest'
25
26
 
26
27
  require 'nhkore/article'
@@ -47,9 +48,9 @@ module NHKore
47
48
  attr_accessor :dict
48
49
  attr_reader :kargs
49
50
  attr_accessor :missingno
50
- attr_accessor :mode
51
51
  attr_reader :polishers
52
52
  attr_accessor :splitter
53
+ attr_accessor? :strict
53
54
  attr_reader :variators
54
55
  attr_accessor :year
55
56
 
@@ -58,8 +59,8 @@ module NHKore
58
59
  # [+nil+] don't scrape/use it
59
60
  # @param missingno [Missingno] data to use as a fallback for Ruby words without kana/kanji,
60
61
  # instead of raising an error
61
- # @param mode [nil,:lenient]
62
- def initialize(url,cleaners: [BestCleaner.new()],datetime: nil,dict: :scrape,missingno: nil,mode: nil,polishers: [BestPolisher.new()],splitter: BestSplitter.new(),variators: [BestVariator.new()],year: nil,**kargs)
62
+ # @param strict [true,false]
63
+ def initialize(url,cleaners: [BestCleaner.new()],datetime: nil,dict: :scrape,missingno: nil,polishers: [BestPolisher.new()],splitter: BestSplitter.new(),strict: true,variators: [BestVariator.new()],year: nil,**kargs)
63
64
  super(url,**kargs)
64
65
 
65
66
  @cleaners = Array(cleaners)
@@ -67,9 +68,9 @@ module NHKore
67
68
  @dict = dict
68
69
  @kargs = kargs
69
70
  @missingno = missingno
70
- @mode = mode
71
71
  @polishers = Array(polishers)
72
72
  @splitter = splitter
73
+ @strict = strict
73
74
  @variators = Array(variators)
74
75
  @year = year
75
76
  end
@@ -188,7 +189,7 @@ module NHKore
188
189
  tag = doc.css('div.article-body') if tag.length < 1
189
190
 
190
191
  # - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
191
- tag = doc.css('div#main') if tag.length < 1 && @mode == :lenient
192
+ tag = doc.css('div#main') if tag.length < 1 && !@strict
192
193
 
193
194
  if tag.length > 0
194
195
  text = Util.unspace_web_str(tag.text.to_s())
@@ -290,7 +291,7 @@ module NHKore
290
291
 
291
292
  retry
292
293
  else
293
- raise e.exception("could not scrape dictionary at URL[#{dict_url}]: #{e}")
294
+ raise e.exception("could not scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
294
295
  end
295
296
  end
296
297
 
@@ -480,19 +481,24 @@ module NHKore
480
481
 
481
482
  def scrape_title(doc,article)
482
483
  tag = doc.css('h1.article-main__title')
484
+ tag_name = nil
483
485
 
484
- if tag.length < 1 && @mode == :lenient
486
+ if tag.length < 1
487
+ # - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
488
+ tag_name = 'h1.article-eq__title'
489
+ tag = doc.css(tag_name)
490
+ end
491
+
492
+ if tag.length < 1 && !@strict
485
493
  # This shouldn't be used except for select sites.
486
494
  # - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
487
-
488
495
  tag_name = 'div#main h2'
489
-
490
- Util.warn("using [#{tag_name}] for title at URL[#{@url}]")
491
-
492
496
  tag = doc.css(tag_name)
493
497
  end
494
498
 
495
499
  if tag.length > 0
500
+ Util.warn("using [#{tag_name}] for title at URL[#{@url}]") unless tag_name.nil?()
501
+
496
502
  result = scrape_and_add_words(tag,article)
497
503
  title = result.text
498
504
 
@@ -583,7 +589,7 @@ module NHKore
583
589
  end
584
590
 
585
591
  # As a last resort, use our user-defined fallbacks (if specified).
586
- return @year unless Util.empty_web_str?(@year)
592
+ return @year.to_i() unless @year.nil?()
587
593
  return @datetime.year if !@datetime.nil?() && Util.sane_year?(@datetime.year)
588
594
 
589
595
  raise ScrapeError,"could not scrape year at URL[#{@url}]"
@@ -604,11 +610,10 @@ module NHKore
604
610
  end
605
611
 
606
612
  def warn_or_error(klass,msg)
607
- case @mode
608
- when :lenient
609
- Util.warn(msg)
610
- else
613
+ if @strict
611
614
  raise klass,msg
615
+ else
616
+ Util.warn(msg)
612
617
  end
613
618
  end
614
619
  end
@@ -23,6 +23,7 @@
23
23
 
24
24
  require 'time'
25
25
 
26
+ require 'nhkore/datetime_parser'
26
27
  require 'nhkore/error'
27
28
  require 'nhkore/missingno'
28
29
  require 'nhkore/news'
@@ -57,7 +58,7 @@ module CLI
57
58
  date time to use as a fallback in cases when an article doesn't have one;
58
59
  format: YYYY-mm-dd H:M; example: 2020-03-30 15:30
59
60
  EOD
60
- value = Time.strptime(value,'%Y-%m-%d %H:%M',&Util.method(:guess_year))
61
+ value = Time.strptime(value,'%Y-%m-%d %H:%M',&DatetimeParser.method(:guess_year))
61
62
  value = Util.jst_time(value)
62
63
  value
63
64
  end
@@ -237,7 +238,7 @@ module CLI
237
238
  dict: dict,
238
239
  is_file: is_file,
239
240
  missingno: missingno ? Missingno.new(news) : nil,
240
- mode: lenient ? :lenient : nil,
241
+ strict: !lenient,
241
242
  })
242
243
  @news_dict_scraper_kargs = @scraper_kargs.merge({
243
244
  is_file: is_file,
@@ -83,7 +83,7 @@ module CLI
83
83
  key = key.to_s()
84
84
 
85
85
  if key.include?('show')
86
- raise CLIError.new("must specify a sub command for option[#{key}]")
86
+ raise CLIError,"must specify a sub command for option[#{key}]"
87
87
  end
88
88
  end
89
89
 
@@ -283,7 +283,7 @@ module CLI
283
283
  puts "> Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE,count: count)}"
284
284
  puts "> Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE,count: count)}"
285
285
  else
286
- raise CLIError.new('must specify a sub command for option[show-urls]')
286
+ raise CLIError,'must specify a sub command for option[show-urls]'
287
287
  end
288
288
 
289
289
  return true
@@ -24,6 +24,7 @@
24
24
  require 'date'
25
25
  require 'time'
26
26
 
27
+ require 'nhkore/datetime_parser'
27
28
  require 'nhkore/news'
28
29
  require 'nhkore/sifter'
29
30
  require 'nhkore/util'
@@ -39,27 +40,7 @@ module CLI
39
40
  DEFAULT_SIFT_EXT = :csv
40
41
  DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}"
41
42
  DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}"
42
- SIFT_EXTS = [:csv,:htm,:html,:yaml,:yml]
43
-
44
- # Order matters.
45
- SIFT_DATETIME_FMTS = [
46
- '%Y-%m-%d %H:%M',
47
- '%Y-%m-%d %H',
48
- '%Y-%m-%d',
49
- '%m-%d %H:%M',
50
- '%Y-%m %H:%M',
51
- '%m-%d %H',
52
- '%Y-%m %H',
53
- '%m-%d',
54
- '%Y-%m',
55
- '%d %H:%M',
56
- '%y %H:%M',
57
- '%d %H',
58
- '%Y %H',
59
- '%H:%M',
60
- '%d',
61
- '%Y'
62
- ]
43
+ SIFT_EXTS = [:csv,:htm,:html,:json,:yaml,:yml]
63
44
 
64
45
  attr_accessor :sift_datetime_text
65
46
  attr_accessor :sift_search_criteria
@@ -90,7 +71,11 @@ module CLI
90
71
  '9' (9th of Current Year & Month)
91
72
  EOD
92
73
  app.sift_datetime_text = value # Save the original value for the file name
93
- value = app.parse_sift_datetime(value)
74
+
75
+ value = DatetimeParser.parse_range(value)
76
+
77
+ app.check_empty_opt(:datetime,value) if value.nil?()
78
+
94
79
  value
95
80
  end
96
81
  option :e,:ext,<<-EOD,argument: :required,default: DEFAULT_SIFT_EXT,transform: -> (value) do
@@ -211,96 +196,6 @@ module CLI
211
196
  return filename
212
197
  end
213
198
 
214
- # TODO: This should probably be moved into its own class, into Util, or into Sifter?
215
- def parse_sift_datetime(value)
216
- value = Util.reduce_space(value).strip() # Don't use unspace_web_str(), want spaces for formats
217
- value = value.split('...',2)
218
-
219
- check_empty_opt(:datetime,nil) if value.empty?() # For ''
220
-
221
- # Make a "to" and a "from" date time range.
222
- value << value[0].dup() if value.length == 1
223
-
224
- to_day = nil
225
- to_hour = 23
226
- to_minute = 59
227
- to_month = 12
228
- to_year = Util::MAX_SANE_YEAR
229
-
230
- value.each_with_index() do |v,i|
231
- v = check_empty_opt(:datetime,v) # For '...', '12-25...', or '...12-25'
232
-
233
- has_day = false
234
- has_hour = false
235
- has_minute = false
236
- has_month = false
237
- has_year = false
238
-
239
- SIFT_DATETIME_FMTS.each_with_index() do |fmt,i|
240
- begin
241
- # If don't do this, "%d" values will be parsed using "%d %H".
242
- # It seems as though strptime() ignores space.
243
- raise ArgumentError if !v.include?(' ') && fmt.include?(' ')
244
-
245
- # If don't do this, "%y" values will be parsed using "%d".
246
- raise ArgumentError if fmt == '%d' && v.length > 2
247
-
248
- v = Time.strptime(v,fmt,&Util.method(:guess_year))
249
-
250
- has_day = fmt.include?('%d')
251
- has_hour = fmt.include?('%H')
252
- has_minute = fmt.include?('%M')
253
- has_month = fmt.include?('%m')
254
- has_year = fmt.include?('%Y')
255
-
256
- break # No problem; this format worked
257
- rescue ArgumentError
258
- # Out of formats.
259
- raise if i >= (SIFT_DATETIME_FMTS.length - 1)
260
- end
261
- end
262
-
263
- # "From" date time.
264
- if i == 0
265
- # Set these so that "2012-7-4...7-9" will use the appropriate year
266
- # of "2012" for "7-9".
267
- to_day = v.day if has_day
268
- to_hour = v.hour if has_hour
269
- to_minute = v.min if has_minute
270
- to_month = v.month if has_month
271
- to_year = v.year if has_year
272
-
273
- v = Time.new(
274
- has_year ? v.year : Util::MIN_SANE_YEAR,
275
- has_month ? v.month : 1,
276
- has_day ? v.day : 1,
277
- has_hour ? v.hour : 0,
278
- has_minute ? v.min : 0
279
- )
280
- # "To" date time.
281
- else
282
- to_hour = v.hour if has_hour
283
- to_minute = v.min if has_minute
284
- to_month = v.month if has_month
285
- to_year = v.year if has_year
286
-
287
- if has_day
288
- to_day = v.day
289
- # Nothing passed from the "from" date time?
290
- elsif to_day.nil?()
291
- # Last day of month.
292
- to_day = Date.new(to_year,to_month,-1).day
293
- end
294
-
295
- v = Time.new(to_year,to_month,to_day,to_hour,to_minute)
296
- end
297
-
298
- value[i] = v
299
- end
300
-
301
- return value
302
- end
303
-
304
199
  def run_sift_cmd(type)
305
200
  news_name = nil
306
201
 
@@ -364,6 +259,8 @@ module CLI
364
259
  sifter.put_csv!()
365
260
  when :htm,:html
366
261
  sifter.put_html!()
262
+ when :json
263
+ sifter.put_json!()
367
264
  when :yaml,:yml
368
265
  sifter.put_yaml!()
369
266
  else