nhkore 0.3.1 → 0.3.6

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -35,7 +35,7 @@ require 'nhkore/version'
35
35
 
36
36
  PKG_DIR = 'pkg'
37
37
 
38
- CLEAN.exclude('.git/','stock/')
38
+ CLEAN.exclude('{.git,core,stock}/**/*')
39
39
  CLOBBER.include('doc/',File.join(PKG_DIR,''))
40
40
 
41
41
 
@@ -49,7 +49,7 @@ desc "Package '#{File.join(NHKore::Util::CORE_DIR,'')}' data as a Zip file into
49
49
  task :pkg_core do |task|
50
50
  mkdir_p PKG_DIR
51
51
 
52
- pattern = File.join(NHKore::Util::CORE_DIR,'*.{csv,html,yml}')
52
+ pattern = File.join(NHKore::Util::CORE_DIR,'*.{csv,html,json,yml}')
53
53
  zip_file = File.join(PKG_DIR,'nhkore-core.zip')
54
54
 
55
55
  sh 'zip','-9rv',zip_file,*Dir.glob(pattern).sort()
@@ -59,17 +59,57 @@ Rake::TestTask.new() do |task|
59
59
  task.libs = ['lib','test']
60
60
  task.pattern = File.join('test','**','*_test.rb')
61
61
  task.description += ": '#{task.pattern}'"
62
- task.verbose = true
62
+ task.verbose = false
63
63
  task.warning = true
64
64
  end
65
65
 
66
- YARD::Rake::YardocTask.new() do |task|
67
- task.files = [File.join('lib','**','*.rb')]
66
+ # If you need to run a part after the 1st part,
67
+ # just type 'n' to not overwrite the file and then 'y' for continue.
68
+ desc "Update '#{File.join(NHKore::Util::CORE_DIR,'')}' files for release"
69
+ task :update_core do |task|
70
+ require 'highline'
71
+
72
+ CONTINUE_MSG = "\nContinue (y/n)? "
73
+
74
+ cmd = ['ruby','-w','./lib/nhkore.rb','-t','300','-m','10']
75
+ hl = HighLine.new()
76
+
77
+ next unless sh(*cmd,'se','ez','bing')
78
+ next unless hl.agree(CONTINUE_MSG)
79
+ puts
68
80
 
69
- task.options += ['--files','CHANGELOG.md,LICENSE.txt']
70
- task.options += ['--readme','README.md']
81
+ next unless sh(*cmd,'news','-s','100','ez')
82
+ next unless hl.agree(CONTINUE_MSG)
83
+ puts
84
+
85
+ next unless sh(*cmd,'sift','-e','csv' ,'ez')
86
+ next unless sh(*cmd,'sift','-e','html','ez')
87
+ next unless sh(*cmd,'sift','-e','json','ez')
88
+ next unless sh(*cmd,'sift','-e','yml' ,'ez')
89
+ end
90
+
91
+ # @since 0.3.6
92
+ desc 'Update showcase file for release'
93
+ task :update_showcase do |task|
94
+ require 'highline'
71
95
 
72
- task.options << '--protected' # Show protected methods
96
+ SHOWCASE_FILE = File.join('.','nhkore-ez.html')
97
+
98
+ hl = HighLine.new()
99
+
100
+ next unless sh('ruby','-w','./lib/nhkore.rb',
101
+ 'sift','ez','--no-eng',
102
+ '--out',SHOWCASE_FILE,
103
+ )
104
+
105
+ next unless hl.agree("\nMove the file (y/n)? ")
106
+ puts
107
+ next unless sh('mv','-iv',SHOWCASE_FILE,
108
+ File.join('..','esotericpig.github.io','showcase',''),
109
+ )
110
+ end
111
+
112
+ YARD::Rake::YardocTask.new() do |task|
73
113
  task.options += ['--template-path',File.join('yard','templates')]
74
114
  task.options += ['--title',"NHKore v#{NHKore::VERSION} Doc"]
75
115
  end
@@ -29,28 +29,7 @@ if TESTING
29
29
  end
30
30
 
31
31
  require 'nhkore/app'
32
- require 'nhkore/article'
33
- require 'nhkore/article_scraper'
34
- require 'nhkore/cleaner'
35
- require 'nhkore/defn'
36
- require 'nhkore/dict'
37
- require 'nhkore/dict_scraper'
38
- require 'nhkore/entry'
39
- require 'nhkore/error'
40
- require 'nhkore/fileable'
41
- require 'nhkore/missingno'
42
- require 'nhkore/news'
43
- require 'nhkore/polisher'
44
- require 'nhkore/scraper'
45
- require 'nhkore/search_link'
46
- require 'nhkore/search_scraper'
47
- require 'nhkore/sifter'
48
- require 'nhkore/splitter'
49
- require 'nhkore/user_agents'
50
- require 'nhkore/util'
51
- require 'nhkore/variator'
52
- require 'nhkore/version'
53
- require 'nhkore/word'
32
+ require 'nhkore/lib'
54
33
 
55
34
  require 'nhkore/cli/fx_cmd'
56
35
  require 'nhkore/cli/get_cmd'
@@ -24,6 +24,7 @@
24
24
  require 'cri'
25
25
  require 'highline'
26
26
  require 'rainbow'
27
+ require 'set'
27
28
  require 'tty-spinner'
28
29
 
29
30
  require 'nhkore/error'
@@ -151,7 +152,8 @@ module NHKore
151
152
  end
152
153
 
153
154
  if color.nil?()
154
- color = ($stdout.tty?() && ENV['TERM'] != 'dumb')
155
+ # - https://no-color.org/
156
+ color = ($stdout.tty?() && ENV['TERM'] != 'dumb' && !ENV.key?('NO_COLOR'))
155
157
  end
156
158
 
157
159
  enable_color(color)
@@ -33,11 +33,11 @@ module NHKore
33
33
  # @since 0.2.0
34
34
  ###
35
35
  class Article
36
- attr_accessor :datetime
37
- attr_accessor :futsuurl
36
+ attr_reader :datetime
37
+ attr_reader :futsuurl
38
38
  attr_accessor :sha256
39
39
  attr_accessor :title
40
- attr_accessor :url
40
+ attr_reader :url
41
41
  attr_reader :words
42
42
 
43
43
  def initialize()
@@ -79,19 +79,18 @@ module NHKore
79
79
 
80
80
  coder[:datetime] = @datetime.nil?() ? @datetime : @datetime.iso8601()
81
81
  coder[:title] = @title
82
- coder[:url] = @url
83
- coder[:futsuurl] = @futsuurl
82
+ coder[:url] = @url.nil?() ? nil : @url.to_s()
83
+ coder[:futsuurl] = @futsuurl.nil?() ? nil : @futsuurl.to_s()
84
84
  coder[:sha256] = @sha256
85
85
  coder[:words] = @words
86
86
  end
87
87
 
88
88
  def self.load_data(key,hash)
89
- datetime = hash[:datetime]
90
89
  words = hash[:words]
91
90
 
92
91
  article = Article.new()
93
92
 
94
- article.datetime = Util.empty_web_str?(datetime) ? nil : Time.iso8601(datetime)
93
+ article.datetime = hash[:datetime]
95
94
  article.futsuurl = hash[:futsuurl]
96
95
  article.sha256 = hash[:sha256]
97
96
  article.title = hash[:title]
@@ -107,6 +106,24 @@ module NHKore
107
106
  return article
108
107
  end
109
108
 
109
+ def datetime=(value)
110
+ if value.is_a?(Time)
111
+ @datetime = value
112
+ else
113
+ @datetime = Util.empty_web_str?(value) ? nil : Time.iso8601(value)
114
+ end
115
+ end
116
+
117
+ def futsuurl=(value)
118
+ # Don't store URI, store String.
119
+ @futsuurl = value.nil?() ? nil : value.to_s()
120
+ end
121
+
122
+ def url=(value)
123
+ # Don't store URI, store String.
124
+ @url = value.nil?() ? nil : value.to_s()
125
+ end
126
+
110
127
  def to_s(mini: false)
111
128
  s = ''.dup()
112
129
 
@@ -21,6 +21,7 @@
21
21
  #++
22
22
 
23
23
 
24
+ require 'attr_bool'
24
25
  require 'digest'
25
26
 
26
27
  require 'nhkore/article'
@@ -47,9 +48,9 @@ module NHKore
47
48
  attr_accessor :dict
48
49
  attr_reader :kargs
49
50
  attr_accessor :missingno
50
- attr_accessor :mode
51
51
  attr_reader :polishers
52
52
  attr_accessor :splitter
53
+ attr_accessor? :strict
53
54
  attr_reader :variators
54
55
  attr_accessor :year
55
56
 
@@ -58,8 +59,8 @@ module NHKore
58
59
  # [+nil+] don't scrape/use it
59
60
  # @param missingno [Missingno] data to use as a fallback for Ruby words without kana/kanji,
60
61
  # instead of raising an error
61
- # @param mode [nil,:lenient]
62
- def initialize(url,cleaners: [BestCleaner.new()],datetime: nil,dict: :scrape,missingno: nil,mode: nil,polishers: [BestPolisher.new()],splitter: BestSplitter.new(),variators: [BestVariator.new()],year: nil,**kargs)
62
+ # @param strict [true,false]
63
+ def initialize(url,cleaners: [BestCleaner.new()],datetime: nil,dict: :scrape,missingno: nil,polishers: [BestPolisher.new()],splitter: BestSplitter.new(),strict: true,variators: [BestVariator.new()],year: nil,**kargs)
63
64
  super(url,**kargs)
64
65
 
65
66
  @cleaners = Array(cleaners)
@@ -67,9 +68,9 @@ module NHKore
67
68
  @dict = dict
68
69
  @kargs = kargs
69
70
  @missingno = missingno
70
- @mode = mode
71
71
  @polishers = Array(polishers)
72
72
  @splitter = splitter
73
+ @strict = strict
73
74
  @variators = Array(variators)
74
75
  @year = year
75
76
  end
@@ -188,7 +189,7 @@ module NHKore
188
189
  tag = doc.css('div.article-body') if tag.length < 1
189
190
 
190
191
  # - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
191
- tag = doc.css('div#main') if tag.length < 1 && @mode == :lenient
192
+ tag = doc.css('div#main') if tag.length < 1 && !@strict
192
193
 
193
194
  if tag.length > 0
194
195
  text = Util.unspace_web_str(tag.text.to_s())
@@ -290,7 +291,7 @@ module NHKore
290
291
 
291
292
  retry
292
293
  else
293
- raise e.exception("could not scrape dictionary at URL[#{dict_url}]: #{e}")
294
+ raise e.exception("could not scrape dictionary URL[#{dict_url}] at URL[#{@url}]: #{e}")
294
295
  end
295
296
  end
296
297
 
@@ -480,19 +481,24 @@ module NHKore
480
481
 
481
482
  def scrape_title(doc,article)
482
483
  tag = doc.css('h1.article-main__title')
484
+ tag_name = nil
483
485
 
484
- if tag.length < 1 && @mode == :lenient
486
+ if tag.length < 1
487
+ # - https://www3.nhk.or.jp/news/easy/article/disaster_earthquake_illust.html
488
+ tag_name = 'h1.article-eq__title'
489
+ tag = doc.css(tag_name)
490
+ end
491
+
492
+ if tag.length < 1 && !@strict
485
493
  # This shouldn't be used except for select sites.
486
494
  # - https://www3.nhk.or.jp/news/easy/tsunamikeihou/index.html
487
-
488
495
  tag_name = 'div#main h2'
489
-
490
- Util.warn("using [#{tag_name}] for title at URL[#{@url}]")
491
-
492
496
  tag = doc.css(tag_name)
493
497
  end
494
498
 
495
499
  if tag.length > 0
500
+ Util.warn("using [#{tag_name}] for title at URL[#{@url}]") unless tag_name.nil?()
501
+
496
502
  result = scrape_and_add_words(tag,article)
497
503
  title = result.text
498
504
 
@@ -583,7 +589,7 @@ module NHKore
583
589
  end
584
590
 
585
591
  # As a last resort, use our user-defined fallbacks (if specified).
586
- return @year unless Util.empty_web_str?(@year)
592
+ return @year.to_i() unless @year.nil?()
587
593
  return @datetime.year if !@datetime.nil?() && Util.sane_year?(@datetime.year)
588
594
 
589
595
  raise ScrapeError,"could not scrape year at URL[#{@url}]"
@@ -604,11 +610,10 @@ module NHKore
604
610
  end
605
611
 
606
612
  def warn_or_error(klass,msg)
607
- case @mode
608
- when :lenient
609
- Util.warn(msg)
610
- else
613
+ if @strict
611
614
  raise klass,msg
615
+ else
616
+ Util.warn(msg)
612
617
  end
613
618
  end
614
619
  end
@@ -23,6 +23,7 @@
23
23
 
24
24
  require 'time'
25
25
 
26
+ require 'nhkore/datetime_parser'
26
27
  require 'nhkore/error'
27
28
  require 'nhkore/missingno'
28
29
  require 'nhkore/news'
@@ -57,7 +58,7 @@ module CLI
57
58
  date time to use as a fallback in cases when an article doesn't have one;
58
59
  format: YYYY-mm-dd H:M; example: 2020-03-30 15:30
59
60
  EOD
60
- value = Time.strptime(value,'%Y-%m-%d %H:%M',&Util.method(:guess_year))
61
+ value = Time.strptime(value,'%Y-%m-%d %H:%M',&DatetimeParser.method(:guess_year))
61
62
  value = Util.jst_time(value)
62
63
  value
63
64
  end
@@ -237,7 +238,7 @@ module CLI
237
238
  dict: dict,
238
239
  is_file: is_file,
239
240
  missingno: missingno ? Missingno.new(news) : nil,
240
- mode: lenient ? :lenient : nil,
241
+ strict: !lenient,
241
242
  })
242
243
  @news_dict_scraper_kargs = @scraper_kargs.merge({
243
244
  is_file: is_file,
@@ -83,7 +83,7 @@ module CLI
83
83
  key = key.to_s()
84
84
 
85
85
  if key.include?('show')
86
- raise CLIError.new("must specify a sub command for option[#{key}]")
86
+ raise CLIError,"must specify a sub command for option[#{key}]"
87
87
  end
88
88
  end
89
89
 
@@ -283,7 +283,7 @@ module CLI
283
283
  puts "> Easy: #{BingScraper.build_url(SearchScraper::YASASHII_SITE,count: count)}"
284
284
  puts "> Regular: #{BingScraper.build_url(SearchScraper::FUTSUU_SITE,count: count)}"
285
285
  else
286
- raise CLIError.new('must specify a sub command for option[show-urls]')
286
+ raise CLIError,'must specify a sub command for option[show-urls]'
287
287
  end
288
288
 
289
289
  return true
@@ -24,6 +24,7 @@
24
24
  require 'date'
25
25
  require 'time'
26
26
 
27
+ require 'nhkore/datetime_parser'
27
28
  require 'nhkore/news'
28
29
  require 'nhkore/sifter'
29
30
  require 'nhkore/util'
@@ -39,27 +40,7 @@ module CLI
39
40
  DEFAULT_SIFT_EXT = :csv
40
41
  DEFAULT_SIFT_FUTSUU_FILE = "#{Sifter::DEFAULT_FUTSUU_FILE}{search.criteria}{file.ext}"
41
42
  DEFAULT_SIFT_YASASHII_FILE = "#{Sifter::DEFAULT_YASASHII_FILE}{search.criteria}{file.ext}"
42
- SIFT_EXTS = [:csv,:htm,:html,:yaml,:yml]
43
-
44
- # Order matters.
45
- SIFT_DATETIME_FMTS = [
46
- '%Y-%m-%d %H:%M',
47
- '%Y-%m-%d %H',
48
- '%Y-%m-%d',
49
- '%m-%d %H:%M',
50
- '%Y-%m %H:%M',
51
- '%m-%d %H',
52
- '%Y-%m %H',
53
- '%m-%d',
54
- '%Y-%m',
55
- '%d %H:%M',
56
- '%y %H:%M',
57
- '%d %H',
58
- '%Y %H',
59
- '%H:%M',
60
- '%d',
61
- '%Y'
62
- ]
43
+ SIFT_EXTS = [:csv,:htm,:html,:json,:yaml,:yml]
63
44
 
64
45
  attr_accessor :sift_datetime_text
65
46
  attr_accessor :sift_search_criteria
@@ -90,7 +71,11 @@ module CLI
90
71
  '9' (9th of Current Year & Month)
91
72
  EOD
92
73
  app.sift_datetime_text = value # Save the original value for the file name
93
- value = app.parse_sift_datetime(value)
74
+
75
+ value = DatetimeParser.parse_range(value)
76
+
77
+ app.check_empty_opt(:datetime,value) if value.nil?()
78
+
94
79
  value
95
80
  end
96
81
  option :e,:ext,<<-EOD,argument: :required,default: DEFAULT_SIFT_EXT,transform: -> (value) do
@@ -211,96 +196,6 @@ module CLI
211
196
  return filename
212
197
  end
213
198
 
214
- # TODO: This should probably be moved into its own class, into Util, or into Sifter?
215
- def parse_sift_datetime(value)
216
- value = Util.reduce_space(value).strip() # Don't use unspace_web_str(), want spaces for formats
217
- value = value.split('...',2)
218
-
219
- check_empty_opt(:datetime,nil) if value.empty?() # For ''
220
-
221
- # Make a "to" and a "from" date time range.
222
- value << value[0].dup() if value.length == 1
223
-
224
- to_day = nil
225
- to_hour = 23
226
- to_minute = 59
227
- to_month = 12
228
- to_year = Util::MAX_SANE_YEAR
229
-
230
- value.each_with_index() do |v,i|
231
- v = check_empty_opt(:datetime,v) # For '...', '12-25...', or '...12-25'
232
-
233
- has_day = false
234
- has_hour = false
235
- has_minute = false
236
- has_month = false
237
- has_year = false
238
-
239
- SIFT_DATETIME_FMTS.each_with_index() do |fmt,i|
240
- begin
241
- # If don't do this, "%d" values will be parsed using "%d %H".
242
- # It seems as though strptime() ignores space.
243
- raise ArgumentError if !v.include?(' ') && fmt.include?(' ')
244
-
245
- # If don't do this, "%y" values will be parsed using "%d".
246
- raise ArgumentError if fmt == '%d' && v.length > 2
247
-
248
- v = Time.strptime(v,fmt,&Util.method(:guess_year))
249
-
250
- has_day = fmt.include?('%d')
251
- has_hour = fmt.include?('%H')
252
- has_minute = fmt.include?('%M')
253
- has_month = fmt.include?('%m')
254
- has_year = fmt.include?('%Y')
255
-
256
- break # No problem; this format worked
257
- rescue ArgumentError
258
- # Out of formats.
259
- raise if i >= (SIFT_DATETIME_FMTS.length - 1)
260
- end
261
- end
262
-
263
- # "From" date time.
264
- if i == 0
265
- # Set these so that "2012-7-4...7-9" will use the appropriate year
266
- # of "2012" for "7-9".
267
- to_day = v.day if has_day
268
- to_hour = v.hour if has_hour
269
- to_minute = v.min if has_minute
270
- to_month = v.month if has_month
271
- to_year = v.year if has_year
272
-
273
- v = Time.new(
274
- has_year ? v.year : Util::MIN_SANE_YEAR,
275
- has_month ? v.month : 1,
276
- has_day ? v.day : 1,
277
- has_hour ? v.hour : 0,
278
- has_minute ? v.min : 0
279
- )
280
- # "To" date time.
281
- else
282
- to_hour = v.hour if has_hour
283
- to_minute = v.min if has_minute
284
- to_month = v.month if has_month
285
- to_year = v.year if has_year
286
-
287
- if has_day
288
- to_day = v.day
289
- # Nothing passed from the "from" date time?
290
- elsif to_day.nil?()
291
- # Last day of month.
292
- to_day = Date.new(to_year,to_month,-1).day
293
- end
294
-
295
- v = Time.new(to_year,to_month,to_day,to_hour,to_minute)
296
- end
297
-
298
- value[i] = v
299
- end
300
-
301
- return value
302
- end
303
-
304
199
  def run_sift_cmd(type)
305
200
  news_name = nil
306
201
 
@@ -364,6 +259,8 @@ module CLI
364
259
  sifter.put_csv!()
365
260
  when :htm,:html
366
261
  sifter.put_html!()
262
+ when :json
263
+ sifter.put_json!()
367
264
  when :yaml,:yml
368
265
  sifter.put_yaml!()
369
266
  else