nhkore 0.3.4 → 0.3.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,23 +1,11 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: UTF-8
3
2
  # frozen_string_literal: true
4
3
 
5
4
  #--
6
5
  # This file is part of NHKore.
7
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
- #
9
- # NHKore is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU Lesser General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # NHKore is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU Lesser General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU Lesser General Public License
20
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
21
9
  #++
22
10
 
23
11
 
@@ -31,189 +19,198 @@ require 'nhkore/util'
31
19
 
32
20
  module NHKore
33
21
  ###
34
- # @author Jonathan Bradley Whited (@esotericpig)
22
+ # @author Jonathan Bradley Whited
35
23
  # @since 0.2.0
36
24
  ###
37
25
  class SearchScraper < Scraper
38
26
  DEFAULT_RESULT_COUNT = 100
39
27
  FUTSUU_SITE = 'nhk.or.jp/news/html/'
40
28
  YASASHII_SITE = 'nhk.or.jp/news/easy/'
41
-
29
+
42
30
  # https://www3.nhk.or.jp/news/html/20200220/k10012294001000.html
43
- FUTSUU_REGEX = /\A[^\.]+\.#{Regexp.quote(FUTSUU_SITE)}.+\.html?/i
31
+ FUTSUU_REGEX = /\A[^.]+\.#{Regexp.quote(FUTSUU_SITE)}.+\.html?/i.freeze
44
32
  # https://www3.nhk.or.jp/news/easy/k10012294001000/k10012294001000.html
45
33
  # - https://www3.nhk.or.jp/news/easy/article/disaster_heat.html
46
- YASASHII_REGEX = /\A[^\.]+\.#{Regexp.quote(YASASHII_SITE)}.+\.html?/i
47
-
34
+ YASASHII_REGEX = /\A[^.]+\.#{Regexp.quote(YASASHII_SITE)}.+\.html?/i.freeze
35
+
36
+ IGNORE_LINK_REGEX = %r{
37
+ /about\.html? # https://www3.nhk.or.jp/news/easy/about.html
38
+ |/movieplayer\.html? # https://www3.nhk.or.jp/news/easy/movieplayer.html?id=k10038422811_1207251719_1207251728.mp4&teacuprbbs=4feb73432045dbb97c283d64d459f7cf
39
+ |/audio\.html? # https://www3.nhk.or.jp/news/easy/player/audio.html?id=k10011555691000
40
+ |/news/easy/index\.html? # http://www3.nhk.or.jp/news/easy/index.html
41
+
42
+ # https://cgi2.nhk.or.jp/news/easy/easy_enq/bin/form/enqform.html?id=k10011916321000&title=日本の会社が作った鉄道の車両「あずま」がイギリスで走る
43
+ # https://www3.nhk.or.jp/news/easy/easy_enq/bin/form/enqform.html?id=k10012689671000&title=「鬼滅の刃」の映画が台湾でも始まって大勢の人が見に行く
44
+ |/enqform\.html?
45
+ }x.freeze
46
+
48
47
  # Search Engines are strict, so trigger using the default HTTP header fields
49
48
  # with +header: {}+ and fetch/set the cookie using +eat_cookie: true+.
50
49
  def initialize(url,eat_cookie: true,header: {},**kargs)
51
50
  super(url,eat_cookie: eat_cookie,header: header,**kargs)
52
51
  end
53
-
52
+
54
53
  def ignore_link?(link,cleaned: true)
55
- return true if link.nil?()
56
-
57
- link = Util.unspace_web_str(link).downcase() unless cleaned
58
-
59
- return true if link.empty?()
60
- return true if link =~ /\/about\.html?/ # https://www3.nhk.or.jp/news/easy/about.html
61
- return true if link =~ /\/movieplayer\.html?/ # https://www3.nhk.or.jp/news/easy/movieplayer.html?id=k10038422811_1207251719_1207251728.mp4&teacuprbbs=4feb73432045dbb97c283d64d459f7cf
62
- return true if link =~ /\/audio\.html?/ # https://www3.nhk.or.jp/news/easy/player/audio.html?id=k10011555691000
63
- return true if link =~ /\/news\/easy\/index\.html?/ # http://www3.nhk.or.jp/news/easy/index.html
64
-
54
+ return true if link.nil?
55
+
56
+ link = Util.unspace_web_str(link).downcase unless cleaned
57
+
58
+ return true if link.empty?
59
+
60
+ return true if IGNORE_LINK_REGEX.match?(link)
61
+
65
62
  return false
66
63
  end
67
64
  end
68
-
65
+
69
66
  ###
70
- # @author Jonathan Bradley Whited (@esotericpig)
67
+ # @author Jonathan Bradley Whited
71
68
  # @since 0.2.0
72
69
  ###
73
70
  class BingScraper < SearchScraper
74
71
  attr_reader :regex
75
72
  attr_reader :site
76
-
73
+
77
74
  def initialize(site,regex: nil,url: nil,**kargs)
78
75
  case site
79
76
  when :futsuu
80
- regex = FUTSUU_REGEX if regex.nil?()
77
+ regex = FUTSUU_REGEX if regex.nil?
81
78
  site = FUTSUU_SITE
82
79
  when :yasashii
83
- regex = YASASHII_REGEX if regex.nil?()
80
+ regex = YASASHII_REGEX if regex.nil?
84
81
  site = YASASHII_SITE
85
82
  else
86
83
  raise ArgumentError,"invalid site[#{site}]"
87
84
  end
88
-
89
- raise ArgumentError,"empty regex[#{regex}]" if regex.nil?()
90
-
85
+
86
+ raise ArgumentError,"empty regex[#{regex}]" if regex.nil?
87
+
91
88
  @regex = regex
92
89
  @site = site
93
- url = self.class.build_url(site,**kargs) if url.nil?()
94
-
90
+ url = self.class.build_url(site,**kargs) if url.nil?
91
+
95
92
  # Delete class-specific args (don't pass to Open-URI).
96
93
  kargs.delete(:count)
97
-
94
+
98
95
  super(url,**kargs)
99
96
  end
100
-
97
+
101
98
  def self.build_url(site,count: DEFAULT_RESULT_COUNT,**kargs)
102
- url = ''.dup()
103
-
99
+ url = ''.dup
100
+
104
101
  url << 'https://www.bing.com/search?'
105
102
  url << URI.encode_www_form(
106
103
  q: "site:#{site}",
107
104
  count: count
108
105
  )
109
-
106
+
110
107
  return url
111
108
  end
112
-
109
+
113
110
  def scrape(slinks,page=NextPage.new())
114
111
  next_page,link_count = scrape_html(slinks,page)
115
-
112
+
116
113
  if link_count <= 0
117
114
  scrape_rss(slinks,page,next_page)
118
115
  end
119
-
116
+
120
117
  return next_page
121
118
  end
122
-
119
+
123
120
  def scrape_html(slinks,page,next_page=NextPage.new())
124
- doc = html_doc()
121
+ doc = html_doc
125
122
  link_count = 0
126
-
123
+
127
124
  anchors = doc.css('a')
128
-
129
- anchors.each() do |anchor|
130
- href = anchor['href'].to_s()
131
- href = Util.unspace_web_str(href).downcase()
132
-
125
+
126
+ anchors.each do |anchor|
127
+ href = anchor['href'].to_s
128
+ href = Util.unspace_web_str(href).downcase
129
+
133
130
  next if ignore_link?(href)
134
-
135
- if (md = href.match(/first\=(\d+)/))
136
- count = md[1].to_i()
137
-
131
+
132
+ if (md = href.match(/first=(\d+)/))
133
+ count = md[1].to_i
134
+
138
135
  if count > page.count && (next_page.count < 0 || count < next_page.count)
139
136
  next_page.count = count
140
137
  next_page.url = join_url(href)
141
138
  end
142
139
  elsif href =~ regex
143
140
  slinks.add_link(SearchLink.new(href))
144
-
141
+
145
142
  link_count += 1
146
143
  end
147
144
  end
148
-
145
+
149
146
  return [next_page,link_count]
150
147
  end
151
-
148
+
152
149
  def scrape_rss(slinks,page,next_page=NextPage.new())
153
150
  link_count = 0
154
-
151
+
155
152
  if !@is_file
156
153
  uri = URI(@url)
157
-
154
+
158
155
  Util.replace_uri_query!(uri,format: 'rss')
159
- open(uri)
160
-
161
- doc = rss_doc()
156
+ self.open(uri)
157
+
158
+ doc = rss_doc
162
159
  rss_links = []
163
-
164
- doc.items.each() do |item|
165
- link = item.link.to_s()
166
- link = Util.unspace_web_str(link).downcase()
167
-
160
+
161
+ doc.items.each do |item|
162
+ link = item.link.to_s
163
+ link = Util.unspace_web_str(link).downcase
164
+
168
165
  rss_links << link
169
-
166
+
170
167
  next if ignore_link?(link)
171
168
  next if link !~ regex
172
-
169
+
173
170
  slinks.add_link(SearchLink.new(link))
174
-
171
+
175
172
  link_count += 1
176
173
  end
177
-
174
+
178
175
  # For RSS, Bing will keep returning the same links over and over
179
176
  # if it's the last page or the "first=" query is the wrong count.
180
177
  # Therefore, we have to test the previous RSS links (+page.rss_links+).
181
- if next_page.empty?() && doc.items.length >= 1 && page.rss_links != rss_links
178
+ if next_page.empty? && doc.items.length >= 1 && page.rss_links != rss_links
182
179
  next_page.count = (page.count < 0) ? 0 : page.count
183
180
  next_page.count += doc.items.length
184
181
  next_page.rss_links = rss_links
185
-
186
- uri = URI(page.url.nil?() ? @url : page.url)
187
-
182
+
183
+ uri = URI(page.url.nil? ? @url : page.url)
184
+
188
185
  Util.replace_uri_query!(uri,first: next_page.count)
189
-
186
+
190
187
  next_page.url = uri
191
188
  end
192
189
  end
193
-
190
+
194
191
  return [next_page,link_count]
195
192
  end
196
193
  end
197
-
194
+
198
195
  ###
199
- # @author Jonathan Bradley Whited (@esotericpig)
196
+ # @author Jonathan Bradley Whited
200
197
  # @since 0.2.0
201
198
  ###
202
199
  class NextPage
203
200
  attr_accessor :count
204
201
  attr_accessor :rss_links
205
202
  attr_accessor :url
206
-
207
- def initialize()
203
+
204
+ def initialize
208
205
  super()
209
-
206
+
210
207
  @count = -1
211
208
  @rss_links = nil
212
209
  @url = nil
213
210
  end
214
-
215
- def empty?()
216
- return @url.nil?() || @count < 0
211
+
212
+ def empty?
213
+ return @url.nil? || @count < 0
217
214
  end
218
215
  end
219
216
  end
data/lib/nhkore/sifter.rb CHANGED
@@ -1,23 +1,11 @@
1
- #!/usr/bin/env ruby
2
1
  # encoding: UTF-8
3
2
  # frozen_string_literal: true
4
3
 
5
4
  #--
6
5
  # This file is part of NHKore.
7
- # Copyright (c) 2020 Jonathan Bradley Whited (@esotericpig)
8
- #
9
- # NHKore is free software: you can redistribute it and/or modify
10
- # it under the terms of the GNU Lesser General Public License as published by
11
- # the Free Software Foundation, either version 3 of the License, or
12
- # (at your option) any later version.
13
- #
14
- # NHKore is distributed in the hope that it will be useful,
15
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
16
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17
- # GNU Lesser General Public License for more details.
18
- #
19
- # You should have received a copy of the GNU Lesser General Public License
20
- # along with NHKore. If not, see <https://www.gnu.org/licenses/>.
6
+ # Copyright (c) 2020-2021 Jonathan Bradley Whited
7
+ #
8
+ # SPDX-License-Identifier: LGPL-3.0-or-later
21
9
  #++
22
10
 
23
11
 
@@ -28,179 +16,179 @@ require 'nhkore/util'
28
16
 
29
17
  module NHKore
30
18
  ###
31
- # @author Jonathan Bradley Whited (@esotericpig)
19
+ # @author Jonathan Bradley Whited
32
20
  # @since 0.2.0
33
21
  ###
34
22
  class Sifter
35
23
  include Fileable
36
-
24
+
37
25
  DEFAULT_DIR = Util::CORE_DIR
38
-
26
+
39
27
  DEFAULT_FUTSUU_FILENAME = 'sift_nhk_news_web_regular'
40
28
  DEFAULT_YASASHII_FILENAME = 'sift_nhk_news_web_easy'
41
-
29
+
42
30
  def self.build_file(filename)
43
31
  return File.join(DEFAULT_DIR,filename)
44
32
  end
45
-
33
+
46
34
  DEFAULT_FUTSUU_FILE = build_file(DEFAULT_FUTSUU_FILENAME)
47
35
  DEFAULT_YASASHII_FILE = build_file(DEFAULT_YASASHII_FILENAME)
48
-
36
+
49
37
  attr_accessor :articles
50
38
  attr_accessor :caption
51
39
  attr_accessor :filters
52
40
  attr_accessor :ignores
53
41
  attr_accessor :output
54
-
42
+
55
43
  def initialize(news)
56
- @articles = news.articles.values.dup()
44
+ @articles = news.articles.values.dup
57
45
  @caption = nil
58
46
  @filters = {}
59
47
  @ignores = {}
60
48
  @output = nil
61
49
  end
62
-
63
- def build_header()
50
+
51
+ def build_header
64
52
  header = []
65
-
53
+
66
54
  header << 'Frequency' unless @ignores[:freq]
67
55
  header << 'Word' unless @ignores[:word]
68
56
  header << 'Kana' unless @ignores[:kana]
69
57
  header << 'English' unless @ignores[:eng]
70
58
  header << 'Definition' unless @ignores[:defn]
71
-
59
+
72
60
  return header
73
61
  end
74
-
62
+
75
63
  def build_rows(words)
76
64
  rows = []
77
-
78
- words.each() do |word|
65
+
66
+ words.each do |word|
79
67
  rows << build_word_row(word)
80
68
  end
81
-
69
+
82
70
  return rows
83
71
  end
84
-
72
+
85
73
  def build_word_row(word)
86
74
  row = []
87
-
75
+
88
76
  row << word.freq unless @ignores[:freq]
89
77
  row << word.word unless @ignores[:word]
90
78
  row << word.kana unless @ignores[:kana]
91
79
  row << word.eng unless @ignores[:eng]
92
80
  row << word.defn unless @ignores[:defn]
93
-
81
+
94
82
  return row
95
83
  end
96
-
84
+
97
85
  def filter?(article)
98
- return false if @filters.empty?()
99
-
86
+ return false if @filters.empty?
87
+
100
88
  datetime_filter = @filters[:datetime]
101
89
  title_filter = @filters[:title]
102
90
  url_filter = @filters[:url]
103
-
104
- if !datetime_filter.nil?()
91
+
92
+ if !datetime_filter.nil?
105
93
  datetime = article.datetime
106
-
107
- return true if datetime.nil?() ||
94
+
95
+ return true if datetime.nil? ||
108
96
  datetime < datetime_filter[:from] || datetime > datetime_filter[:to]
109
97
  end
110
-
111
- if !title_filter.nil?()
112
- title = article.title.to_s()
98
+
99
+ if !title_filter.nil?
100
+ title = article.title.to_s
113
101
  title = Util.unspace_web_str(title) if title_filter[:unspace]
114
- title = title.downcase() if title_filter[:uncase]
115
-
102
+ title = title.downcase if title_filter[:uncase]
103
+
116
104
  return true unless title.include?(title_filter[:filter])
117
105
  end
118
-
119
- if !url_filter.nil?()
120
- url = article.url.to_s()
106
+
107
+ if !url_filter.nil?
108
+ url = article.url.to_s
121
109
  url = Util.unspace_web_str(url) if url_filter[:unspace]
122
- url = url.downcase() if url_filter[:uncase]
123
-
110
+ url = url.downcase if url_filter[:uncase]
111
+
124
112
  return true unless url.include?(url_filter[:filter])
125
113
  end
126
-
114
+
127
115
  return false
128
116
  end
129
-
117
+
130
118
  def filter_by_datetime(datetime_filter=nil,from: nil,to: nil)
131
- if !datetime_filter.nil?()
132
- if datetime_filter.respond_to?(:'[]')
119
+ if !datetime_filter.nil?
120
+ if datetime_filter.respond_to?(:[])
133
121
  # If out-of-bounds, just nil.
134
- from = datetime_filter[0] if from.nil?()
135
- to = datetime_filter[1] if to.nil?()
122
+ from = datetime_filter[0] if from.nil?
123
+ to = datetime_filter[1] if to.nil?
136
124
  else
137
- from = datetime_filter if from.nil?()
138
- to = datetime_filter if to.nil?()
125
+ from = datetime_filter if from.nil?
126
+ to = datetime_filter if to.nil?
139
127
  end
140
128
  end
141
-
142
- from = to if from.nil?()
143
- to = from if to.nil?()
144
-
145
- from = Util.jst_time(from) unless from.nil?()
146
- to = Util.jst_time(to) unless to.nil?()
147
-
129
+
130
+ from = to if from.nil?
131
+ to = from if to.nil?
132
+
133
+ from = Util.jst_time(from) unless from.nil?
134
+ to = Util.jst_time(to) unless to.nil?
135
+
148
136
  datetime_filter = [from,to]
149
-
150
- return self if datetime_filter.flatten().compact().empty?()
151
-
137
+
138
+ return self if datetime_filter.flatten.compact.empty?
139
+
152
140
  @filters[:datetime] = {from: from,to: to}
153
-
141
+
154
142
  return self
155
143
  end
156
-
144
+
157
145
  def filter_by_title(title_filter,uncase: true,unspace: true)
158
146
  title_filter = Util.unspace_web_str(title_filter) if unspace
159
- title_filter = title_filter.downcase() if uncase
160
-
147
+ title_filter = title_filter.downcase if uncase
148
+
161
149
  @filters[:title] = {filter: title_filter,uncase: uncase,unspace: unspace}
162
-
150
+
163
151
  return self
164
152
  end
165
-
153
+
166
154
  def filter_by_url(url_filter,uncase: true,unspace: true)
167
155
  url_filter = Util.unspace_web_str(url_filter) if unspace
168
- url_filter = url_filter.downcase() if uncase
169
-
156
+ url_filter = url_filter.downcase if uncase
157
+
170
158
  @filters[:url] = {filter: url_filter,uncase: uncase,unspace: unspace}
171
-
159
+
172
160
  return self
173
161
  end
174
-
162
+
175
163
  def ignore(key)
176
164
  @ignores[key] = true
177
-
165
+
178
166
  return self
179
167
  end
180
-
168
+
181
169
  # This does not output {caption}.
182
- def put_csv!()
170
+ def put_csv!
183
171
  require 'csv'
184
-
185
- words = sift()
186
-
172
+
173
+ words = sift
174
+
187
175
  @output = CSV.generate(headers: :first_row,write_headers: true) do |csv|
188
- csv << build_header()
189
-
190
- words.each() do |word|
176
+ csv << build_header
177
+
178
+ words.each do |word|
191
179
  csv << build_word_row(word)
192
180
  end
193
181
  end
194
-
182
+
195
183
  return @output
196
184
  end
197
-
198
- def put_html!()
199
- words = sift()
200
-
201
- @output = ''.dup()
202
-
203
- @output << <<~EOH
185
+
186
+ def put_html!
187
+ words = sift
188
+
189
+ @output = ''.dup
190
+
191
+ @output << <<~HTML
204
192
  <!DOCTYPE html>
205
193
  <html lang="ja">
206
194
  <head>
@@ -249,146 +237,144 @@ module NHKore
249
237
  <h1>NHKore</h1>
250
238
  <h2>#{@caption}</h2>
251
239
  <table>
252
- EOH
253
- #"
254
-
240
+ HTML
241
+
255
242
  # If have too few or too many '<col>', invalid HTML.
256
- @output << %Q{<col style="width:6em;">\n} unless @ignores[:freq]
257
- @output << %Q{<col style="width:17em;">\n} unless @ignores[:word]
258
- @output << %Q{<col style="width:17em;">\n} unless @ignores[:kana]
259
- @output << %Q{<col style="width:5em;">\n} unless @ignores[:eng]
243
+ @output << %Q(<col style="width:6em;">\n) unless @ignores[:freq]
244
+ @output << %Q(<col style="width:17em;">\n) unless @ignores[:word]
245
+ @output << %Q(<col style="width:17em;">\n) unless @ignores[:kana]
246
+ @output << %Q(<col style="width:5em;">\n) unless @ignores[:eng]
260
247
  @output << "<col>\n" unless @ignores[:defn] # No width for defn, fills rest of page
261
-
248
+
262
249
  @output << '<tr>'
263
-
264
- build_header().each() do |h|
250
+
251
+ build_header.each do |h|
265
252
  @output << "<th>#{h}</th>"
266
253
  end
267
-
254
+
268
255
  @output << "</tr>\n"
269
-
270
- words.each() do |word|
256
+
257
+ words.each do |word|
271
258
  @output << '<tr>'
272
-
273
- build_word_row(word).each() do |w|
274
- @output << "<td>#{Util.escape_html(w.to_s())}</td>"
259
+
260
+ build_word_row(word).each do |w|
261
+ @output << "<td>#{Util.escape_html(w.to_s)}</td>"
275
262
  end
276
-
263
+
277
264
  @output << "</tr>\n"
278
265
  end
279
-
280
- @output << <<~EOH
266
+
267
+ @output << <<~HTML
281
268
  </table>
282
269
  </body>
283
270
  </html>
284
- EOH
285
- #/
286
-
271
+ HTML
272
+
287
273
  return @output
288
274
  end
289
-
290
- def put_json!()
275
+
276
+ def put_json!
291
277
  require 'json'
292
-
293
- words = sift()
294
-
295
- @output = ''.dup()
296
-
297
- @output << <<~EOJ
278
+
279
+ words = sift
280
+
281
+ @output = ''.dup
282
+
283
+ @output << <<~JSON
298
284
  {
299
285
  "caption": #{JSON.generate(@caption)},
300
- "header": #{JSON.generate(build_header())},
286
+ "header": #{JSON.generate(build_header)},
301
287
  "words": [
302
- EOJ
303
-
304
- if !words.empty?()
288
+ JSON
289
+
290
+ if !words.empty?
305
291
  0.upto(words.length - 2) do |i|
306
292
  @output << " #{JSON.generate(build_word_row(words[i]))},\n"
307
293
  end
308
-
294
+
309
295
  @output << " #{JSON.generate(build_word_row(words[-1]))}\n"
310
296
  end
311
-
297
+
312
298
  @output << "]\n}\n"
313
-
299
+
314
300
  return @output
315
301
  end
316
-
317
- def put_yaml!()
302
+
303
+ def put_yaml!
318
304
  require 'psychgus'
319
-
320
- words = sift()
321
-
305
+
306
+ words = sift
307
+
322
308
  yaml = {
323
309
  caption: @caption,
324
- header: build_header(),
310
+ header: build_header,
325
311
  words: build_rows(words),
326
312
  }
327
-
328
- header_styler = Class.new() do
313
+
314
+ header_styler = Class.new do
329
315
  include Psychgus::Styler
330
-
316
+
331
317
  def style_sequence(sniffer,node)
332
318
  parent = sniffer.parent
333
-
334
- if !parent.nil?() && parent.node.respond_to?(:value) && parent.value == 'header'
319
+
320
+ if !parent.nil? && parent.node.respond_to?(:value) && parent.value == 'header'
335
321
  node.style = Psychgus::SEQUENCE_FLOW
336
322
  end
337
323
  end
338
324
  end
339
-
325
+
340
326
  # Put each Word on one line (flow/inline style).
341
- @output = Util.dump_yaml(yaml,flow_level: 4,stylers: header_styler.new())
342
-
327
+ @output = Util.dump_yaml(yaml,flow_level: 4,stylers: header_styler.new)
328
+
343
329
  return @output
344
330
  end
345
-
346
- def sift()
347
- master_article = Article.new()
348
-
349
- @articles.each() do |article|
331
+
332
+ def sift
333
+ master_article = Article.new
334
+
335
+ @articles.each do |article|
350
336
  next if filter?(article)
351
-
352
- article.words.values().each() do |word|
337
+
338
+ article.words.each_value do |word|
353
339
  master_article.add_word(word,use_freq: true)
354
340
  end
355
341
  end
356
-
357
- words = master_article.words.values()
358
-
359
- words.sort!() do |word1,word2|
342
+
343
+ words = master_article.words.values
344
+
345
+ words.sort! do |word1,word2|
360
346
  # Order by freq DESC (most frequent words to top).
361
347
  i = (word2.freq <=> word1.freq)
362
-
348
+
363
349
  # Order by !defn.empty, word ASC, !kana.empty, kana ASC, defn.len DESC, defn ASC.
364
350
  i = compare_empty_str(word1.defn,word2.defn) if i == 0 # Favor words that have definitions
365
- i = (word1.word.to_s() <=> word2.word.to_s()) if i == 0
351
+ i = (word1.word.to_s <=> word2.word.to_s) if i == 0
366
352
  i = compare_empty_str(word1.kana,word2.kana) if i == 0 # Favor words that have kana
367
- i = (word1.kana.to_s() <=> word2.kana.to_s()) if i == 0
368
- i = (word2.defn.to_s().length <=> word1.defn.to_s().length) if i == 0 # Favor longer definitions
369
- i = (word1.defn.to_s() <=> word2.defn.to_s()) if i == 0
370
-
353
+ i = (word1.kana.to_s <=> word2.kana.to_s) if i == 0
354
+ i = (word2.defn.to_s.length <=> word1.defn.to_s.length) if i == 0 # Favor longer definitions
355
+ i = (word1.defn.to_s <=> word2.defn.to_s) if i == 0
356
+
371
357
  i
372
358
  end
373
-
359
+
374
360
  return words
375
361
  end
376
-
362
+
377
363
  def compare_empty_str(str1,str2)
378
364
  has_str1 = !Util.empty_web_str?(str1)
379
365
  has_str2 = !Util.empty_web_str?(str2)
380
-
366
+
381
367
  if has_str1 && !has_str2
382
368
  return -1 # Bubble word1 to top
383
369
  elsif !has_str1 && has_str2
384
370
  return 1 # Bubble word2 to top
385
371
  end
386
-
372
+
387
373
  return 0 # Further comparison needed
388
374
  end
389
-
390
- def to_s()
391
- return @output.to_s()
375
+
376
+ def to_s
377
+ return @output.to_s
392
378
  end
393
379
  end
394
380
  end