nhkore 0.3.6 → 0.3.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.yardopts +3 -0
- data/CHANGELOG.md +63 -2
- data/Gemfile +0 -18
- data/Gemfile.lock +89 -0
- data/README.md +36 -30
- data/Rakefile +38 -52
- data/bin/nhkore +4 -15
- data/lib/nhkore.rb +8 -20
- data/lib/nhkore/app.rb +236 -236
- data/lib/nhkore/article.rb +39 -53
- data/lib/nhkore/article_scraper.rb +301 -287
- data/lib/nhkore/cleaner.rb +20 -32
- data/lib/nhkore/cli/fx_cmd.rb +41 -53
- data/lib/nhkore/cli/get_cmd.rb +59 -70
- data/lib/nhkore/cli/news_cmd.rb +143 -153
- data/lib/nhkore/cli/search_cmd.rb +108 -118
- data/lib/nhkore/cli/sift_cmd.rb +109 -120
- data/lib/nhkore/datetime_parser.rb +89 -103
- data/lib/nhkore/defn.rb +48 -55
- data/lib/nhkore/dict.rb +26 -38
- data/lib/nhkore/dict_scraper.rb +31 -40
- data/lib/nhkore/entry.rb +43 -55
- data/lib/nhkore/error.rb +16 -21
- data/lib/nhkore/fileable.rb +10 -21
- data/lib/nhkore/lib.rb +5 -17
- data/lib/nhkore/missingno.rb +21 -33
- data/lib/nhkore/news.rb +58 -72
- data/lib/nhkore/polisher.rb +22 -34
- data/lib/nhkore/scraper.rb +75 -82
- data/lib/nhkore/search_link.rb +63 -75
- data/lib/nhkore/search_scraper.rb +89 -93
- data/lib/nhkore/sifter.rb +157 -171
- data/lib/nhkore/splitter.rb +19 -31
- data/lib/nhkore/user_agents.rb +28 -32
- data/lib/nhkore/util.rb +72 -84
- data/lib/nhkore/variator.rb +20 -32
- data/lib/nhkore/version.rb +4 -16
- data/lib/nhkore/word.rb +105 -99
- data/nhkore.gemspec +54 -65
- data/samples/looper.rb +71 -0
- data/test/nhkore/test_helper.rb +3 -15
- data/test/nhkore_test.rb +6 -18
- metadata +50 -28
data/lib/nhkore/dict.rb
CHANGED
@@ -1,23 +1,11 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
@@ -27,53 +15,53 @@ require 'nhkore/error'
|
|
27
15
|
|
28
16
|
module NHKore
|
29
17
|
###
|
30
|
-
# @author Jonathan Bradley Whited
|
18
|
+
# @author Jonathan Bradley Whited
|
31
19
|
# @since 0.2.0
|
32
20
|
###
|
33
21
|
class Dict
|
34
22
|
attr_reader :entries
|
35
|
-
|
36
|
-
def initialize
|
23
|
+
|
24
|
+
def initialize
|
37
25
|
super()
|
38
|
-
|
26
|
+
|
39
27
|
@entries = {}
|
40
28
|
end
|
41
|
-
|
29
|
+
|
42
30
|
def [](id)
|
43
31
|
return @entries[id]
|
44
32
|
end
|
45
|
-
|
33
|
+
|
46
34
|
def []=(id,entry)
|
47
|
-
|
35
|
+
@entries[id] = entry
|
48
36
|
end
|
49
|
-
|
37
|
+
|
50
38
|
def self.scrape(hash,missingno: nil,url: nil)
|
51
|
-
dict = Dict.new
|
52
|
-
|
53
|
-
hash.each
|
39
|
+
dict = Dict.new
|
40
|
+
|
41
|
+
hash.each do |id,array|
|
54
42
|
entry = Entry.scrape(id,array,missingno: missingno,url: url)
|
55
|
-
|
56
|
-
next if entry.nil?
|
43
|
+
|
44
|
+
next if entry.nil?
|
57
45
|
raise ScrapeError,"duplicate ID[#{id}] at URL[#{url}] in hash[#{hash}]" if dict.key?(id)
|
58
|
-
|
46
|
+
|
59
47
|
dict[id] = entry
|
60
48
|
end
|
61
|
-
|
49
|
+
|
62
50
|
return dict
|
63
51
|
end
|
64
|
-
|
52
|
+
|
65
53
|
def key?(id)
|
66
54
|
return @entries.key?(id)
|
67
55
|
end
|
68
|
-
|
69
|
-
def to_s
|
70
|
-
s = ''.dup
|
71
|
-
|
72
|
-
@entries.each
|
56
|
+
|
57
|
+
def to_s
|
58
|
+
s = ''.dup
|
59
|
+
|
60
|
+
@entries.each do |id,entry|
|
73
61
|
s << "#{id}:\n"
|
74
|
-
s << " #{entry.to_s
|
62
|
+
s << " #{entry.to_s.gsub("\n","\n ").rstrip}\n"
|
75
63
|
end
|
76
|
-
|
64
|
+
|
77
65
|
return s
|
78
66
|
end
|
79
67
|
end
|
data/lib/nhkore/dict_scraper.rb
CHANGED
@@ -1,23 +1,11 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
@@ -29,48 +17,51 @@ require 'nhkore/util'
|
|
29
17
|
|
30
18
|
module NHKore
|
31
19
|
###
|
32
|
-
# @author Jonathan Bradley Whited
|
20
|
+
# @author Jonathan Bradley Whited
|
33
21
|
# @since 0.2.0
|
34
22
|
###
|
35
23
|
class DictScraper < Scraper
|
36
24
|
attr_accessor :missingno
|
37
|
-
|
25
|
+
|
38
26
|
def initialize(url,missingno: nil,parse_url: true,**kargs)
|
39
27
|
url = self.class.parse_url(url) if parse_url
|
40
|
-
|
28
|
+
|
41
29
|
super(url,**kargs)
|
42
|
-
|
30
|
+
|
43
31
|
@missingno = missingno
|
44
32
|
end
|
45
|
-
|
33
|
+
|
46
34
|
def self.parse_url(url,basename: nil)
|
47
|
-
url = Util.strip_web_str(url.to_s
|
48
|
-
|
49
|
-
raise ParseError,"cannot parse dictionary URL from URL[#{url}]" if url.empty?
|
50
|
-
|
35
|
+
url = Util.strip_web_str(url.to_s)
|
36
|
+
|
37
|
+
raise ParseError,"cannot parse dictionary URL from URL[#{url}]" if url.empty?
|
38
|
+
|
51
39
|
i = url.rindex(%r{[/\\]}) # Can be a URL or a file
|
52
|
-
i = i.nil?
|
53
|
-
|
54
|
-
basename = File.basename(url[i..-1],'.*') if basename.nil?
|
40
|
+
i = i.nil? ? 0 : (i + 1) # If no match found, no path
|
41
|
+
|
42
|
+
basename = File.basename(url[i..-1],'.*') if basename.nil?
|
55
43
|
path = url[0...i]
|
56
|
-
|
44
|
+
|
57
45
|
return "#{path}#{basename}.out.dic"
|
58
46
|
end
|
59
|
-
|
60
|
-
def scrape
|
47
|
+
|
48
|
+
def scrape
|
61
49
|
require 'json'
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
50
|
+
|
51
|
+
str = read # Make sure it has all been read.
|
52
|
+
str = str.string if str.respond_to?(:string) # For StringIO.
|
53
|
+
|
54
|
+
json = JSON.parse(str)
|
55
|
+
|
56
|
+
return Dict.new if json.nil?
|
57
|
+
|
67
58
|
hash = json['reikai']
|
68
|
-
|
69
|
-
return Dict.new
|
70
|
-
|
59
|
+
|
60
|
+
return Dict.new if hash.nil?
|
61
|
+
|
71
62
|
hash = hash['entries']
|
72
|
-
|
73
|
-
return Dict.new
|
63
|
+
|
64
|
+
return Dict.new if hash.nil?
|
74
65
|
return Dict.scrape(hash,missingno: @missingno,url: @url)
|
75
66
|
end
|
76
67
|
end
|
data/lib/nhkore/entry.rb
CHANGED
@@ -1,23 +1,11 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
@@ -27,77 +15,77 @@ require 'nhkore/util'
|
|
27
15
|
|
28
16
|
module NHKore
|
29
17
|
###
|
30
|
-
# @author Jonathan Bradley Whited
|
18
|
+
# @author Jonathan Bradley Whited
|
31
19
|
# @since 0.2.0
|
32
20
|
###
|
33
21
|
class Entry
|
34
22
|
HYOUKI_SEP = '・'
|
35
|
-
|
23
|
+
|
36
24
|
attr_reader :defns
|
37
25
|
attr_accessor :id
|
38
|
-
|
39
|
-
def initialize
|
26
|
+
|
27
|
+
def initialize
|
40
28
|
super()
|
41
|
-
|
29
|
+
|
42
30
|
@defns = []
|
43
31
|
@id = nil
|
44
32
|
end
|
45
|
-
|
46
|
-
def build_defn
|
33
|
+
|
34
|
+
def build_defn
|
47
35
|
defns = []
|
48
36
|
i = 0
|
49
|
-
|
50
|
-
@defns.each
|
37
|
+
|
38
|
+
@defns.each do |defn|
|
51
39
|
defns << "#{i += 1})#{defn}" # Japanese parenthesis
|
52
40
|
end
|
53
|
-
|
41
|
+
|
54
42
|
return defns.join("\n")
|
55
43
|
end
|
56
|
-
|
57
|
-
def build_hyouki
|
44
|
+
|
45
|
+
def build_hyouki
|
58
46
|
# Since Ruby v1.9, Hash preserves order.
|
59
47
|
# Ruby v2.7 doc for Set still says no guarantee of order, so don't use.
|
60
48
|
hyoukis = {}
|
61
|
-
|
62
|
-
@defns.each
|
63
|
-
defn.hyoukis.each
|
49
|
+
|
50
|
+
@defns.each do |defn|
|
51
|
+
defn.hyoukis.each do |hyouki|
|
64
52
|
hyouki = hyouki.chomp(HYOUKI_SEP)
|
65
|
-
|
66
|
-
next if hyouki.empty?
|
67
|
-
|
53
|
+
|
54
|
+
next if hyouki.empty?
|
55
|
+
|
68
56
|
hyoukis[hyouki] = true
|
69
57
|
end
|
70
58
|
end
|
71
|
-
|
59
|
+
|
72
60
|
return hyoukis.keys.join(HYOUKI_SEP)
|
73
61
|
end
|
74
|
-
|
62
|
+
|
75
63
|
def self.scrape(id,array,missingno: nil,url: nil)
|
76
|
-
entry = Entry.new
|
77
|
-
|
78
|
-
entry.id = Util.unspace_web_str(id.to_s
|
79
|
-
|
80
|
-
return nil if entry.id.empty?
|
81
|
-
|
82
|
-
array.each
|
64
|
+
entry = Entry.new
|
65
|
+
|
66
|
+
entry.id = Util.unspace_web_str(id.to_s).downcase
|
67
|
+
|
68
|
+
return nil if entry.id.empty?
|
69
|
+
|
70
|
+
array.each do |hash|
|
83
71
|
defn = Defn.scrape(hash,missingno: missingno,url: url)
|
84
|
-
entry.defns << defn unless defn.nil?
|
72
|
+
entry.defns << defn unless defn.nil?
|
85
73
|
end
|
86
|
-
|
87
|
-
return nil if entry.defns.empty?
|
74
|
+
|
75
|
+
return nil if entry.defns.empty?
|
88
76
|
return entry
|
89
77
|
end
|
90
|
-
|
91
|
-
def to_s
|
92
|
-
s = ''.dup
|
93
|
-
|
94
|
-
return s if @defns.empty?
|
95
|
-
|
96
|
-
hyouki = build_hyouki
|
97
|
-
|
78
|
+
|
79
|
+
def to_s
|
80
|
+
s = ''.dup
|
81
|
+
|
82
|
+
return s if @defns.empty?
|
83
|
+
|
84
|
+
hyouki = build_hyouki
|
85
|
+
|
98
86
|
s << "#{hyouki}\n" unless Util.empty_web_str?(hyouki)
|
99
|
-
s << build_defn
|
100
|
-
|
87
|
+
s << build_defn
|
88
|
+
|
101
89
|
return s
|
102
90
|
end
|
103
91
|
end
|
data/lib/nhkore/error.rb
CHANGED
@@ -1,35 +1,30 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
12
|
module NHKore
|
25
13
|
###
|
26
|
-
# @author Jonathan Bradley Whited
|
14
|
+
# @author Jonathan Bradley Whited
|
27
15
|
# @since 0.2.0
|
28
16
|
###
|
29
17
|
class Error < ::StandardError; end
|
30
|
-
|
31
|
-
|
32
|
-
class
|
33
|
-
|
34
|
-
|
18
|
+
|
19
|
+
# @since 0.2.0
|
20
|
+
class CLIError < Error; end
|
21
|
+
|
22
|
+
# @since 0.2.0
|
23
|
+
class ParseError < Error; end
|
24
|
+
|
25
|
+
# @since 0.2.0
|
26
|
+
class ScrapeError < Error; end
|
27
|
+
|
28
|
+
# @since 0.2.0
|
29
|
+
class ZipError < Error; end
|
35
30
|
end
|
data/lib/nhkore/fileable.rb
CHANGED
@@ -1,46 +1,35 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
1
|
# encoding: UTF-8
|
3
2
|
# frozen_string_literal: true
|
4
3
|
|
5
4
|
#--
|
6
5
|
# This file is part of NHKore.
|
7
|
-
# Copyright (c) 2020 Jonathan Bradley Whited
|
8
|
-
#
|
9
|
-
#
|
10
|
-
# it under the terms of the GNU Lesser General Public License as published by
|
11
|
-
# the Free Software Foundation, either version 3 of the License, or
|
12
|
-
# (at your option) any later version.
|
13
|
-
#
|
14
|
-
# NHKore is distributed in the hope that it will be useful,
|
15
|
-
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
16
|
-
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
17
|
-
# GNU Lesser General Public License for more details.
|
18
|
-
#
|
19
|
-
# You should have received a copy of the GNU Lesser General Public License
|
20
|
-
# along with NHKore. If not, see <https://www.gnu.org/licenses/>.
|
6
|
+
# Copyright (c) 2020-2021 Jonathan Bradley Whited
|
7
|
+
#
|
8
|
+
# SPDX-License-Identifier: LGPL-3.0-or-later
|
21
9
|
#++
|
22
10
|
|
23
11
|
|
24
12
|
module NHKore
|
25
13
|
###
|
26
|
-
# @author Jonathan Bradley Whited
|
14
|
+
# @author Jonathan Bradley Whited
|
27
15
|
# @since 0.2.0
|
28
16
|
###
|
29
17
|
module Fileable
|
30
18
|
def self.included(mod)
|
31
19
|
mod.extend ClassMethods
|
32
20
|
end
|
33
|
-
|
21
|
+
|
34
22
|
def save_file(file,mode: 'wt',**kargs)
|
35
|
-
File.open(file,mode: mode,**kargs) do |
|
36
|
-
|
23
|
+
File.open(file,mode: mode,**kargs) do |f|
|
24
|
+
f.write(to_s)
|
37
25
|
end
|
38
26
|
end
|
39
|
-
|
27
|
+
|
28
|
+
# Auto-extended when Fileable is included.
|
40
29
|
module ClassMethods
|
41
30
|
def load_file(file,mode: 'rt:BOM|UTF-8',**kargs)
|
42
31
|
data = File.read(file,mode: mode,**kargs)
|
43
|
-
|
32
|
+
|
44
33
|
return load_data(data,file: file,**kargs)
|
45
34
|
end
|
46
35
|
end
|