sekitori-search 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/sekitori/generators/juryo_list_generator.rb +12 -10
- data/lib/sekitori/generators/komusubi_list_generator.rb +16 -14
- data/lib/sekitori/generators/list_generator.rb +58 -60
- data/lib/sekitori/generators/makushita_list_generator.rb +10 -8
- data/lib/sekitori/generators/makuuchi_list_generator.rb +14 -12
- data/lib/sekitori/generators/oozeki_list_generator.rb +11 -9
- data/lib/sekitori/generators/sekiwake_list_generator.rb +15 -13
- data/lib/sekitori/generators/yokozuna_list_generator.rb +8 -6
- data/lib/sekitori/search.rb +1 -1
- data/lib/sekitori/search/operator.rb +13 -15
- data/lib/sekitori/search/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1e964322f72e242a5d04261ae3a74a106d858c1f
|
4
|
+
data.tar.gz: d692531cf43b0fd607e8bf6318b21b78f2db9944
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 59abe1fd3490ddec4194590092452af5e2be27ad267bce31c9ed4f7cb43172e08b415630fcc0975da6e13ee12c2bc1959ce4c61afffb7ce78234b53a384b5437
|
7
|
+
data.tar.gz: be8d609f4b4b6c3a739ad9702ba87458fd9a7b54297e9482949ef758f5be842323f06aed86cb377d80acca4eeb30a7ceb682fe4c551fe6274b890a70c9a452f0
|
data/lib/sekitori/generators/juryo_list_generator.rb
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
require 'sekitori/generators/list_generator.rb'
|
2
2
|
|
3
|
-
module Sekitori
|
4
|
-
|
5
|
-
|
6
|
-
|
3
|
+
module Sekitori
|
4
|
+
module Generators
|
5
|
+
class JuryoListGenerator < ListGenerator
|
6
|
+
BANZUKE = '前頭(十両)'.freeze
|
7
|
+
URL = 'https://ja.wikipedia.org/wiki/大相撲力士一覧'.freeze
|
7
8
|
|
8
|
-
|
9
|
-
|
10
|
-
|
9
|
+
TARGET_XPATH_LIST = [
|
10
|
+
'//h2[contains(span/text(), "十両(十枚目)")]/following-sibling::div/ul/li/a'
|
11
|
+
].freeze
|
11
12
|
|
12
|
-
|
13
|
-
|
14
|
-
|
13
|
+
IGNORE_XPATH_LIST = [
|
14
|
+
'//h2[contains(span/text(), "幕下以下")]/following-sibling::div/ul/li/a'
|
15
|
+
].freeze
|
16
|
+
end
|
15
17
|
end
|
16
18
|
end
|
data/lib/sekitori/generators/komusubi_list_generator.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
require_relative './list_generator.rb'
|
2
2
|
|
3
|
-
module Sekitori
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
3
|
+
module Sekitori
|
4
|
+
module Generators
|
5
|
+
class KomusubiListGenerator < ListGenerator
|
6
|
+
BANZUKE = '小結'.freeze
|
7
|
+
URL = 'https://ja.wikipedia.org/wiki/小結一覧'.freeze
|
8
|
+
|
9
|
+
TARGET_XPATH_LIST = [
|
10
|
+
'//h2[contains(span/text(), "現役力士")]/following-sibling::ul/li//a',
|
11
|
+
'//h2[contains(span/text(), "引退・廃業した力士")]/following-sibling::div/ul/li/a'
|
12
|
+
].freeze
|
13
|
+
|
14
|
+
IGNORE_XPATH_LIST = [
|
15
|
+
'//h2[contains(span/text(), "関連項目")]/following-sibling::ul/li//a'
|
16
|
+
].freeze
|
17
|
+
end
|
16
18
|
end
|
17
|
-
end
|
19
|
+
end
|
data/lib/sekitori/generators/list_generator.rb
CHANGED
@@ -5,67 +5,65 @@ require 'tmpdir'
|
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'active_support/all'
|
7
7
|
|
8
|
-
module Sekitori
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
File.read path
|
52
|
-
|
53
|
-
|
8
|
+
module Sekitori
|
9
|
+
module Generators
|
10
|
+
class ListGenerator
|
11
|
+
CACHE_DIR = File.join(Dir.tmpdir, 'sekitori_checker')
|
12
|
+
|
13
|
+
TARGET_XPATH_LIST = [].freeze
|
14
|
+
IGNORE_XPATH_LIST = [].freeze
|
15
|
+
|
16
|
+
def execute
|
17
|
+
return @list if @list.present?
|
18
|
+
url = self.class::URL
|
19
|
+
target_xpaths = self.class::TARGET_XPATH_LIST
|
20
|
+
ignore_xpath = self.class::IGNORE_XPATH_LIST
|
21
|
+
|
22
|
+
html = ListGenerator.get url
|
23
|
+
list = ListGenerator.parse html, url, target_xpaths
|
24
|
+
ignore_list = ListGenerator.parse html, url, ignore_xpath
|
25
|
+
@list = list - ignore_list
|
26
|
+
@list
|
27
|
+
end
|
28
|
+
|
29
|
+
def list
|
30
|
+
execute
|
31
|
+
end
|
32
|
+
|
33
|
+
def to_h
|
34
|
+
{ self.class::BANZUKE => execute }
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.get(url)
|
38
|
+
file_name = url.split('/').last
|
39
|
+
path = File.join(CACHE_DIR, file_name)
|
40
|
+
cache = get_from_file(path)
|
41
|
+
return cache if cache
|
42
|
+
|
43
|
+
puts "Creating cache for #{file_name}"
|
44
|
+
html = get_from_http(url)
|
45
|
+
FileUtils.mkdir_p CACHE_DIR
|
46
|
+
open(path, 'w') { |file| file.write html }
|
47
|
+
html
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.get_from_file(path)
|
51
|
+
File.read path if File.exist? path
|
52
|
+
end
|
53
|
+
|
54
|
+
def self.get_from_http(url)
|
55
|
+
encode_url = URI.escape(url)
|
56
|
+
html = open(encode_url).read
|
57
|
+
end
|
58
|
+
|
59
|
+
def self.parse(html, url, xpath_list)
|
60
|
+
document = Nokogiri::HTML(html, url)
|
61
|
+
xpath_list.flat_map { |xpath| document.xpath(xpath).map(&:text) }
|
62
|
+
end
|
63
|
+
|
64
|
+
def self.clean(path = CACHE_DIR)
|
65
|
+
FileUtils.rm_rf(path, secure: true)
|
54
66
|
end
|
55
|
-
end
|
56
|
-
|
57
|
-
def self.get_from_http url
|
58
|
-
encode_url = URI.escape(url)
|
59
|
-
html = open(encode_url).read
|
60
|
-
end
|
61
|
-
|
62
|
-
def self.parse html, url, xpath_list
|
63
|
-
document = Nokogiri::HTML(html, url)
|
64
|
-
xpath_list.flat_map{|xpath| document.xpath(xpath).map(&:text)}
|
65
|
-
end
|
66
|
-
|
67
|
-
def self.clean path=CACHE_DIR
|
68
|
-
FileUtils.rm_rf(path, secure: true)
|
69
67
|
end
|
70
68
|
end
|
71
69
|
end
|
data/lib/sekitori/generators/makushita_list_generator.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
1
|
require 'sekitori/generators/list_generator.rb'
|
2
2
|
|
3
|
-
module Sekitori
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
3
|
+
module Sekitori
|
4
|
+
module Generators
|
5
|
+
class MakushitaListGenerator < ListGenerator
|
6
|
+
BANZUKE = '前頭(幕下)'.freeze
|
7
|
+
URL = 'https://ja.wikipedia.org/wiki/大相撲力士一覧'.freeze
|
8
|
+
|
9
|
+
TARGET_XPATH_LIST = [
|
10
|
+
'//h2[contains(span/text(), "幕下以下")]/following-sibling::div/ul/li/a'
|
11
|
+
].freeze
|
12
|
+
end
|
11
13
|
end
|
12
14
|
end
|
data/lib/sekitori/generators/makuuchi_list_generator.rb
CHANGED
@@ -1,16 +1,18 @@
|
|
1
1
|
require 'sekitori/generators/list_generator.rb'
|
2
2
|
|
3
|
-
module Sekitori
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
3
|
+
module Sekitori
|
4
|
+
module Generators
|
5
|
+
class MakuuchiListGenerator < ListGenerator
|
6
|
+
BANZUKE = '前頭(幕内)'.freeze
|
7
|
+
URL = 'https://ja.wikipedia.org/wiki/大相撲力士一覧'.freeze
|
8
|
+
|
9
|
+
TARGET_XPATH_LIST = [
|
10
|
+
'//h2[contains(span/text(), "幕内")]/following-sibling::div/ul/li/a'
|
11
|
+
].freeze
|
12
|
+
|
13
|
+
IGNORE_XPATH_LIST = [
|
14
|
+
'//h2[contains(span/text(), "十両(十枚目)")]/following-sibling::div/ul/li/a'
|
15
|
+
].freeze
|
16
|
+
end
|
15
17
|
end
|
16
18
|
end
|
data/lib/sekitori/generators/oozeki_list_generator.rb
CHANGED
@@ -1,13 +1,15 @@
|
|
1
1
|
require 'sekitori/generators/list_generator.rb'
|
2
2
|
|
3
|
-
module Sekitori
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
module Sekitori
|
4
|
+
module Generators
|
5
|
+
class OozekiListGenerator < ListGenerator
|
6
|
+
BANZUKE = '大関'.freeze
|
7
|
+
URL = 'https://ja.wikipedia.org/wiki/大関一覧'.freeze
|
8
|
+
|
9
|
+
TARGET_XPATH_LIST = [
|
10
|
+
'//h2[contains(span/text(), "現役力士")]/following-sibling::ul[1]/li//a',
|
11
|
+
'//h2[contains(span/text(), "引退した力士")]/following-sibling::div/ul/li/a'
|
12
|
+
].freeze
|
13
|
+
end
|
12
14
|
end
|
13
15
|
end
|
data/lib/sekitori/generators/sekiwake_list_generator.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
require 'sekitori/generators/list_generator.rb'
|
2
2
|
|
3
|
-
module Sekitori
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
3
|
+
module Sekitori
|
4
|
+
module Generators
|
5
|
+
class SekiwakeListGenerator < ListGenerator
|
6
|
+
BANZUKE = '関脇'.freeze
|
7
|
+
URL = 'https://ja.wikipedia.org/wiki/関脇一覧'.freeze
|
8
|
+
|
9
|
+
TARGET_XPATH_LIST = [
|
10
|
+
'//h2[contains(span/text(), "現役力士")]/following-sibling::ul/li//a',
|
11
|
+
'//h2[contains(span/text(), "引退・廃業した力士")]/following-sibling::div/ul/li/a'
|
12
|
+
].freeze
|
13
|
+
|
14
|
+
IGNORE_XPATH_LIST = [
|
15
|
+
'//h2[contains(span/text(), "関連項目")]/following-sibling::ul/li//a'
|
16
|
+
].freeze
|
17
|
+
end
|
16
18
|
end
|
17
19
|
end
|
data/lib/sekitori/generators/yokozuna_list_generator.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
require 'sekitori/generators/list_generator.rb'
|
2
2
|
|
3
|
-
module Sekitori
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
3
|
+
module Sekitori
|
4
|
+
module Generators
|
5
|
+
class YokozunaListGenerator < ListGenerator
|
6
|
+
BANZUKE = '横綱'.freeze
|
7
|
+
URL = 'https://ja.wikipedia.org/wiki/横綱一覧'.freeze
|
8
|
+
|
9
|
+
TARGET_XPATH_LIST = ['//h2[contains(span/text(), "歴代横綱")]/following-sibling::table[1]//td[1]//a'].freeze
|
10
|
+
end
|
9
11
|
end
|
10
12
|
end
|
data/lib/sekitori/search.rb
CHANGED
data/lib/sekitori/search/operator.rb
CHANGED
@@ -2,7 +2,6 @@ require 'sekitori/generators/all'
|
|
2
2
|
|
3
3
|
module Sekitori::Search
|
4
4
|
class Operator
|
5
|
-
|
6
5
|
def initialize
|
7
6
|
@yokozuna_generator = Sekitori::Generators::YokozunaListGenerator.new
|
8
7
|
@oozeki_generator = Sekitori::Generators::OozekiListGenerator.new
|
@@ -19,33 +18,32 @@ module Sekitori::Search
|
|
19
18
|
@komusubi_generator,
|
20
19
|
@makuuchi_generator,
|
21
20
|
@juryo_generator,
|
22
|
-
@makuuchi_generator
|
21
|
+
@makuuchi_generator
|
23
22
|
]
|
24
23
|
|
25
24
|
@rikishi_generators = @sekitori_generators + [@makushita_generator]
|
26
25
|
end
|
27
26
|
|
28
|
-
def sekitori_search_from
|
27
|
+
def sekitori_search_from(word)
|
29
28
|
search_from word, sekitori_hash
|
30
29
|
end
|
31
30
|
|
32
|
-
def rikishi_search_from
|
31
|
+
def rikishi_search_from(word)
|
33
32
|
search_from word, rikishi_hash
|
34
33
|
end
|
35
34
|
|
36
|
-
def sekitori_detail_search_from
|
35
|
+
def sekitori_detail_search_from(name)
|
37
36
|
detail_search_from name, sekitori_list
|
38
37
|
end
|
39
38
|
|
40
|
-
def rikishi_detail_search_from
|
39
|
+
def rikishi_detail_search_from(name)
|
41
40
|
detail_search_from name, rikishi_list
|
42
41
|
end
|
43
42
|
|
44
|
-
|
45
43
|
private
|
46
44
|
|
47
45
|
def sekitori_hash
|
48
|
-
@sekitori_generators.reduce({}) {|work, item| work.merge item.to_h }
|
46
|
+
@sekitori_generators.reduce({}) { |work, item| work.merge item.to_h }
|
49
47
|
end
|
50
48
|
|
51
49
|
def sekitori_list
|
@@ -53,26 +51,26 @@ module Sekitori::Search
|
|
53
51
|
end
|
54
52
|
|
55
53
|
def rikishi_hash
|
56
|
-
@rikishi_generators.reduce({}) {|work, item| work.merge item.to_h }
|
54
|
+
@rikishi_generators.reduce({}) { |work, item| work.merge item.to_h }
|
57
55
|
end
|
58
56
|
|
59
57
|
def rikishi_list
|
60
58
|
@rikishi_generators.map(&:list).flatten
|
61
59
|
end
|
62
60
|
|
63
|
-
def search_from
|
61
|
+
def search_from(word, hash)
|
64
62
|
sekitori_result = hash.map do |key, list|
|
65
|
-
filterd_list = list.select {|name| name.include? word }
|
63
|
+
filterd_list = list.select { |name| name.include? word }
|
66
64
|
[key, filterd_list]
|
67
65
|
end.to_h
|
68
66
|
end
|
69
67
|
|
70
|
-
def detail_search_from
|
71
|
-
result = list.find{|rikishi| rikishi == name }
|
68
|
+
def detail_search_from(name, list)
|
69
|
+
result = list.find { |rikishi| rikishi == name }
|
72
70
|
if result
|
73
|
-
"https://ja.wikipedia.org/wiki/#{result.
|
71
|
+
"https://ja.wikipedia.org/wiki/#{result.tr(' ', '_')}"
|
74
72
|
else
|
75
|
-
puts
|
73
|
+
puts '見つかりませんでした'
|
76
74
|
end
|
77
75
|
end
|
78
76
|
end
|