dmm-crawler 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/lib/dmm-crawler.rb +2 -0
- data/lib/dmm-crawler/attributes.rb +89 -0
- data/lib/dmm-crawler/ranking.rb +3 -62
- data/lib/dmm-crawler/version.rb +1 -1
- data/spec/dmm-crawler/ranking_spec.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '06925a3fd6d470aaaf623d6b2da213c5b8ff631f'
|
4
|
+
data.tar.gz: 40dfc7eb3824eade889b6218d8c2042c4c62ae02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32e43bd62c3af54c67b60eda38d7455f2209a9d3b2351b7893b94ec9329964c0a38c1ac9975fb3d6b720d2cd5d8be3c0c00a3d927c6021b1c2a9849f64bc3520
|
7
|
+
data.tar.gz: 42b519cd6268171d10634953c9392b5fdd1b220067dd7ca36c2e8ca1b2496c8d3b147026eb965417c52d7bde2a656caf0d39679d5c3a0fe18b79653f82008767
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/dmm-crawler.rb
CHANGED
data/lib/dmm-crawler/attributes.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
class Attributes
|
3
|
+
def initialize(element, submedia = nil)
|
4
|
+
@agent = Agent.instance.agent
|
5
|
+
@element = element
|
6
|
+
|
7
|
+
@submedia = submedia
|
8
|
+
end
|
9
|
+
|
10
|
+
def to_a
|
11
|
+
[
|
12
|
+
title,
|
13
|
+
title_link,
|
14
|
+
image_url,
|
15
|
+
description,
|
16
|
+
description_raw,
|
17
|
+
submedia,
|
18
|
+
tags
|
19
|
+
]
|
20
|
+
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
def title
|
25
|
+
if art_page?
|
26
|
+
@element.search('.productTitle__txt span').remove
|
27
|
+
@element.search('.productTitle__txt').text.strip
|
28
|
+
else
|
29
|
+
@element.search('.rank-name').first.text.strip
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def image_url
|
34
|
+
if art_page?
|
35
|
+
@element.search('.productPreview__item img').last.attributes['data-src'].value
|
36
|
+
else
|
37
|
+
@element.search('img').last.attributes['src'].value
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def title_link
|
42
|
+
if art_page?
|
43
|
+
@element.uri.to_s
|
44
|
+
else
|
45
|
+
File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def description
|
50
|
+
if art_page?
|
51
|
+
@element.search('.summary .summary__txt').text
|
52
|
+
else
|
53
|
+
@element.search('.rank-desc').text
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def description_raw
|
58
|
+
if art_page?
|
59
|
+
@element.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
|
60
|
+
else
|
61
|
+
@element.search('.rank-desc').to_s.tr('"', "'")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def submedia
|
66
|
+
return @submedia if @submedia
|
67
|
+
|
68
|
+
@element
|
69
|
+
.search('.productAttribute-listItem .c_icon_productGenre')
|
70
|
+
.first
|
71
|
+
.attributes['class']
|
72
|
+
.value
|
73
|
+
.gsub('c_icon_productGenre ', '')
|
74
|
+
.delete('-')
|
75
|
+
end
|
76
|
+
|
77
|
+
def tags
|
78
|
+
if art_page?
|
79
|
+
@element.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
|
80
|
+
else
|
81
|
+
@element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def art_page?
|
86
|
+
@element.search('.rank-name').empty?
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
data/lib/dmm-crawler/ranking.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
module DMMCrawler
|
2
2
|
class Ranking
|
3
|
-
BASE_URL = 'http://www.dmm.co.jp'.freeze
|
4
|
-
|
5
3
|
def initialize(arguments)
|
6
4
|
@term = discriminate_term(arguments[:term])
|
7
5
|
@submedia = discriminate_submedia(arguments[:submedia])
|
@@ -11,79 +9,22 @@ module DMMCrawler
|
|
11
9
|
|
12
10
|
def arts
|
13
11
|
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
|
14
|
-
|
12
|
+
Attributes.new(element, @submedia).to_a
|
15
13
|
end
|
16
14
|
|
17
|
-
arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, tags), rank|
|
15
|
+
arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, submedia, tags), rank|
|
18
16
|
{
|
19
17
|
title: "#{rank}位: #{title}",
|
20
18
|
title_link: title_link,
|
21
19
|
image_url: image_url,
|
22
20
|
description: description,
|
23
21
|
description_raw: description_raw,
|
22
|
+
submedia: submedia,
|
24
23
|
tags: tags
|
25
24
|
}
|
26
25
|
end
|
27
26
|
end
|
28
27
|
|
29
|
-
class Attribute
|
30
|
-
def initialize(element)
|
31
|
-
@element = element
|
32
|
-
@agent = Agent.instance.agent
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_a
|
36
|
-
[
|
37
|
-
title,
|
38
|
-
title_link,
|
39
|
-
image_url,
|
40
|
-
description,
|
41
|
-
description_raw,
|
42
|
-
tags
|
43
|
-
]
|
44
|
-
end
|
45
|
-
|
46
|
-
private
|
47
|
-
|
48
|
-
def title
|
49
|
-
@element.search('.rank-name').first.text.strip
|
50
|
-
end
|
51
|
-
|
52
|
-
def image_url
|
53
|
-
@element.search('img').last.attributes['src'].value
|
54
|
-
end
|
55
|
-
|
56
|
-
def title_link
|
57
|
-
File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
|
58
|
-
end
|
59
|
-
|
60
|
-
def description
|
61
|
-
@element.search('.rank-desc').text.nil? ? fetch_description : @element.search('.rank-desc').text
|
62
|
-
end
|
63
|
-
|
64
|
-
def description_raw
|
65
|
-
@element.search('.rank-desc').text.nil? ? fetch_description_raw : @element.search('.rank-desc').to_s.tr('"', "'")
|
66
|
-
end
|
67
|
-
|
68
|
-
def fetch_description
|
69
|
-
url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
|
70
|
-
page = @agent.get(url)
|
71
|
-
page.search('.summary .summary__txt').text
|
72
|
-
end
|
73
|
-
|
74
|
-
def fetch_description_raw
|
75
|
-
url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
|
76
|
-
page = @agent.get(url)
|
77
|
-
page.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
|
78
|
-
end
|
79
|
-
|
80
|
-
def tags
|
81
|
-
@element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
private_constant :Attribute
|
86
|
-
|
87
28
|
private
|
88
29
|
|
89
30
|
def page
|
data/lib/dmm-crawler/version.rb
CHANGED
data/spec/dmm-crawler/ranking_spec.rb
CHANGED
@@ -21,7 +21,7 @@ describe DMMCrawler::Ranking do
|
|
21
21
|
let(:term) { '24' }
|
22
22
|
|
23
23
|
it { is_expected.not_to be_empty }
|
24
|
-
it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :tags)) }
|
24
|
+
it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :submedia, :tags)) }
|
25
25
|
end
|
26
26
|
|
27
27
|
context 'with not registered argument' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmm-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Ohmori
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- doc/ja/README.md
|
116
116
|
- lib/dmm-crawler.rb
|
117
117
|
- lib/dmm-crawler/agent.rb
|
118
|
+
- lib/dmm-crawler/attributes.rb
|
118
119
|
- lib/dmm-crawler/ranking.rb
|
119
120
|
- lib/dmm-crawler/version.rb
|
120
121
|
- spec/dmm-crawler/ranking_spec.rb
|