dmm-crawler 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +5 -0
- data/Gemfile.lock +1 -1
- data/lib/dmm-crawler.rb +2 -0
- data/lib/dmm-crawler/attributes.rb +89 -0
- data/lib/dmm-crawler/ranking.rb +3 -62
- data/lib/dmm-crawler/version.rb +1 -1
- data/spec/dmm-crawler/ranking_spec.rb +1 -1
- metadata +2 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '06925a3fd6d470aaaf623d6b2da213c5b8ff631f'
|
4
|
+
data.tar.gz: 40dfc7eb3824eade889b6218d8c2042c4c62ae02
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 32e43bd62c3af54c67b60eda38d7455f2209a9d3b2351b7893b94ec9329964c0a38c1ac9975fb3d6b720d2cd5d8be3c0c00a3d927c6021b1c2a9849f64bc3520
|
7
|
+
data.tar.gz: 42b519cd6268171d10634953c9392b5fdd1b220067dd7ca36c2e8ca1b2496c8d3b147026eb965417c52d7bde2a656caf0d39679d5c3a0fe18b79653f82008767
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/dmm-crawler.rb
CHANGED
data/lib/dmm-crawler/attributes.rb
ADDED
@@ -0,0 +1,89 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
class Attributes
|
3
|
+
def initialize(element, submedia = nil)
|
4
|
+
@agent = Agent.instance.agent
|
5
|
+
@element = element
|
6
|
+
|
7
|
+
@submedia = submedia
|
8
|
+
end
|
9
|
+
|
10
|
+
def to_a
|
11
|
+
[
|
12
|
+
title,
|
13
|
+
title_link,
|
14
|
+
image_url,
|
15
|
+
description,
|
16
|
+
description_raw,
|
17
|
+
submedia,
|
18
|
+
tags
|
19
|
+
]
|
20
|
+
end
|
21
|
+
|
22
|
+
private
|
23
|
+
|
24
|
+
def title
|
25
|
+
if art_page?
|
26
|
+
@element.search('.productTitle__txt span').remove
|
27
|
+
@element.search('.productTitle__txt').text.strip
|
28
|
+
else
|
29
|
+
@element.search('.rank-name').first.text.strip
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
def image_url
|
34
|
+
if art_page?
|
35
|
+
@element.search('.productPreview__item img').last.attributes['data-src'].value
|
36
|
+
else
|
37
|
+
@element.search('img').last.attributes['src'].value
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
def title_link
|
42
|
+
if art_page?
|
43
|
+
@element.uri.to_s
|
44
|
+
else
|
45
|
+
File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def description
|
50
|
+
if art_page?
|
51
|
+
@element.search('.summary .summary__txt').text
|
52
|
+
else
|
53
|
+
@element.search('.rank-desc').text
|
54
|
+
end
|
55
|
+
end
|
56
|
+
|
57
|
+
def description_raw
|
58
|
+
if art_page?
|
59
|
+
@element.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
|
60
|
+
else
|
61
|
+
@element.search('.rank-desc').to_s.tr('"', "'")
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
def submedia
|
66
|
+
return @submedia if @submedia
|
67
|
+
|
68
|
+
@element
|
69
|
+
.search('.productAttribute-listItem .c_icon_productGenre')
|
70
|
+
.first
|
71
|
+
.attributes['class']
|
72
|
+
.value
|
73
|
+
.gsub('c_icon_productGenre ', '')
|
74
|
+
.delete('-')
|
75
|
+
end
|
76
|
+
|
77
|
+
def tags
|
78
|
+
if art_page?
|
79
|
+
@element.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
|
80
|
+
else
|
81
|
+
@element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
def art_page?
|
86
|
+
@element.search('.rank-name').empty?
|
87
|
+
end
|
88
|
+
end
|
89
|
+
end
|
data/lib/dmm-crawler/ranking.rb
CHANGED
@@ -1,7 +1,5 @@
|
|
1
1
|
module DMMCrawler
|
2
2
|
class Ranking
|
3
|
-
BASE_URL = 'http://www.dmm.co.jp'.freeze
|
4
|
-
|
5
3
|
def initialize(arguments)
|
6
4
|
@term = discriminate_term(arguments[:term])
|
7
5
|
@submedia = discriminate_submedia(arguments[:submedia])
|
@@ -11,79 +9,22 @@ module DMMCrawler
|
|
11
9
|
|
12
10
|
def arts
|
13
11
|
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
|
14
|
-
|
12
|
+
Attributes.new(element, @submedia).to_a
|
15
13
|
end
|
16
14
|
|
17
|
-
arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, tags), rank|
|
15
|
+
arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, submedia, tags), rank|
|
18
16
|
{
|
19
17
|
title: "#{rank}位: #{title}",
|
20
18
|
title_link: title_link,
|
21
19
|
image_url: image_url,
|
22
20
|
description: description,
|
23
21
|
description_raw: description_raw,
|
22
|
+
submedia: submedia,
|
24
23
|
tags: tags
|
25
24
|
}
|
26
25
|
end
|
27
26
|
end
|
28
27
|
|
29
|
-
class Attribute
|
30
|
-
def initialize(element)
|
31
|
-
@element = element
|
32
|
-
@agent = Agent.instance.agent
|
33
|
-
end
|
34
|
-
|
35
|
-
def to_a
|
36
|
-
[
|
37
|
-
title,
|
38
|
-
title_link,
|
39
|
-
image_url,
|
40
|
-
description,
|
41
|
-
description_raw,
|
42
|
-
tags
|
43
|
-
]
|
44
|
-
end
|
45
|
-
|
46
|
-
private
|
47
|
-
|
48
|
-
def title
|
49
|
-
@element.search('.rank-name').first.text.strip
|
50
|
-
end
|
51
|
-
|
52
|
-
def image_url
|
53
|
-
@element.search('img').last.attributes['src'].value
|
54
|
-
end
|
55
|
-
|
56
|
-
def title_link
|
57
|
-
File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
|
58
|
-
end
|
59
|
-
|
60
|
-
def description
|
61
|
-
@element.search('.rank-desc').text.nil? ? fetch_description : @element.search('.rank-desc').text
|
62
|
-
end
|
63
|
-
|
64
|
-
def description_raw
|
65
|
-
@element.search('.rank-desc').text.nil? ? fetch_description_raw : @element.search('.rank-desc').to_s.tr('"', "'")
|
66
|
-
end
|
67
|
-
|
68
|
-
def fetch_description
|
69
|
-
url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
|
70
|
-
page = @agent.get(url)
|
71
|
-
page.search('.summary .summary__txt').text
|
72
|
-
end
|
73
|
-
|
74
|
-
def fetch_description_raw
|
75
|
-
url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
|
76
|
-
page = @agent.get(url)
|
77
|
-
page.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
|
78
|
-
end
|
79
|
-
|
80
|
-
def tags
|
81
|
-
@element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
82
|
-
end
|
83
|
-
end
|
84
|
-
|
85
|
-
private_constant :Attribute
|
86
|
-
|
87
28
|
private
|
88
29
|
|
89
30
|
def page
|
data/lib/dmm-crawler/version.rb
CHANGED
data/spec/dmm-crawler/ranking_spec.rb
CHANGED
@@ -21,7 +21,7 @@ describe DMMCrawler::Ranking do
|
|
21
21
|
let(:term) { '24' }
|
22
22
|
|
23
23
|
it { is_expected.not_to be_empty }
|
24
|
-
it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :tags)) }
|
24
|
+
it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :submedia, :tags)) }
|
25
25
|
end
|
26
26
|
|
27
27
|
context 'with not registered argument' do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmm-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Ohmori
|
@@ -115,6 +115,7 @@ files:
|
|
115
115
|
- doc/ja/README.md
|
116
116
|
- lib/dmm-crawler.rb
|
117
117
|
- lib/dmm-crawler/agent.rb
|
118
|
+
- lib/dmm-crawler/attributes.rb
|
118
119
|
- lib/dmm-crawler/ranking.rb
|
119
120
|
- lib/dmm-crawler/version.rb
|
120
121
|
- spec/dmm-crawler/ranking_spec.rb
|