dmm-crawler 0.3.5 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +4 -0
- data/README.md +6 -0
- data/doc/ja/README.md +5 -0
- data/lib/dmm-crawler.rb +6 -2
- data/lib/dmm-crawler/attributes/adult_game_attributes.rb +56 -0
- data/lib/dmm-crawler/attributes/base_attributes.rb +58 -0
- data/lib/dmm-crawler/attributes/dojin_attributes.rb +60 -0
- data/lib/dmm-crawler/client.rb +7 -3
- data/lib/dmm-crawler/ranking/adult_game_ranking.rb +53 -0
- data/lib/dmm-crawler/ranking/base_ranking.rb +25 -0
- data/lib/dmm-crawler/ranking/dojin_ranking.rb +39 -0
- data/lib/dmm-crawler/version.rb +1 -1
- data/spec/dmm-crawler/ranking/adult_game_ranking_spec.rb +33 -0
- data/spec/dmm-crawler/{ranking_spec.rb → ranking/dojin_ranking_spec.rb} +4 -3
- data/spec/spec_helper.rb +2 -0
- metadata +11 -6
- data/lib/dmm-crawler/attributes.rb +0 -103
- data/lib/dmm-crawler/ranking.rb +0 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: daae94752568d25d37a9e8f43e791ec58d649f12
|
4
|
+
data.tar.gz: 27326b524fbe3f3c55f75b87228f047963eb5566
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0fbbb9d7ef6453ec7515137939bcceff13f2f26398a2aa51a80f6c0db6c5acc256068ccbffaa95aabf34972f185bce4423f9bc735da4d8f32380a9084d43950
|
7
|
+
data.tar.gz: 59901768a8928a88bf69df43a084aff0d9cd2689435be47514940c1e45c54ef7f6b642511a8f5bc18a6366d42ef1aae30a57f1de3977ff26ba67885a57f50a4a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
# DMM Crawler
|
4
4
|
|
5
|
+
## :warning: Caution :warning:
|
6
|
+
|
7
|
+
FANZA does not permit crawling of its pages, so I recommend not using this gem.
|
8
|
+
|
9
|
+
I do not take any responsibility or liability for any damage or loss caused by this gem.
|
10
|
+
|
5
11
|
## What is DMM Crawler
|
6
12
|
|
7
13
|
Show DMM and DMM.R18's crawled data. Currently, all doujin rankings are crawlable.
|
data/doc/ja/README.md
CHANGED
data/lib/dmm-crawler.rb
CHANGED
@@ -6,7 +6,11 @@ module DMMCrawler
|
|
6
6
|
end
|
7
7
|
|
8
8
|
require 'dmm-crawler/agent'
|
9
|
-
require 'dmm-crawler/attributes'
|
10
|
-
require 'dmm-crawler/ranking'
|
11
9
|
require 'dmm-crawler/client'
|
10
|
+
require 'dmm-crawler/attributes/base_attributes'
|
11
|
+
require 'dmm-crawler/attributes/dojin_attributes.rb'
|
12
|
+
require 'dmm-crawler/attributes/adult_game_attributes.rb'
|
13
|
+
require 'dmm-crawler/ranking/base_ranking'
|
14
|
+
require 'dmm-crawler/ranking/dojin_ranking.rb'
|
15
|
+
require 'dmm-crawler/ranking/adult_game_ranking.rb'
|
12
16
|
require 'dmm-crawler/version'
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module DMMCrawler
  module Attributes
    # Extracts the attributes of one adult-game product page.
    #
    # The page is fetched by BaseAttributes#initialize and held in @page.
    # Tags are not scraped from the HTML; they are read from the item
    # returned by the API client held in @r_client.
    class AdultGameAttributes < BaseAttributes
      # Maximum number of sample images collected per product.
      SAMPLE_IMAGE_LIMIT = 3
      # The brand is read from the last of the first N detail-table cells.
      BRAND_CELL_COUNT = 8

      # @return [Array] title, page URL, package image URL, sample image URLs,
      #   submedia, brand, affiliateable flag and tag names, in that order
      def to_a
        [
          title,
          title_link,
          main_image_url,
          sample_image_urls,
          submedia,
          brand,
          affiliateable?,
          tags
        ]
      end

      private

      # Text of the heading's last child with every 【...】 label removed.
      # The labels are removed before stripping so no stray whitespace is left,
      # and the match is non-greedy so text between two labels survives.
      def title
        @page.search('.page-detail h1').first.children.last.text.gsub(/【.*?】/, '').strip
      end

      # URL of the scraped page itself.
      def title_link
        @page.uri.to_s
      end

      # href of the first package-image link.
      def main_image_url
        @page.search('.area-package-image').search('.package-image-box a').first.attributes['href'].value
      end

      # src values of up to SAMPLE_IMAGE_LIMIT sample images; images without a
      # src attribute are skipped.
      def sample_image_urls
        @page
          .search('#item-rotationbnr li span img')
          .take(SAMPLE_IMAGE_LIMIT)
          .map { |img| img['src'] }
          .compact
      end

      # Every page handled by this class is reported as 'adult_game'.
      def submedia
        'adult_game'
      end

      # Stripped text of the last cell among the first BRAND_CELL_COUNT cells
      # of the detail table.
      def brand
        @page.search('.head-detail table tr td').take(BRAND_CELL_COUNT).last.text.strip
      end

      # Genre names of the API item; empty when the API returned no item or the
      # item carries no genre list.
      # NOTE(review): assumes the API body is built from Hash/Array objects that
      # support #dig -- confirm against the Rdmm response type.
      def tags
        Array(item&.dig('iteminfo', 'genre')).map { |genre| genre['name'] }
      end

      # The "views_<digits>" fragment of the page URL as a String, or nil when
      # the URL has no such fragment.
      def content_id
        @page.uri.to_s[/views_\d*/]
      end

      # First item the API returns for this page's content id (memoized).
      def item
        @item ||= @r_client.list_items(site: 'DMM.R18', content_id: content_id).body['result']['items'][0]
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module DMMCrawler
  module Attributes
    # Shared base for the page scrapers.
    #
    # The constructor fetches the page and builds the API client; subclasses
    # supply #to_a and whichever attribute readers they use.
    class BaseAttributes
      HTTP_STATUS_CODE_OF_SUCCESS = 200

      # @param url [String] address of the page to fetch
      # @param agent [#get] object used to fetch the page; defaults to the
      #   shared Agent instance
      def initialize(url, agent: Agent.instance.agent)
        @page = agent.get(url)
        @r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
      end

      # @return [Array] the scraped attributes; must be provided by subclasses
      # @raise [NotImplementedError] when the subclass does not override it
      def to_a
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      # Whether an API keyword search for this page's title reports success.
      #
      # Kept public: Client#affiliateable? calls it from outside the object.
      #
      # @return [Boolean]
      def affiliateable?
        @r_client.list_items(site: 'DMM.R18', keyword: title).body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
      end

      private

      # @return [Boolean] true when the page URL contains "doujin"
      def art_page?
        @page.uri.to_s.include?('doujin')
      end

      # @return [Boolean] true when the page URL contains "dlsoft"
      def adult_game?
        @page.uri.to_s.include?('dlsoft')
      end

      # The readers below are hooks for subclasses; each raises until overridden.

      def title
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      def title_link
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      def image_url
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      def submedia
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      def author
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      def brand
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end

      def tags
        raise NotImplementedError, "#{self.class} must implement ##{__method__}"
      end
    end
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module DMMCrawler
  module Attributes
    # Extracts the attributes of one doujin product page held in @page.
    class DojinAttributes < BaseAttributes
      # @return [Array] title, page URL, image URL, submedia, author,
      #   affiliateable flag and tag names, in that order (seven values)
      def to_a
        [
          title,
          title_link,
          image_url,
          submedia,
          author,
          affiliateable?,
          tags
        ]
      end

      private

      # Product title without the <span> children of the title element.
      #
      # The spans are removed from the parsed page itself, so the result is
      # memoized: #to_a reads the title and #affiliateable? reads it again.
      def title
        @title ||= begin
          @page.search('.productTitle__txt span').remove
          @page.search('.productTitle__txt').text.strip
        end
      end

      # URL of the scraped page itself.
      def title_link
        @page.uri.to_s
      end

      # Preview image URL: the data-src attribute when present, otherwise src.
      def image_url
        attrs = @page.search('.productPreview__item img').first.attributes
        (attrs['data-src'] || attrs['src']).value
      end

      # Genre icon's class attribute with the 'c_icon_productGenre ' prefix and
      # all hyphens removed.
      def submedia
        @page
          .search('.productAttribute-listItem .c_icon_productGenre')
          .first
          .attributes['class']
          .value
          .gsub('c_icon_productGenre ', '')
          .delete('-')
      end

      # Stripped text of the circle-name element.
      def author
        @page.search('div.circleName__item').text.strip
      end

      # Stripped text of the last of the first eight detail-table cells, or nil
      # when the page has no such cells. Returns text rather than the raw node
      # so it agrees with AdultGameAttributes#brand.
      def brand
        @page.search('.head-detail table tr td').take(8).last&.text&.strip
      end

      # Stripped text of every genre tag link.
      def tags
        @page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
      end
    end
  end
end
|
data/lib/dmm-crawler/client.rb
CHANGED
@@ -9,15 +9,19 @@ module DMMCrawler
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def rankings(arguments)
|
12
|
-
Ranking.new(arguments.merge!(agent: @agent)).arts
|
12
|
+
Ranking::DojinRanking.new(arguments.merge!(agent: @agent)).arts
|
13
|
+
end
|
14
|
+
|
15
|
+
def adult_game_rankings(arguments)
|
16
|
+
Ranking::AdultGameRanking.new(arguments.merge!(agent: @agent)).arts
|
13
17
|
end
|
14
18
|
|
15
19
|
def get_attributes(url)
|
16
|
-
Attributes.new(url, agent: @agent).to_a
|
20
|
+
Attributes::DojinAttributes.new(url, agent: @agent).to_a
|
17
21
|
end
|
18
22
|
|
19
23
|
def affiliateable?(url)
|
20
|
-
Attributes.new(url, agent: @agent).affiliateable?
|
24
|
+
Attributes::DojinAttributes.new(url, agent: @agent).affiliateable?
|
21
25
|
end
|
22
26
|
end
|
23
27
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module DMMCrawler
  module Ranking
    # Builds the adult-game ranking by scraping the ranking page and then each
    # listed product page.
    class AdultGameRanking < BaseRanking
      # Lets AdultGameAttributes be referenced without its namespace.
      include Attributes

      FETCHING_LIMITATION = 20
      DLSOFT_URL = "http://dlsoft.dmm.co.jp/".freeze

      # @param agent [Mechanize] agent used for every request
      # @param term [String, nil] 'weekly', 'monthly' or 'yearly'; nil is
      #   treated like 'monthly' (no term segment in the URL)
      # @raise [TypeError] when agent is not a Mechanize instance or term is
      #   not one of the supported values
      def initialize(agent: Agent.instance.agent, term: nil)
        @agent = discriminate_agent(agent)
        @term = term
        # compact drops the nil segment; File.join would raise TypeError on it.
        @url = URI.join(DLSOFT_URL, File.join(*['ranking', parameterized_term].compact))
      end

      # Scrapes up to FETCHING_LIMITATION ranked games.
      #
      # @return [Array<Hash>] one hash per game, ranked from 1 in page order
      def arts
        games = page.search('.rankingList-content .rankingList-item.fn-rankListItem').take(FETCHING_LIMITATION)
        rows = games.map do |game|
          sleep_each do
            url = game.search('.rankingList-link').first.attributes['href'].value
            AdultGameAttributes.new(url, agent: @agent).to_a
          end
        end

        # Names follow the order of AdultGameAttributes#to_a; the sixth value is
        # the brand, published under the :author key.
        rows.map.with_index(1) do |(title, title_link, main_image_url, sample_image_urls, submedia, brand, affiliateable, tags), rank|
          {
            title: title,
            title_link: title_link,
            main_image_url: main_image_url,
            sample_image_urls: sample_image_urls,
            submedia: submedia,
            author: brand,
            rank: rank,
            affiliateable: affiliateable,
            tags: tags
          }
        end
      end

      private

      # URL path segment for the requested term, or nil when no segment is
      # needed.
      #
      # @raise [TypeError] for an unsupported term (the same exception class the
      #   constructor raised for such values before)
      def parameterized_term
        case @term
        when 'weekly'
          'term=weekly'
        when 'monthly', nil
          nil
        when 'yearly'
          "term=first/year=#{Time.now.year}/"
        else
          raise TypeError, "unsupported term: #{@term.inspect}"
        end
      end
    end
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module DMMCrawler
  module Ranking
    # Shared helpers for ranking crawlers. Subclasses set @agent and @url and
    # implement #arts.
    class BaseRanking
      # @return [Array<Hash>] ranked entries; must be provided by subclasses
      # @raise [NotImplementedError] when the subclass does not override it
      def arts
        raise NotImplementedError, "#{self.class} must implement #arts"
      end

      private

      # Fetches @url with @agent. Not memoized: every call issues a new request.
      def page
        @agent.get(@url)
      end

      # Sleeps for a random 0.7-1.3 seconds, then runs the block and returns its
      # value.
      def sleep_each
        sleep rand(0.7..1.3)
        yield
      end

      # Returns the agent unchanged when it is a Mechanize instance.
      #
      # @raise [TypeError] for anything else, including nil
      def discriminate_agent(agent)
        return agent if agent.is_a?(Mechanize)

        raise TypeError, "agent must be a Mechanize instance, got #{agent.class}"
      end
    end
  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module DMMCrawler
  module Ranking
    # Builds the doujin ranking by scraping the ranking page and then each
    # listed product page.
    class DojinRanking < BaseRanking
      # Lets DojinAttributes be referenced without its namespace.
      include Attributes

      FETCHING_LIMITATION = 10

      # @param agent [Mechanize] agent used for every request
      # @param submedia [String, nil] value for the URL's submedia= segment
      # @param term [String, nil] value for the URL's term= segment
      # @raise [TypeError] when agent is not a Mechanize instance
      def initialize(agent:, submedia: nil, term: nil)
        @agent = discriminate_agent(agent)
        @submedia = submedia
        @term = term
        @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
      end

      # Scrapes up to FETCHING_LIMITATION ranked works.
      #
      # @return [Array<Hash>] one hash per work, ranked from 1 in page order
      def arts
        rows = page.search('.rank-rankListItem.fn-setPurchaseChange').take(FETCHING_LIMITATION).map do |element|
          sleep_each do
            url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
            DojinAttributes.new(url, agent: @agent).to_a
          end
        end

        # The names must follow DojinAttributes#to_a exactly (seven values); an
        # extra name here would shift every value after it.
        rows.map.with_index(1) do |(title, title_link, image_url, submedia, author, affiliateable, tags), rank|
          {
            title: title,
            title_link: title_link,
            image_url: image_url,
            submedia: submedia,
            author: author,
            rank: rank,
            # No price is scraped; the key is kept so the hash shape is stable.
            price: nil,
            affiliateable: affiliateable,
            tags: tags
          }
        end
      end
    end
  end
end
|
data/lib/dmm-crawler/version.rb
CHANGED
@@ -0,0 +1,33 @@
|
|
1
|
+
# Specs for DMMCrawler::Ranking::AdultGameRanking#arts.
describe DMMCrawler::Ranking::AdultGameRanking do
  let(:agent) { DMMCrawler::Agent.instance.agent }
  let(:arguments) { { agent: agent, term: term } }

  describe '#arts' do
    subject { attachments }

    # Pause after every example.
    after { sleep 2 }

    context 'with length' do
      let(:term) { 'weekly' }
      # The initializer takes keywords only, so the hash is splatted; a
      # positional Hash is rejected by Ruby 3.
      let(:attachments) { described_class.new(**arguments).arts.length }

      it { is_expected.to be 20 }
    end

    context 'with weekly argument' do
      let(:term) { 'weekly' }
      let(:attachments) { described_class.new(**arguments).arts }

      it { is_expected.to all(include(:title, :title_link, :main_image_url, :sample_image_urls, :submedia, :author, :rank, :affiliateable, :tags)) }
      # No value may be nil or an empty string.
      it { is_expected.to all(satisfy { |art| art.values.none? { |v| v.nil? || v == '' } }) }
    end

    context 'with not registered argument' do
      let(:agent) { nil }
      let(:term) { 'weekly' }
      let(:attachments) { -> { described_class.new(**arguments).arts } }

      it { is_expected.to raise_error(TypeError) }
    end
  end
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
describe DMMCrawler::Ranking do
|
1
|
+
describe DMMCrawler::Ranking::DojinRanking do
|
2
2
|
let(:agent) { DMMCrawler::Agent.instance.agent }
|
3
3
|
let(:submedia) { 'cg' }
|
4
4
|
let(:arguments) { { submedia: submedia, term: term, agent: agent } }
|
@@ -19,13 +19,14 @@ describe DMMCrawler::Ranking do
|
|
19
19
|
let(:attachments) { described_class.new(arguments).arts }
|
20
20
|
let(:term) { '24' }
|
21
21
|
|
22
|
-
it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :
|
22
|
+
it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :rank, :affiliateable, :tags)) }
|
23
23
|
it { is_expected.to all(satisfy { |art| art.all? { |_k, v| v != '' } }) }
|
24
24
|
end
|
25
25
|
|
26
26
|
context 'with not registered argument' do
|
27
27
|
let(:attachments) { -> { described_class.new(arguments).arts } }
|
28
|
-
let(:term) {
|
28
|
+
let(:term) { '24' }
|
29
|
+
let(:agent) { nil }
|
29
30
|
|
30
31
|
it { is_expected.to raise_error(TypeError) }
|
31
32
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmm-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Ohmori
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdmm
|
@@ -128,11 +128,16 @@ files:
|
|
128
128
|
- doc/ja/README.md
|
129
129
|
- lib/dmm-crawler.rb
|
130
130
|
- lib/dmm-crawler/agent.rb
|
131
|
-
- lib/dmm-crawler/attributes.rb
|
131
|
+
- lib/dmm-crawler/attributes/adult_game_attributes.rb
|
132
|
+
- lib/dmm-crawler/attributes/base_attributes.rb
|
133
|
+
- lib/dmm-crawler/attributes/dojin_attributes.rb
|
132
134
|
- lib/dmm-crawler/client.rb
|
133
|
-
- lib/dmm-crawler/ranking.rb
|
135
|
+
- lib/dmm-crawler/ranking/adult_game_ranking.rb
|
136
|
+
- lib/dmm-crawler/ranking/base_ranking.rb
|
137
|
+
- lib/dmm-crawler/ranking/dojin_ranking.rb
|
134
138
|
- lib/dmm-crawler/version.rb
|
135
|
-
- spec/dmm-crawler/
|
139
|
+
- spec/dmm-crawler/ranking/adult_game_ranking_spec.rb
|
140
|
+
- spec/dmm-crawler/ranking/dojin_ranking_spec.rb
|
136
141
|
- spec/spec_helper.rb
|
137
142
|
homepage: https://github.com/sachin21/dmm-crawler
|
138
143
|
licenses:
|
@@ -154,7 +159,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
159
|
version: '0'
|
155
160
|
requirements: []
|
156
161
|
rubyforge_project:
|
157
|
-
rubygems_version: 2.
|
162
|
+
rubygems_version: 2.6.14.3
|
158
163
|
signing_key:
|
159
164
|
specification_version: 4
|
160
165
|
summary: Show DMM and DMM.R18's crawled data
|
@@ -1,103 +0,0 @@
|
|
1
|
-
module DMMCrawler
|
2
|
-
class Attributes
|
3
|
-
HTTP_STATUS_CODE_OF_SUCCESS = 200
|
4
|
-
|
5
|
-
def initialize(url, agent: Agent.instance.agent)
|
6
|
-
@page = agent.get(url)
|
7
|
-
@r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
|
8
|
-
end
|
9
|
-
|
10
|
-
def to_a
|
11
|
-
[
|
12
|
-
title,
|
13
|
-
title_link,
|
14
|
-
image_url,
|
15
|
-
submedia,
|
16
|
-
author,
|
17
|
-
informations,
|
18
|
-
affiliateable?,
|
19
|
-
tags
|
20
|
-
]
|
21
|
-
end
|
22
|
-
|
23
|
-
def affiliateable?
|
24
|
-
@r_client.list_items(site: 'DMM.R18', keyword: title).body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
|
25
|
-
end
|
26
|
-
|
27
|
-
private
|
28
|
-
|
29
|
-
def title
|
30
|
-
if art_page?
|
31
|
-
@page.search('.productTitle__txt span').remove
|
32
|
-
@page.search('.productTitle__txt').text.strip
|
33
|
-
else
|
34
|
-
@page.search('.rank-name').first.text.strip
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def title_link
|
39
|
-
if art_page?
|
40
|
-
@page.uri.to_s
|
41
|
-
else
|
42
|
-
File.join(BASE_URL, @page.search('.rank-name').first.search('a').first.attributes.first[1].value)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def image_url
|
47
|
-
attrs = @page.search('.productPreview__item img').first.attributes
|
48
|
-
|
49
|
-
if attrs['data-src']
|
50
|
-
attrs['data-src'].value
|
51
|
-
else
|
52
|
-
attrs['src'].value
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def submedia
|
57
|
-
@page
|
58
|
-
.search('.productAttribute-listItem .c_icon_productGenre')
|
59
|
-
.first
|
60
|
-
.attributes['class']
|
61
|
-
.value
|
62
|
-
.gsub('c_icon_productGenre ', '')
|
63
|
-
.delete('-')
|
64
|
-
end
|
65
|
-
|
66
|
-
def author
|
67
|
-
@page.search('div.circleName__item').text.strip
|
68
|
-
end
|
69
|
-
|
70
|
-
def informations
|
71
|
-
keys = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__ttl'))
|
72
|
-
values = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__txt'))
|
73
|
-
|
74
|
-
information = keys.zip(values)
|
75
|
-
series = information.find { |array| array.first == 'シリーズ' }
|
76
|
-
|
77
|
-
if series
|
78
|
-
information = information.reject { |array| array.first == 'シリーズ' }
|
79
|
-
information.push(series)
|
80
|
-
end
|
81
|
-
|
82
|
-
information.map { |key, value| { key: key, value: value } }
|
83
|
-
end
|
84
|
-
|
85
|
-
def tags
|
86
|
-
if art_page?
|
87
|
-
@page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
|
88
|
-
else
|
89
|
-
@page.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def extract_text(elements)
|
94
|
-
elements
|
95
|
-
.reject { |element| element.text.strip == 'ジャンル' }
|
96
|
-
.map { |element| element.children.text.strip }
|
97
|
-
end
|
98
|
-
|
99
|
-
def art_page?
|
100
|
-
@page.search('.rank-name').empty?
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
data/lib/dmm-crawler/ranking.rb
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
module DMMCrawler
|
2
|
-
class Ranking
|
3
|
-
def initialize(arguments)
|
4
|
-
@agent = discriminate_agent(arguments[:agent])
|
5
|
-
@term = discriminate_term(arguments[:term])
|
6
|
-
@submedia = arguments[:submedia]
|
7
|
-
@url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
|
8
|
-
end
|
9
|
-
|
10
|
-
def arts
|
11
|
-
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').take(10).map do |element|
|
12
|
-
sleep_each do
|
13
|
-
url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
|
14
|
-
Attributes.new(url, agent: @agent).to_a
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, informations, affiliateable, tags), rank|
|
19
|
-
{
|
20
|
-
title: title,
|
21
|
-
title_link: title_link,
|
22
|
-
image_url: image_url,
|
23
|
-
submedia: submedia,
|
24
|
-
author: author,
|
25
|
-
informations: informations,
|
26
|
-
rank: rank,
|
27
|
-
affiliateable: affiliateable,
|
28
|
-
tags: tags
|
29
|
-
}
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
def page
|
36
|
-
@agent.get(@url)
|
37
|
-
end
|
38
|
-
|
39
|
-
def discriminate_term(term)
|
40
|
-
return term if %w[24 weekly monthly total].include?(term)
|
41
|
-
raise TypeError
|
42
|
-
end
|
43
|
-
|
44
|
-
def discriminate_agent(agent)
|
45
|
-
return agent if agent.is_a?(Mechanize)
|
46
|
-
raise TypeError
|
47
|
-
end
|
48
|
-
|
49
|
-
def sleep_each
|
50
|
-
sleep rand(0.7..1.3)
|
51
|
-
yield
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|