dmm-crawler 0.3.5 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/CHANGELOG.md +4 -0
- data/README.md +6 -0
- data/doc/ja/README.md +5 -0
- data/lib/dmm-crawler.rb +6 -2
- data/lib/dmm-crawler/attributes/adult_game_attributes.rb +56 -0
- data/lib/dmm-crawler/attributes/base_attributes.rb +58 -0
- data/lib/dmm-crawler/attributes/dojin_attributes.rb +60 -0
- data/lib/dmm-crawler/client.rb +7 -3
- data/lib/dmm-crawler/ranking/adult_game_ranking.rb +53 -0
- data/lib/dmm-crawler/ranking/base_ranking.rb +25 -0
- data/lib/dmm-crawler/ranking/dojin_ranking.rb +39 -0
- data/lib/dmm-crawler/version.rb +1 -1
- data/spec/dmm-crawler/ranking/adult_game_ranking_spec.rb +33 -0
- data/spec/dmm-crawler/{ranking_spec.rb → ranking/dojin_ranking_spec.rb} +4 -3
- data/spec/spec_helper.rb +2 -0
- metadata +11 -6
- data/lib/dmm-crawler/attributes.rb +0 -103
- data/lib/dmm-crawler/ranking.rb +0 -54
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: daae94752568d25d37a9e8f43e791ec58d649f12
|
4
|
+
data.tar.gz: 27326b524fbe3f3c55f75b87228f047963eb5566
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a0fbbb9d7ef6453ec7515137939bcceff13f2f26398a2aa51a80f6c0db6c5acc256068ccbffaa95aabf34972f185bce4423f9bc735da4d8f32380a9084d43950
|
7
|
+
data.tar.gz: 59901768a8928a88bf69df43a084aff0d9cd2689435be47514940c1e45c54ef7f6b642511a8f5bc18a6366d42ef1aae30a57f1de3977ff26ba67885a57f50a4a
|
data/CHANGELOG.md
CHANGED
data/README.md
CHANGED
@@ -2,6 +2,12 @@
|
|
2
2
|
|
3
3
|
# DMM Crawler
|
4
4
|
|
5
|
+
## :warning: Caution :warning:
|
6
|
+
|
7
|
+
FANZA does not permit crawling of its pages, so I recommend not using this gem.
|
8
|
+
|
9
|
+
I do not take any responsibility or liability for any damage or loss caused by my gem.
|
10
|
+
|
5
11
|
## What is DMM Crawler
|
6
12
|
|
7
13
|
Shows data crawled from DMM and DMM.R18. Currently, all doujin rankings are crawlable.
|
data/doc/ja/README.md
CHANGED
data/lib/dmm-crawler.rb
CHANGED
@@ -6,7 +6,11 @@ module DMMCrawler
|
|
6
6
|
end
|
7
7
|
|
8
8
|
require 'dmm-crawler/agent'
|
9
|
-
require 'dmm-crawler/attributes'
|
10
|
-
require 'dmm-crawler/ranking'
|
11
9
|
require 'dmm-crawler/client'
|
10
|
+
require 'dmm-crawler/attributes/base_attributes'
|
11
|
+
require 'dmm-crawler/attributes/dojin_attributes.rb'
|
12
|
+
require 'dmm-crawler/attributes/adult_game_attributes.rb'
|
13
|
+
require 'dmm-crawler/ranking/base_ranking'
|
14
|
+
require 'dmm-crawler/ranking/dojin_ranking.rb'
|
15
|
+
require 'dmm-crawler/ranking/adult_game_ranking.rb'
|
12
16
|
require 'dmm-crawler/version'
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
module Attributes
|
3
|
+
class AdultGameAttributes < BaseAttributes
|
4
|
+
def to_a
|
5
|
+
[
|
6
|
+
title,
|
7
|
+
title_link,
|
8
|
+
main_image_url,
|
9
|
+
sample_image_urls,
|
10
|
+
submedia,
|
11
|
+
brand,
|
12
|
+
affiliateable?,
|
13
|
+
tags
|
14
|
+
]
|
15
|
+
end
|
16
|
+
|
17
|
+
private
|
18
|
+
|
19
|
+
def title
|
20
|
+
@page.search('.page-detail h1').first.children.last.text.strip.gsub(/【.*】/, '')
|
21
|
+
end
|
22
|
+
|
23
|
+
def title_link
|
24
|
+
@page.uri.to_s
|
25
|
+
end
|
26
|
+
|
27
|
+
def main_image_url
|
28
|
+
@page.search('.area-package-image').search('.package-image-box a').first.attributes['href'].value
|
29
|
+
end
|
30
|
+
|
31
|
+
def sample_image_urls
|
32
|
+
@page.search('#item-rotationbnr li span img').take(3).map { |img| img&.attributes&.send(:[], 'src')&.value }.compact
|
33
|
+
end
|
34
|
+
|
35
|
+
def submedia
|
36
|
+
'adult_game'
|
37
|
+
end
|
38
|
+
|
39
|
+
def brand
|
40
|
+
@page.search('.head-detail table tr td').take(8).last.text.strip
|
41
|
+
end
|
42
|
+
|
43
|
+
def tags
|
44
|
+
item['iteminfo']['genre'].map { |h| h['name'] }
|
45
|
+
end
|
46
|
+
|
47
|
+
def content_id
|
48
|
+
@page.uri.to_s.match(/views_\d*/)
|
49
|
+
end
|
50
|
+
|
51
|
+
def item
|
52
|
+
@item ||= @r_client.list_items(site: 'DMM.R18', content_id: content_id).body['result']['items'][0]
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
module Attributes
|
3
|
+
class BaseAttributes
|
4
|
+
HTTP_STATUS_CODE_OF_SUCCESS = 200
|
5
|
+
|
6
|
+
def initialize(url, agent: Agent.instance.agent)
|
7
|
+
@page = agent.get(url)
|
8
|
+
@r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
|
9
|
+
end
|
10
|
+
|
11
|
+
def to_a
|
12
|
+
raise NotImplementedError
|
13
|
+
end
|
14
|
+
|
15
|
+
private
|
16
|
+
|
17
|
+
def affiliateable?
|
18
|
+
@r_client.list_items(site: 'DMM.R18', keyword: title).body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
|
19
|
+
end
|
20
|
+
|
21
|
+
def art_page?
|
22
|
+
@page.uri.to_s =~ /doujin/
|
23
|
+
end
|
24
|
+
|
25
|
+
def adult_game?
|
26
|
+
@page.uri.to_s =~ /dlsoft/
|
27
|
+
end
|
28
|
+
|
29
|
+
def title
|
30
|
+
raise NotImplementedError
|
31
|
+
end
|
32
|
+
|
33
|
+
def title_link
|
34
|
+
raise NotImplementedError
|
35
|
+
end
|
36
|
+
|
37
|
+
def image_url
|
38
|
+
raise NotImplementedError
|
39
|
+
end
|
40
|
+
|
41
|
+
def submedia
|
42
|
+
raise NotImplementedError
|
43
|
+
end
|
44
|
+
|
45
|
+
def author
|
46
|
+
raise NotImplementedError
|
47
|
+
end
|
48
|
+
|
49
|
+
def brand
|
50
|
+
raise NotImplementedError
|
51
|
+
end
|
52
|
+
|
53
|
+
def tags
|
54
|
+
raise NotImplementedError
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
module Attributes
|
3
|
+
class DojinAttributes < BaseAttributes
|
4
|
+
def to_a
|
5
|
+
[
|
6
|
+
title,
|
7
|
+
title_link,
|
8
|
+
image_url,
|
9
|
+
submedia,
|
10
|
+
author,
|
11
|
+
affiliateable?,
|
12
|
+
tags
|
13
|
+
]
|
14
|
+
end
|
15
|
+
|
16
|
+
private
|
17
|
+
|
18
|
+
def title
|
19
|
+
@page.search('.productTitle__txt span').remove
|
20
|
+
@page.search('.productTitle__txt').text.strip
|
21
|
+
end
|
22
|
+
|
23
|
+
def title_link
|
24
|
+
@page.uri.to_s
|
25
|
+
end
|
26
|
+
|
27
|
+
def image_url
|
28
|
+
attrs = @page.search('.productPreview__item img').first.attributes
|
29
|
+
|
30
|
+
if attrs['data-src']
|
31
|
+
attrs['data-src'].value
|
32
|
+
else
|
33
|
+
attrs['src'].value
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
37
|
+
def submedia
|
38
|
+
@page
|
39
|
+
.search('.productAttribute-listItem .c_icon_productGenre')
|
40
|
+
.first
|
41
|
+
.attributes['class']
|
42
|
+
.value
|
43
|
+
.gsub('c_icon_productGenre ', '')
|
44
|
+
.delete('-')
|
45
|
+
end
|
46
|
+
|
47
|
+
def author
|
48
|
+
@page.search('div.circleName__item').text.strip
|
49
|
+
end
|
50
|
+
|
51
|
+
def brand
|
52
|
+
@page.search('.head-detail table tr td').take(8).last
|
53
|
+
end
|
54
|
+
|
55
|
+
def tags
|
56
|
+
@page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
data/lib/dmm-crawler/client.rb
CHANGED
@@ -9,15 +9,19 @@ module DMMCrawler
|
|
9
9
|
end
|
10
10
|
|
11
11
|
def rankings(arguments)
|
12
|
-
Ranking.new(arguments.merge!(agent: @agent)).arts
|
12
|
+
Ranking::DojinRanking.new(arguments.merge!(agent: @agent)).arts
|
13
|
+
end
|
14
|
+
|
15
|
+
def adult_game_rankings(arguments)
|
16
|
+
Ranking::AdultGameRanking.new(arguments.merge!(agent: @agent)).arts
|
13
17
|
end
|
14
18
|
|
15
19
|
def get_attributes(url)
|
16
|
-
Attributes.new(url, agent: @agent).to_a
|
20
|
+
Attributes::DojinAttributes.new(url, agent: @agent).to_a
|
17
21
|
end
|
18
22
|
|
19
23
|
def affiliateable?(url)
|
20
|
-
Attributes.new(url, agent: @agent).affiliateable?
|
24
|
+
Attributes::DojinAttributes.new(url, agent: @agent).affiliateable?
|
21
25
|
end
|
22
26
|
end
|
23
27
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
module Ranking
|
3
|
+
class AdultGameRanking < BaseRanking
|
4
|
+
include Attributes
|
5
|
+
|
6
|
+
FETCHING_LIMITATION = 20
|
7
|
+
DLSOFT_URL = "http://dlsoft.dmm.co.jp/"
|
8
|
+
|
9
|
+
def initialize(agent: Agent.instance.agent, term: nil)
|
10
|
+
@agent = discriminate_agent(agent)
|
11
|
+
@term = term
|
12
|
+
@url = URI.join(DLSOFT_URL, File.join('ranking', parameterized_term))
|
13
|
+
end
|
14
|
+
|
15
|
+
def arts
|
16
|
+
games = page.search('.rankingList-content .rankingList-item.fn-rankListItem').take(FETCHING_LIMITATION)
|
17
|
+
arts = games.map do |game|
|
18
|
+
sleep_each do
|
19
|
+
url = game.search('.rankingList-link').first.attributes['href'].value
|
20
|
+
AdultGameAttributes.new(url, agent: @agent).to_a
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
arts.map.with_index(1) do |(title, title_link, main_image_url, sample_image_urls, submedia, author, affiliateable, tags), rank|
|
25
|
+
{
|
26
|
+
title: title,
|
27
|
+
title_link: title_link,
|
28
|
+
main_image_url: main_image_url,
|
29
|
+
sample_image_urls: sample_image_urls,
|
30
|
+
submedia: submedia,
|
31
|
+
author: author,
|
32
|
+
rank: rank,
|
33
|
+
affiliateable: affiliateable,
|
34
|
+
tags: tags
|
35
|
+
}
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
private
|
40
|
+
|
41
|
+
def parameterized_term
|
42
|
+
case @term
|
43
|
+
when 'weekly'
|
44
|
+
'term=weekly'
|
45
|
+
when 'monthly'
|
46
|
+
nil
|
47
|
+
when 'yearly'
|
48
|
+
"term=first/year=#{Time.now.year}/"
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
module Ranking
|
3
|
+
class BaseRanking
|
4
|
+
def arts
|
5
|
+
raise NotImplementedError
|
6
|
+
end
|
7
|
+
|
8
|
+
private
|
9
|
+
|
10
|
+
def page
|
11
|
+
@agent.get(@url)
|
12
|
+
end
|
13
|
+
|
14
|
+
def sleep_each
|
15
|
+
sleep rand(0.7..1.3)
|
16
|
+
yield
|
17
|
+
end
|
18
|
+
|
19
|
+
def discriminate_agent(agent)
|
20
|
+
return agent if agent.is_a?(Mechanize)
|
21
|
+
raise TypeError
|
22
|
+
end
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module DMMCrawler
|
2
|
+
module Ranking
|
3
|
+
class DojinRanking < BaseRanking
|
4
|
+
include Attributes
|
5
|
+
|
6
|
+
FETCHING_LIMITATION = 10
|
7
|
+
|
8
|
+
def initialize(agent:, submedia: nil, term: nil)
|
9
|
+
@agent = discriminate_agent(agent)
|
10
|
+
@submedia = submedia
|
11
|
+
@term = submedia
|
12
|
+
@url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
|
13
|
+
end
|
14
|
+
|
15
|
+
def arts
|
16
|
+
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').take(FETCHING_LIMITATION).map do |element|
|
17
|
+
sleep_each do
|
18
|
+
url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
|
19
|
+
DojinAttributes.new(url, agent: @agent).to_a
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, price, affiliateable, tags), rank|
|
24
|
+
{
|
25
|
+
title: title,
|
26
|
+
title_link: title_link,
|
27
|
+
image_url: image_url,
|
28
|
+
submedia: submedia,
|
29
|
+
author: author,
|
30
|
+
rank: rank,
|
31
|
+
price: price,
|
32
|
+
affiliateable: affiliateable,
|
33
|
+
tags: tags
|
34
|
+
}
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
data/lib/dmm-crawler/version.rb
CHANGED
@@ -0,0 +1,33 @@
|
|
1
|
+
describe DMMCrawler::Ranking::AdultGameRanking do
|
2
|
+
let(:agent) { DMMCrawler::Agent.instance.agent }
|
3
|
+
let(:arguments) { { agent: agent, term: term } }
|
4
|
+
|
5
|
+
describe '#arts' do
|
6
|
+
subject { attachments }
|
7
|
+
|
8
|
+
after { sleep 2 }
|
9
|
+
|
10
|
+
context 'with length' do
|
11
|
+
let(:term) { 'weekly' }
|
12
|
+
let(:attachments) { described_class.new(arguments).arts.length }
|
13
|
+
|
14
|
+
it { is_expected.to be 20 }
|
15
|
+
end
|
16
|
+
|
17
|
+
context 'with weekly argument' do
|
18
|
+
let(:term) { 'weekly' }
|
19
|
+
let(:attachments) { described_class.new(arguments).arts }
|
20
|
+
|
21
|
+
it { is_expected.to all(include(:title, :title_link, :main_image_url, :sample_image_urls, :submedia, :author, :rank, :affiliateable, :tags)) }
|
22
|
+
it { is_expected.to all(satisfy { |art| art.all? { |_k, v| v != '' && v != nil} }) }
|
23
|
+
end
|
24
|
+
|
25
|
+
context 'with not registered argument' do
|
26
|
+
let(:agent) { nil }
|
27
|
+
let(:term) { 'weekly' }
|
28
|
+
let(:attachments) { -> { described_class.new(arguments).arts } }
|
29
|
+
|
30
|
+
it { is_expected.to raise_error(TypeError) }
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
describe DMMCrawler::Ranking do
|
1
|
+
describe DMMCrawler::Ranking::DojinRanking do
|
2
2
|
let(:agent) { DMMCrawler::Agent.instance.agent }
|
3
3
|
let(:submedia) { 'cg' }
|
4
4
|
let(:arguments) { { submedia: submedia, term: term, agent: agent } }
|
@@ -19,13 +19,14 @@ describe DMMCrawler::Ranking do
|
|
19
19
|
let(:attachments) { described_class.new(arguments).arts }
|
20
20
|
let(:term) { '24' }
|
21
21
|
|
22
|
-
it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :
|
22
|
+
it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :rank, :affiliateable, :tags)) }
|
23
23
|
it { is_expected.to all(satisfy { |art| art.all? { |_k, v| v != '' } }) }
|
24
24
|
end
|
25
25
|
|
26
26
|
context 'with not registered argument' do
|
27
27
|
let(:attachments) { -> { described_class.new(arguments).arts } }
|
28
|
-
let(:term) {
|
28
|
+
let(:term) { '24' }
|
29
|
+
let(:agent) { nil }
|
29
30
|
|
30
31
|
it { is_expected.to raise_error(TypeError) }
|
31
32
|
end
|
data/spec/spec_helper.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmm-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.5
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Ohmori
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-12-16 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: rdmm
|
@@ -128,11 +128,16 @@ files:
|
|
128
128
|
- doc/ja/README.md
|
129
129
|
- lib/dmm-crawler.rb
|
130
130
|
- lib/dmm-crawler/agent.rb
|
131
|
-
- lib/dmm-crawler/attributes.rb
|
131
|
+
- lib/dmm-crawler/attributes/adult_game_attributes.rb
|
132
|
+
- lib/dmm-crawler/attributes/base_attributes.rb
|
133
|
+
- lib/dmm-crawler/attributes/dojin_attributes.rb
|
132
134
|
- lib/dmm-crawler/client.rb
|
133
|
-
- lib/dmm-crawler/ranking.rb
|
135
|
+
- lib/dmm-crawler/ranking/adult_game_ranking.rb
|
136
|
+
- lib/dmm-crawler/ranking/base_ranking.rb
|
137
|
+
- lib/dmm-crawler/ranking/dojin_ranking.rb
|
134
138
|
- lib/dmm-crawler/version.rb
|
135
|
-
- spec/dmm-crawler/ranking_spec.rb
|
139
|
+
- spec/dmm-crawler/ranking/adult_game_ranking_spec.rb
|
140
|
+
- spec/dmm-crawler/ranking/dojin_ranking_spec.rb
|
136
141
|
- spec/spec_helper.rb
|
137
142
|
homepage: https://github.com/sachin21/dmm-crawler
|
138
143
|
licenses:
|
@@ -154,7 +159,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
154
159
|
version: '0'
|
155
160
|
requirements: []
|
156
161
|
rubyforge_project:
|
157
|
-
rubygems_version: 2.
|
162
|
+
rubygems_version: 2.6.14.3
|
158
163
|
signing_key:
|
159
164
|
specification_version: 4
|
160
165
|
summary: Show DMM and DMM.R18's crawled data
|
@@ -1,103 +0,0 @@
|
|
1
|
-
module DMMCrawler
|
2
|
-
class Attributes
|
3
|
-
HTTP_STATUS_CODE_OF_SUCCESS = 200
|
4
|
-
|
5
|
-
def initialize(url, agent: Agent.instance.agent)
|
6
|
-
@page = agent.get(url)
|
7
|
-
@r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
|
8
|
-
end
|
9
|
-
|
10
|
-
def to_a
|
11
|
-
[
|
12
|
-
title,
|
13
|
-
title_link,
|
14
|
-
image_url,
|
15
|
-
submedia,
|
16
|
-
author,
|
17
|
-
informations,
|
18
|
-
affiliateable?,
|
19
|
-
tags
|
20
|
-
]
|
21
|
-
end
|
22
|
-
|
23
|
-
def affiliateable?
|
24
|
-
@r_client.list_items(site: 'DMM.R18', keyword: title).body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
|
25
|
-
end
|
26
|
-
|
27
|
-
private
|
28
|
-
|
29
|
-
def title
|
30
|
-
if art_page?
|
31
|
-
@page.search('.productTitle__txt span').remove
|
32
|
-
@page.search('.productTitle__txt').text.strip
|
33
|
-
else
|
34
|
-
@page.search('.rank-name').first.text.strip
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
def title_link
|
39
|
-
if art_page?
|
40
|
-
@page.uri.to_s
|
41
|
-
else
|
42
|
-
File.join(BASE_URL, @page.search('.rank-name').first.search('a').first.attributes.first[1].value)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def image_url
|
47
|
-
attrs = @page.search('.productPreview__item img').first.attributes
|
48
|
-
|
49
|
-
if attrs['data-src']
|
50
|
-
attrs['data-src'].value
|
51
|
-
else
|
52
|
-
attrs['src'].value
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def submedia
|
57
|
-
@page
|
58
|
-
.search('.productAttribute-listItem .c_icon_productGenre')
|
59
|
-
.first
|
60
|
-
.attributes['class']
|
61
|
-
.value
|
62
|
-
.gsub('c_icon_productGenre ', '')
|
63
|
-
.delete('-')
|
64
|
-
end
|
65
|
-
|
66
|
-
def author
|
67
|
-
@page.search('div.circleName__item').text.strip
|
68
|
-
end
|
69
|
-
|
70
|
-
def informations
|
71
|
-
keys = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__ttl'))
|
72
|
-
values = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__txt'))
|
73
|
-
|
74
|
-
information = keys.zip(values)
|
75
|
-
series = information.find { |array| array.first == 'シリーズ' }
|
76
|
-
|
77
|
-
if series
|
78
|
-
information = information.reject { |array| array.first == 'シリーズ' }
|
79
|
-
information.push(series)
|
80
|
-
end
|
81
|
-
|
82
|
-
information.map { |key, value| { key: key, value: value } }
|
83
|
-
end
|
84
|
-
|
85
|
-
def tags
|
86
|
-
if art_page?
|
87
|
-
@page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
|
88
|
-
else
|
89
|
-
@page.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
90
|
-
end
|
91
|
-
end
|
92
|
-
|
93
|
-
def extract_text(elements)
|
94
|
-
elements
|
95
|
-
.reject { |element| element.text.strip == 'ジャンル' }
|
96
|
-
.map { |element| element.children.text.strip }
|
97
|
-
end
|
98
|
-
|
99
|
-
def art_page?
|
100
|
-
@page.search('.rank-name').empty?
|
101
|
-
end
|
102
|
-
end
|
103
|
-
end
|
data/lib/dmm-crawler/ranking.rb
DELETED
@@ -1,54 +0,0 @@
|
|
1
|
-
module DMMCrawler
|
2
|
-
class Ranking
|
3
|
-
def initialize(arguments)
|
4
|
-
@agent = discriminate_agent(arguments[:agent])
|
5
|
-
@term = discriminate_term(arguments[:term])
|
6
|
-
@submedia = arguments[:submedia]
|
7
|
-
@url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
|
8
|
-
end
|
9
|
-
|
10
|
-
def arts
|
11
|
-
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').take(10).map do |element|
|
12
|
-
sleep_each do
|
13
|
-
url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
|
14
|
-
Attributes.new(url, agent: @agent).to_a
|
15
|
-
end
|
16
|
-
end
|
17
|
-
|
18
|
-
arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, informations, affiliateable, tags), rank|
|
19
|
-
{
|
20
|
-
title: title,
|
21
|
-
title_link: title_link,
|
22
|
-
image_url: image_url,
|
23
|
-
submedia: submedia,
|
24
|
-
author: author,
|
25
|
-
informations: informations,
|
26
|
-
rank: rank,
|
27
|
-
affiliateable: affiliateable,
|
28
|
-
tags: tags
|
29
|
-
}
|
30
|
-
end
|
31
|
-
end
|
32
|
-
|
33
|
-
private
|
34
|
-
|
35
|
-
def page
|
36
|
-
@agent.get(@url)
|
37
|
-
end
|
38
|
-
|
39
|
-
def discriminate_term(term)
|
40
|
-
return term if %w[24 weekly monthly total].include?(term)
|
41
|
-
raise TypeError
|
42
|
-
end
|
43
|
-
|
44
|
-
def discriminate_agent(agent)
|
45
|
-
return agent if agent.is_a?(Mechanize)
|
46
|
-
raise TypeError
|
47
|
-
end
|
48
|
-
|
49
|
-
def sleep_each
|
50
|
-
sleep rand(0.7..1.3)
|
51
|
-
yield
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|