dmm-crawler 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: b9e097c2a504bd209610b13b88938419591d646bc58dc666e7780c19f2d252e4
4
- data.tar.gz: 8c3be56018b75796857bfedf8dbbb7a130bf166013a2d813a5b98a9b03b19942
2
+ SHA1:
3
+ metadata.gz: daae94752568d25d37a9e8f43e791ec58d649f12
4
+ data.tar.gz: 27326b524fbe3f3c55f75b87228f047963eb5566
5
5
  SHA512:
6
- metadata.gz: 9c67e16b9a629013b113d2e838b8f8793c25fc07ee86feb41738a4243e7ffb46f1931a0aea3065f48707e78bc6135665c0f2d441b7f75424dc9d1db9346de8ea
7
- data.tar.gz: ea408fcf68c7c12ced8c2426d5a248ee1acbefb671cc1c5391a56d22443571531ff8a4e6197d06061d7077a9049965dddcfd81473becc7e476e66dcc2d0b567c
6
+ metadata.gz: a0fbbb9d7ef6453ec7515137939bcceff13f2f26398a2aa51a80f6c0db6c5acc256068ccbffaa95aabf34972f185bce4423f9bc735da4d8f32380a9084d43950
7
+ data.tar.gz: 59901768a8928a88bf69df43a084aff0d9cd2689435be47514940c1e45c54ef7f6b642511a8f5bc18a6366d42ef1aae30a57f1de3977ff26ba67885a57f50a4a
@@ -1,5 +1,9 @@
1
1
  # Change logs
2
2
 
3
+ ## 0.4.0
4
+ - Drop support of fetching the art's information.
5
+ - Support adult game's rankings.
6
+
3
7
  ## 0.3.5
4
8
  - Drop support of fetching art's price.
5
9
 
data/README.md CHANGED
@@ -2,6 +2,12 @@
2
2
 
3
3
  # DMM Crawler
4
4
 
5
+ ## :warning: Caution :warning:
6
+
7
+ FANZA does not permit crawling of its pages, so I recommend that you do not use this gem.
8
+
9
+ I do not take any responsibility or liability for any damage or loss caused by this gem.
10
+
5
11
  ## What is DMM Crawler
6
12
 
7
13
  Show DMM and DMM.R18's crawled data. Currently, all doujin rankings are crawlable.
@@ -2,6 +2,11 @@
2
2
 
3
3
  # DMM Crawler
4
4
 
5
+ ## :warning: 注意 :warning:
6
+
7
+ FANZA(旧DMM.R18)はクロールを禁止しているので、使用しないことをおすすめします。
8
+ dmm-crawlerを利用するにあたって不利益や損害が生じたとしても一切の責任を負わないものとします。
9
+
5
10
  ## DMM Crawlerとは
6
11
 
7
12
  DMM.R18のクロールしたデータを取得するgemです。現在、**同人**のランキングにのみ対応しております。
@@ -6,7 +6,11 @@ module DMMCrawler
6
6
  end
7
7
 
8
8
  require 'dmm-crawler/agent'
9
- require 'dmm-crawler/attributes'
10
- require 'dmm-crawler/ranking'
11
9
  require 'dmm-crawler/client'
10
+ require 'dmm-crawler/attributes/base_attributes'
11
+ require 'dmm-crawler/attributes/dojin_attributes.rb'
12
+ require 'dmm-crawler/attributes/adult_game_attributes.rb'
13
+ require 'dmm-crawler/ranking/base_ranking'
14
+ require 'dmm-crawler/ranking/dojin_ranking.rb'
15
+ require 'dmm-crawler/ranking/adult_game_ranking.rb'
12
16
  require 'dmm-crawler/version'
@@ -0,0 +1,56 @@
module DMMCrawler
  module Attributes
    # Extracts the attributes of one adult-game product page.
    #
    # The page is fetched by BaseAttributes#initialize; most values are read
    # from the page's HTML, while the tags come from the DMM API item looked
    # up through the Rdmm client.
    class AdultGameAttributes < BaseAttributes
      # @return [Array] title, title_link, main_image_url, sample_image_urls,
      #   submedia, brand, affiliateable? and tags, in that order
      def to_a
        [
          title,
          title_link,
          main_image_url,
          sample_image_urls,
          submedia,
          brand,
          affiliateable?,
          tags
        ]
      end

      private

      # Text of the last child of the first heading, with any 【...】 span removed.
      def title
        heading = @page.search('.page-detail h1').first
        heading.children.last.text.strip.gsub(/【.*】/, '')
      end

      # The URL the page was loaded from.
      def title_link
        @page.uri.to_s
      end

      # href of the first link inside the package image box.
      def main_image_url
        link = @page.search('.area-package-image').search('.package-image-box a').first
        link.attributes['href'].value
      end

      # src values of at most the first three rotation-banner images; images
      # without a src attribute are skipped.
      def sample_image_urls
        @page.search('#item-rotationbnr li span img').take(3).map { |img| img['src'] }.compact
      end

      def submedia
        'adult_game'
      end

      # Text of the 8th cell of the detail table (or of the last cell when
      # there are fewer than eight).
      def brand
        @page.search('.head-detail table tr td').take(8).last.text.strip
      end

      # Genre names reported by the API for this item.
      def tags
        item['iteminfo']['genre'].map { |genre| genre['name'] }
      end

      # The "views_<digits>" fragment of the page URL as a String, or nil when
      # the URL does not contain one. (String#[] is used so the API client
      # receives a String rather than a MatchData.)
      def content_id
        @page.uri.to_s[/views_\d*/]
      end

      # First item the API returns for this content id, memoized.
      def item
        @item ||= @r_client.list_items(site: 'DMM.R18', content_id: content_id).body['result']['items'][0]
      end
    end
  end
end
@@ -0,0 +1,58 @@
module DMMCrawler
  module Attributes
    # Common behaviour for page-attribute extractors.
    #
    # Subclasses implement #to_a and the private readers they need; the
    # readers declared here raise NotImplementedError until overridden.
    class BaseAttributes
      HTTP_STATUS_CODE_OF_SUCCESS = 200

      # Fetches +url+ with +agent+ and prepares an Rdmm API client configured
      # from the DMM_AFFILIATE_ID and DMM_API_ID environment variables.
      #
      # @param url the page to load
      # @param agent an object responding to #get(url); defaults to the shared
      #   Agent instance
      def initialize(url, agent: Agent.instance.agent)
        @page = agent.get(url)
        @r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
      end

      # @return [Array] the extracted attributes; order is defined by the subclass
      # @raise [NotImplementedError] unless overridden
      def to_a
        raise NotImplementedError
      end

      # Whether a DMM.R18 keyword search for this page's title reports a
      # result status of 200.
      #
      # Kept public: Client#affiliateable? calls it on an instance, which
      # would raise NoMethodError if the method were private.
      #
      # @return [Boolean]
      def affiliateable?
        response = @r_client.list_items(site: 'DMM.R18', keyword: title)
        response.body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
      end

      private

      # True when the loaded page's URL contains "doujin".
      def art_page?
        @page.uri.to_s.include?('doujin')
      end

      # True when the loaded page's URL contains "dlsoft".
      def adult_game?
        @page.uri.to_s.include?('dlsoft')
      end

      def title
        raise NotImplementedError
      end

      def title_link
        raise NotImplementedError
      end

      def image_url
        raise NotImplementedError
      end

      def submedia
        raise NotImplementedError
      end

      def author
        raise NotImplementedError
      end

      def brand
        raise NotImplementedError
      end

      def tags
        raise NotImplementedError
      end
    end
  end
end
@@ -0,0 +1,60 @@
module DMMCrawler
  module Attributes
    # Extracts the attributes of one doujin product page from its HTML.
    class DojinAttributes < BaseAttributes
      # @return [Array] title, title_link, image_url, submedia, author,
      #   affiliateable? and tags, in that order
      def to_a
        [
          title,
          title_link,
          image_url,
          submedia,
          author,
          affiliateable?,
          tags
        ]
      end

      private

      # Product title text. The <span> children of the title node are removed
      # from the parsed page first, so their text is not part of the result.
      def title
        @page.search('.productTitle__txt span').remove
        @page.search('.productTitle__txt').text.strip
      end

      # The URL the page was loaded from.
      def title_link
        @page.uri.to_s
      end

      # URL of the first preview image; the data-src attribute wins over src
      # when both are present.
      def image_url
        attrs = @page.search('.productPreview__item img').first.attributes
        (attrs['data-src'] || attrs['src']).value
      end

      # Class attribute of the first genre icon with the
      # "c_icon_productGenre " prefix and all hyphens removed.
      def submedia
        icon = @page.search('.productAttribute-listItem .c_icon_productGenre').first
        icon.attributes['class'].value.gsub('c_icon_productGenre ', '').delete('-')
      end

      def author
        @page.search('div.circleName__item').text.strip
      end

      # Stripped text of the 8th detail-table cell (or of the last cell when
      # there are fewer), mirroring AdultGameAttributes#brand; nil when the
      # page has no such table.
      def brand
        cell = @page.search('.head-detail table tr td').take(8).last
        cell && cell.text.strip
      end

      # Stripped text of every genre tag link.
      def tags
        @page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
      end
    end
  end
end
@@ -9,15 +9,19 @@ module DMMCrawler
9
9
  end
10
10
 
11
11
  def rankings(arguments)
12
- Ranking.new(arguments.merge!(agent: @agent)).arts
12
+ Ranking::DojinRanking.new(arguments.merge!(agent: @agent)).arts
13
+ end
14
+
15
+ def adult_game_rankings(arguments)
16
+ Ranking::AdultGameRanking.new(arguments.merge!(agent: @agent)).arts
13
17
  end
14
18
 
15
19
  def get_attributes(url)
16
- Attributes.new(url, agent: @agent).to_a
20
+ Attributes::DojinAttributes.new(url, agent: @agent).to_a
17
21
  end
18
22
 
19
23
  def affiliateable?(url)
20
- Attributes.new(url, agent: @agent).affiliateable?
24
+ Attributes::DojinAttributes.new(url, agent: @agent).affiliateable?
21
25
  end
22
26
  end
23
27
  end
@@ -0,0 +1,53 @@
module DMMCrawler
  module Ranking
    # Crawls the adult-game ranking page and the product page of each ranked
    # game.
    class AdultGameRanking < BaseRanking
      include Attributes

      FETCHING_LIMITATION = 20
      DLSOFT_URL = 'http://dlsoft.dmm.co.jp/'.freeze

      # @param agent [Mechanize] agent used for every request
      # @param term [String, nil] 'weekly', 'monthly' or 'yearly'; nil behaves
      #   like 'monthly'
      # @raise [TypeError] when +agent+ is not a Mechanize instance or +term+
      #   is not one of the supported values
      def initialize(agent: Agent.instance.agent, term: nil)
        @agent = discriminate_agent(agent)
        @term = term
        @url = URI.join(DLSOFT_URL, File.join('ranking', parameterized_term))
      end

      # Loads the ranking page, then each of the first FETCHING_LIMITATION
      # product pages (pausing before each via sleep_each).
      #
      # @return [Array<Hash>] one hash per game, in ranking order, with a
      #   1-based :rank
      def arts
        games = page.search('.rankingList-content .rankingList-item.fn-rankListItem').take(FETCHING_LIMITATION)

        attribute_sets = games.map do |game|
          sleep_each do
            url = game.search('.rankingList-link').first.attributes['href'].value
            AdultGameAttributes.new(url, agent: @agent).to_a
          end
        end

        # The sixth value is AdultGameAttributes' brand; it is exposed under
        # the :author key.
        attribute_sets.map.with_index(1) do |(title, title_link, main_image_url, sample_image_urls, submedia, author, affiliateable, tags), rank|
          {
            title: title,
            title_link: title_link,
            main_image_url: main_image_url,
            sample_image_urls: sample_image_urls,
            submedia: submedia,
            author: author,
            rank: rank,
            affiliateable: affiliateable,
            tags: tags
          }
        end
      end

      private

      # Path segment appended after "ranking/" for the requested term.
      #
      # 'monthly' and nil map to the empty string (the bare ranking URL)
      # rather than nil, because File.join raises TypeError on nil.
      def parameterized_term
        case @term
        when 'weekly'
          'term=weekly'
        when 'monthly', nil
          ''
        when 'yearly'
          "term=first/year=#{Time.now.year}/"
        else
          raise TypeError, "unsupported term: #{@term.inspect}"
        end
      end
    end
  end
end
@@ -0,0 +1,25 @@
module DMMCrawler
  module Ranking
    # Shared plumbing for ranking crawlers. Subclasses are expected to set
    # @agent and @url and to implement #arts.
    class BaseRanking
      # @return [Array<Hash>] the ranked items
      # @raise [NotImplementedError] unless overridden
      def arts
        raise NotImplementedError
      end

      private

      # Fetches @url with @agent. Not memoized: every call issues a request.
      def page
        @agent.get(@url)
      end

      # Sleeps for a random 0.7-1.3 seconds, then yields and returns the
      # block's value.
      def sleep_each
        sleep rand(0.7..1.3)
        yield
      end

      # Returns +agent+ unchanged when it is a Mechanize instance.
      #
      # @raise [TypeError] for anything else (including nil)
      def discriminate_agent(agent)
        return agent if agent.is_a?(Mechanize)

        raise TypeError, "agent must be a Mechanize instance, got #{agent.class}"
      end
    end
  end
end
@@ -0,0 +1,39 @@
module DMMCrawler
  module Ranking
    # Crawls the doujin popularity ranking and the product page of each
    # ranked work.
    class DojinRanking < BaseRanking
      include Attributes

      FETCHING_LIMITATION = 10

      # @param agent [Mechanize] agent used for every request
      # @param submedia [String, nil] value interpolated into the URL's
      #   "submedia=" segment
      # @param term [String, nil] value interpolated into the URL's "term="
      #   segment
      # @raise [TypeError] when +agent+ is not a Mechanize instance
      def initialize(agent:, submedia: nil, term: nil)
        @agent = discriminate_agent(agent)
        @submedia = submedia
        @term = term # was mistakenly assigned from submedia
        @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
      end

      # Loads the ranking page, then each of the first FETCHING_LIMITATION
      # product pages (pausing before each via sleep_each).
      #
      # @return [Array<Hash>] one hash per work, in ranking order, with a
      #   1-based :rank
      def arts
        rows = page.search('.rank-rankListItem.fn-setPurchaseChange').take(FETCHING_LIMITATION)

        attribute_sets = rows.map do |element|
          sleep_each do
            url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
            DojinAttributes.new(url, agent: @agent).to_a
          end
        end

        # The names below must match DojinAttributes#to_a one-for-one (seven
        # values, no price); an extra name shifts every later value.
        attribute_sets.map.with_index(1) do |(title, title_link, image_url, submedia, author, affiliateable, tags), rank|
          {
            title: title,
            title_link: title_link,
            image_url: image_url,
            submedia: submedia,
            author: author,
            rank: rank,
            affiliateable: affiliateable,
            tags: tags
          }
        end
      end
    end
  end
end
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.3.5'.freeze
2
+ VERSION = '0.4.0'.freeze
3
3
  end
@@ -0,0 +1,33 @@
describe DMMCrawler::Ranking::AdultGameRanking do
  let(:agent) { DMMCrawler::Agent.instance.agent }
  let(:term) { 'weekly' }
  let(:arguments) { { agent: agent, term: term } }

  describe '#arts' do
    # The initializer takes keyword arguments only, so the hash is splatted;
    # passing it positionally fails on Ruby 3.
    let(:ranking) { described_class.new(**arguments) }

    # Pause after every example — presumably to space out the live requests
    # these examples make (TODO confirm).
    after { sleep 2 }

    context 'with length' do
      subject { ranking.arts.length }

      it { is_expected.to eq 20 }
    end

    context 'with weekly argument' do
      subject { ranking.arts }

      it { is_expected.to all(include(:title, :title_link, :main_image_url, :sample_image_urls, :submedia, :author, :rank, :affiliateable, :tags)) }
      it { is_expected.to all(satisfy { |art| art.none? { |_k, v| v.nil? || v == '' } }) }
    end

    context 'with not registered argument' do
      let(:agent) { nil }

      # Block form of expect: raise_error is a block matcher, and handing it
      # a lambda as the subject relies on deprecated implicit behaviour.
      it 'raises TypeError' do
        expect { ranking.arts }.to raise_error(TypeError)
      end
    end
  end
end
@@ -1,4 +1,4 @@
1
- describe DMMCrawler::Ranking do
1
+ describe DMMCrawler::Ranking::DojinRanking do
2
2
  let(:agent) { DMMCrawler::Agent.instance.agent }
3
3
  let(:submedia) { 'cg' }
4
4
  let(:arguments) { { submedia: submedia, term: term, agent: agent } }
@@ -19,13 +19,14 @@ describe DMMCrawler::Ranking do
19
19
  let(:attachments) { described_class.new(arguments).arts }
20
20
  let(:term) { '24' }
21
21
 
22
- it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :informations, :rank, :affiliateable, :tags)) }
22
+ it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :rank, :affiliateable, :tags)) }
23
23
  it { is_expected.to all(satisfy { |art| art.all? { |_k, v| v != '' } }) }
24
24
  end
25
25
 
26
26
  context 'with not registered argument' do
27
27
  let(:attachments) { -> { described_class.new(arguments).arts } }
28
- let(:term) { nil }
28
+ let(:term) { '24' }
29
+ let(:agent) { nil }
29
30
 
30
31
  it { is_expected.to raise_error(TypeError) }
31
32
  end
@@ -3,6 +3,8 @@ require 'pry'
3
3
 
4
4
  RSpec.configure do |config|
5
5
  config.order = 'random'
6
+ config.filter_run :focus
7
+ config.run_all_when_everything_filtered = true
6
8
  config.expect_with :rspec do |rspec|
7
9
  rspec.syntax = :expect
8
10
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.5
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-08-13 00:00:00.000000000 Z
11
+ date: 2018-12-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rdmm
@@ -128,11 +128,16 @@ files:
128
128
  - doc/ja/README.md
129
129
  - lib/dmm-crawler.rb
130
130
  - lib/dmm-crawler/agent.rb
131
- - lib/dmm-crawler/attributes.rb
131
+ - lib/dmm-crawler/attributes/adult_game_attributes.rb
132
+ - lib/dmm-crawler/attributes/base_attributes.rb
133
+ - lib/dmm-crawler/attributes/dojin_attributes.rb
132
134
  - lib/dmm-crawler/client.rb
133
- - lib/dmm-crawler/ranking.rb
135
+ - lib/dmm-crawler/ranking/adult_game_ranking.rb
136
+ - lib/dmm-crawler/ranking/base_ranking.rb
137
+ - lib/dmm-crawler/ranking/dojin_ranking.rb
134
138
  - lib/dmm-crawler/version.rb
135
- - spec/dmm-crawler/ranking_spec.rb
139
+ - spec/dmm-crawler/ranking/adult_game_ranking_spec.rb
140
+ - spec/dmm-crawler/ranking/dojin_ranking_spec.rb
136
141
  - spec/spec_helper.rb
137
142
  homepage: https://github.com/sachin21/dmm-crawler
138
143
  licenses:
@@ -154,7 +159,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
154
159
  version: '0'
155
160
  requirements: []
156
161
  rubyforge_project:
157
- rubygems_version: 2.7.6
162
+ rubygems_version: 2.6.14.3
158
163
  signing_key:
159
164
  specification_version: 4
160
165
  summary: Show DMM and DMM.R18's crawled data
@@ -1,103 +0,0 @@
1
- module DMMCrawler
2
- class Attributes
3
- HTTP_STATUS_CODE_OF_SUCCESS = 200
4
-
5
- def initialize(url, agent: Agent.instance.agent)
6
- @page = agent.get(url)
7
- @r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
8
- end
9
-
10
- def to_a
11
- [
12
- title,
13
- title_link,
14
- image_url,
15
- submedia,
16
- author,
17
- informations,
18
- affiliateable?,
19
- tags
20
- ]
21
- end
22
-
23
- def affiliateable?
24
- @r_client.list_items(site: 'DMM.R18', keyword: title).body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
25
- end
26
-
27
- private
28
-
29
- def title
30
- if art_page?
31
- @page.search('.productTitle__txt span').remove
32
- @page.search('.productTitle__txt').text.strip
33
- else
34
- @page.search('.rank-name').first.text.strip
35
- end
36
- end
37
-
38
- def title_link
39
- if art_page?
40
- @page.uri.to_s
41
- else
42
- File.join(BASE_URL, @page.search('.rank-name').first.search('a').first.attributes.first[1].value)
43
- end
44
- end
45
-
46
- def image_url
47
- attrs = @page.search('.productPreview__item img').first.attributes
48
-
49
- if attrs['data-src']
50
- attrs['data-src'].value
51
- else
52
- attrs['src'].value
53
- end
54
- end
55
-
56
- def submedia
57
- @page
58
- .search('.productAttribute-listItem .c_icon_productGenre')
59
- .first
60
- .attributes['class']
61
- .value
62
- .gsub('c_icon_productGenre ', '')
63
- .delete('-')
64
- end
65
-
66
- def author
67
- @page.search('div.circleName__item').text.strip
68
- end
69
-
70
- def informations
71
- keys = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__ttl'))
72
- values = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__txt'))
73
-
74
- information = keys.zip(values)
75
- series = information.find { |array| array.first == 'シリーズ' }
76
-
77
- if series
78
- information = information.reject { |array| array.first == 'シリーズ' }
79
- information.push(series)
80
- end
81
-
82
- information.map { |key, value| { key: key, value: value } }
83
- end
84
-
85
- def tags
86
- if art_page?
87
- @page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
88
- else
89
- @page.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
90
- end
91
- end
92
-
93
- def extract_text(elements)
94
- elements
95
- .reject { |element| element.text.strip == 'ジャンル' }
96
- .map { |element| element.children.text.strip }
97
- end
98
-
99
- def art_page?
100
- @page.search('.rank-name').empty?
101
- end
102
- end
103
- end
@@ -1,54 +0,0 @@
1
- module DMMCrawler
2
- class Ranking
3
- def initialize(arguments)
4
- @agent = discriminate_agent(arguments[:agent])
5
- @term = discriminate_term(arguments[:term])
6
- @submedia = arguments[:submedia]
7
- @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
8
- end
9
-
10
- def arts
11
- arts = page.search('.rank-rankListItem.fn-setPurchaseChange').take(10).map do |element|
12
- sleep_each do
13
- url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
14
- Attributes.new(url, agent: @agent).to_a
15
- end
16
- end
17
-
18
- arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, informations, affiliateable, tags), rank|
19
- {
20
- title: title,
21
- title_link: title_link,
22
- image_url: image_url,
23
- submedia: submedia,
24
- author: author,
25
- informations: informations,
26
- rank: rank,
27
- affiliateable: affiliateable,
28
- tags: tags
29
- }
30
- end
31
- end
32
-
33
- private
34
-
35
- def page
36
- @agent.get(@url)
37
- end
38
-
39
- def discriminate_term(term)
40
- return term if %w[24 weekly monthly total].include?(term)
41
- raise TypeError
42
- end
43
-
44
- def discriminate_agent(agent)
45
- return agent if agent.is_a?(Mechanize)
46
- raise TypeError
47
- end
48
-
49
- def sleep_each
50
- sleep rand(0.7..1.3)
51
- yield
52
- end
53
- end
54
- end