RubyGems - dmm-crawler - Versions diffs - 0.3.5 → 0.4.0 - Mend

dmm-crawler 0.3.5 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (19) hide show

checksums.yaml +5 -5
data/CHANGELOG.md +4 -0
data/README.md +6 -0
data/doc/ja/README.md +5 -0
data/lib/dmm-crawler.rb +6 -2
data/lib/dmm-crawler/attributes/adult_game_attributes.rb +56 -0
data/lib/dmm-crawler/attributes/base_attributes.rb +58 -0
data/lib/dmm-crawler/attributes/dojin_attributes.rb +60 -0
data/lib/dmm-crawler/client.rb +7 -3
data/lib/dmm-crawler/ranking/adult_game_ranking.rb +53 -0
data/lib/dmm-crawler/ranking/base_ranking.rb +25 -0
data/lib/dmm-crawler/ranking/dojin_ranking.rb +39 -0
data/lib/dmm-crawler/version.rb +1 -1
data/spec/dmm-crawler/ranking/adult_game_ranking_spec.rb +33 -0
data/spec/dmm-crawler/{ranking_spec.rb → ranking/dojin_ranking_spec.rb} +4 -3
data/spec/spec_helper.rb +2 -0
metadata +11 -6
data/lib/dmm-crawler/attributes.rb +0 -103
data/lib/dmm-crawler/ranking.rb +0 -54

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
-SHA256:
-  metadata.gz: b9e097c2a504bd209610b13b88938419591d646bc58dc666e7780c19f2d252e4
-  data.tar.gz: 8c3be56018b75796857bfedf8dbbb7a130bf166013a2d813a5b98a9b03b19942
+SHA1:
+  metadata.gz: daae94752568d25d37a9e8f43e791ec58d649f12
+  data.tar.gz: 27326b524fbe3f3c55f75b87228f047963eb5566
 SHA512:
-  metadata.gz: 9c67e16b9a629013b113d2e838b8f8793c25fc07ee86feb41738a4243e7ffb46f1931a0aea3065f48707e78bc6135665c0f2d441b7f75424dc9d1db9346de8ea
-  data.tar.gz: ea408fcf68c7c12ced8c2426d5a248ee1acbefb671cc1c5391a56d22443571531ff8a4e6197d06061d7077a9049965dddcfd81473becc7e476e66dcc2d0b567c
+  metadata.gz: a0fbbb9d7ef6453ec7515137939bcceff13f2f26398a2aa51a80f6c0db6c5acc256068ccbffaa95aabf34972f185bce4423f9bc735da4d8f32380a9084d43950
+  data.tar.gz: 59901768a8928a88bf69df43a084aff0d9cd2689435be47514940c1e45c54ef7f6b642511a8f5bc18a6366d42ef1aae30a57f1de3977ff26ba67885a57f50a4a

data/CHANGELOG.md CHANGED

@@ -1,5 +1,9 @@
 # Change logs
+## 0.4.0
+- Drop support of fetching the art's information.
+- Support adult game's rankings.
 ## 0.3.5
 - Drop support of fetching art's price.

data/README.md CHANGED

@@ -2,6 +2,12 @@
 # DMM Crawler
+## :warning: Caution :warning:
+FANZA does not permit crawling of its pages, so I recommend not using this gem.
+I do not take any responsibility or liability for any damage or loss caused by my gem.
 ## What is DMM Crawler
 Shows crawled data from DMM and DMM.R18. Currently, all rankings for doujin are crawlable.

data/doc/ja/README.md CHANGED

@@ -2,6 +2,11 @@
 # DMM Crawler
+## :warning: 注意 :warning:
+FANZA(旧DMM.R18)はクロールを禁止しているので、使用しないことをおすすめします。
+dmm-crawlerを利用するにあたって不利益や損害が生じたとしても一切の責任を負わないものとします。
 ## DMM Crawlerとは
 DMM.R18のクロールしたデータを取得するgemです。現在、**同人**のランキングにのみ対応しております。

data/lib/dmm-crawler.rb CHANGED

@@ -6,7 +6,11 @@ module DMMCrawler
 end
 require 'dmm-crawler/agent'
-require 'dmm-crawler/attributes'
-require 'dmm-crawler/ranking'
 require 'dmm-crawler/client'
+require 'dmm-crawler/attributes/base_attributes'
+require 'dmm-crawler/attributes/dojin_attributes.rb'
+require 'dmm-crawler/attributes/adult_game_attributes.rb'
+require 'dmm-crawler/ranking/base_ranking'
+require 'dmm-crawler/ranking/dojin_ranking.rb'
+require 'dmm-crawler/ranking/adult_game_ranking.rb'
 require 'dmm-crawler/version'

data/lib/dmm-crawler/attributes/adult_game_attributes.rb ADDED

@@ -0,0 +1,56 @@
+module DMMCrawler
+  module Attributes
+    class AdultGameAttributes < BaseAttributes
+      def to_a
+        [
+          title,
+          title_link,
+          main_image_url,
+          sample_image_urls,
+          submedia,
+          brand,
+          affiliateable?,
+          tags
+        ]
+      end
+      private
+      def title
+        @page.search('.page-detail h1').first.children.last.text.strip.gsub(/【.*】/, '')
+      end
+      def title_link
+        @page.uri.to_s
+      end
+      def main_image_url
+        @page.search('.area-package-image').search('.package-image-box a').first.attributes['href'].value
+      end
+      def sample_image_urls
+        @page.search('#item-rotationbnr li span img').take(3).map { |img| img&.attributes&.send(:[], 'src')&.value }.compact
+      end
+      def submedia
+        'adult_game'
+      end
+      def brand
+        @page.search('.head-detail table tr td').take(8).last.text.strip
+      end
+      def tags
+        item['iteminfo']['genre'].map { |h| h['name'] }
+      end
+      def content_id
+        @page.uri.to_s.match(/views_\d*/)
+      end
+      def item
+        @item ||= @r_client.list_items(site: 'DMM.R18', content_id: content_id).body['result']['items'][0]
+      end
+    end
+  end
+end

data/lib/dmm-crawler/attributes/base_attributes.rb ADDED

@@ -0,0 +1,58 @@
+module DMMCrawler
+  module Attributes
+    class BaseAttributes
+      HTTP_STATUS_CODE_OF_SUCCESS = 200
+      def initialize(url, agent: Agent.instance.agent)
+        @page = agent.get(url)
+        @r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
+      end
+      def to_a
+        raise NotImplementedError
+      end
+      private
+      def affiliateable?
+        @r_client.list_items(site: 'DMM.R18', keyword: title).body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
+      end
+      def art_page?
+        @page.uri.to_s =~ /doujin/
+      end
+      def adult_game?
+        @page.uri.to_s =~ /dlsoft/
+      end
+      def title
+        raise NotImplementedError
+      end
+      def title_link
+        raise NotImplementedError
+      end
+      def image_url
+        raise NotImplementedError
+      end
+      def submedia
+        raise NotImplementedError
+      end
+      def author
+        raise NotImplementedError
+      end
+      def brand
+        raise NotImplementedError
+      end
+      def tags
+        raise NotImplementedError
+      end
+    end
+  end
+end

data/lib/dmm-crawler/attributes/dojin_attributes.rb ADDED

@@ -0,0 +1,60 @@
+module DMMCrawler
+  module Attributes
+    class DojinAttributes < BaseAttributes
+      def to_a
+        [
+          title,
+          title_link,
+          image_url,
+          submedia,
+          author,
+          affiliateable?,
+          tags
+        ]
+      end
+      private
+      def title
+        @page.search('.productTitle__txt span').remove
+        @page.search('.productTitle__txt').text.strip
+      end
+      def title_link
+        @page.uri.to_s
+      end
+      def image_url
+        attrs = @page.search('.productPreview__item img').first.attributes
+        if attrs['data-src']
+          attrs['data-src'].value
+        else
+          attrs['src'].value
+        end
+      end
+      def submedia
+        @page
+          .search('.productAttribute-listItem .c_icon_productGenre')
+          .first
+          .attributes['class']
+          .value
+          .gsub('c_icon_productGenre ', '')
+          .delete('-')
+      end
+      def author
+        @page.search('div.circleName__item').text.strip
+      end
+      def brand
+        @page.search('.head-detail table tr td').take(8).last
+      end
+      def tags
+        @page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
+      end
+    end
+  end
+end

data/lib/dmm-crawler/client.rb CHANGED

@@ -9,15 +9,19 @@ module DMMCrawler
     end
     def rankings(arguments)
-      Ranking.new(arguments.merge!(agent: @agent)).arts
+      Ranking::DojinRanking.new(arguments.merge!(agent: @agent)).arts
+    end
+    def adult_game_rankings(arguments)
+      Ranking::AdultGameRanking.new(arguments.merge!(agent: @agent)).arts
     end
     def get_attributes(url)
-      Attributes.new(url, agent: @agent).to_a
+      Attributes::DojinAttributes.new(url, agent: @agent).to_a
     end
     def affiliateable?(url)
-      Attributes.new(url, agent: @agent).affiliateable?
+      Attributes::DojinAttributes.new(url, agent: @agent).affiliateable?
     end
   end
 end

data/lib/dmm-crawler/ranking/adult_game_ranking.rb ADDED

@@ -0,0 +1,53 @@
+module DMMCrawler
+  module Ranking
+    class AdultGameRanking < BaseRanking
+      include Attributes
+      FETCHING_LIMITATION = 20
+      DLSOFT_URL = "http://dlsoft.dmm.co.jp/"
+      def initialize(agent: Agent.instance.agent, term: nil)
+        @agent = discriminate_agent(agent)
+        @term = term
+        @url = URI.join(DLSOFT_URL, File.join('ranking', parameterized_term))
+      end
+      def arts
+        games = page.search('.rankingList-content .rankingList-item.fn-rankListItem').take(FETCHING_LIMITATION)
+        arts = games.map do |game|
+          sleep_each do
+            url = game.search('.rankingList-link').first.attributes['href'].value
+            AdultGameAttributes.new(url, agent: @agent).to_a
+          end
+        end
+        arts.map.with_index(1) do |(title, title_link, main_image_url, sample_image_urls, submedia, author, affiliateable, tags), rank|
+          {
+            title: title,
+            title_link: title_link,
+            main_image_url: main_image_url,
+            sample_image_urls: sample_image_urls,
+            submedia: submedia,
+            author: author,
+            rank: rank,
+            affiliateable: affiliateable,
+            tags: tags
+          }
+        end
+      end
+      private
+      def parameterized_term
+        case @term
+        when 'weekly'
+          'term=weekly'
+        when 'monthly'
+          nil
+        when 'yearly'
+          "term=first/year=#{Time.now.year}/"
+        end
+      end
+    end
+  end
+end

data/lib/dmm-crawler/ranking/base_ranking.rb ADDED

@@ -0,0 +1,25 @@
+module DMMCrawler
+  module Ranking
+    class BaseRanking
+      def arts
+        raise NotImplementedError
+      end
+      private
+      def page
+        @agent.get(@url)
+      end
+      def sleep_each
+        sleep rand(0.7..1.3)
+        yield
+      end
+      def discriminate_agent(agent)
+        return agent if agent.is_a?(Mechanize)
+        raise TypeError
+      end
+    end
+  end
+end

data/lib/dmm-crawler/ranking/dojin_ranking.rb ADDED

@@ -0,0 +1,39 @@
+module DMMCrawler
+  module Ranking
+    class DojinRanking < BaseRanking
+      include Attributes
+      FETCHING_LIMITATION = 10
+      def initialize(agent:, submedia: nil, term: nil)
+        @agent = discriminate_agent(agent)
+        @submedia = submedia
+        @term = submedia
+        @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
+      end
+      def arts
+        arts = page.search('.rank-rankListItem.fn-setPurchaseChange').take(FETCHING_LIMITATION).map do |element|
+          sleep_each do
+            url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
+            DojinAttributes.new(url, agent: @agent).to_a
+          end
+        end
+        arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, price, affiliateable, tags), rank|
+          {
+            title: title,
+            title_link: title_link,
+            image_url: image_url,
+            submedia: submedia,
+            author: author,
+            rank: rank,
+            price: price,
+            affiliateable: affiliateable,
+            tags: tags
+          }
+        end
+      end
+    end
+  end
+end

data/lib/dmm-crawler/version.rb CHANGED

@@ -1,3 +1,3 @@
 module DMMCrawler
-  VERSION = '0.3.5'.freeze
+  VERSION = '0.4.0'.freeze
 end

data/spec/dmm-crawler/ranking/adult_game_ranking_spec.rb ADDED

@@ -0,0 +1,33 @@
+describe DMMCrawler::Ranking::AdultGameRanking do
+  let(:agent) { DMMCrawler::Agent.instance.agent }
+  let(:arguments) { { agent: agent, term: term } }
+  describe '#arts' do
+    subject { attachments }
+    after { sleep 2 }
+    context 'with length' do
+      let(:term) { 'weekly' }
+      let(:attachments) { described_class.new(arguments).arts.length }
+      it { is_expected.to be 20 }
+    end
+    context 'with weekly argument' do
+      let(:term) { 'weekly' }
+      let(:attachments) { described_class.new(arguments).arts }
+      it { is_expected.to all(include(:title, :title_link, :main_image_url, :sample_image_urls, :submedia, :author, :rank, :affiliateable, :tags)) }
+      it { is_expected.to all(satisfy { |art| art.all? { |_k, v| v != '' && v != nil} }) }
+    end
+    context 'with not registered argument' do
+      let(:agent) { nil }
+      let(:term) { 'weekly' }
+      let(:attachments) { -> { described_class.new(arguments).arts } }
+      it { is_expected.to raise_error(TypeError) }
+    end
+  end
+end

data/spec/dmm-crawler/{ranking_spec.rb → ranking/dojin_ranking_spec.rb} RENAMED

@@ -1,4 +1,4 @@
-describe DMMCrawler::Ranking do
+describe DMMCrawler::Ranking::DojinRanking do
   let(:agent) { DMMCrawler::Agent.instance.agent }
   let(:submedia) { 'cg' }
   let(:arguments) { { submedia: submedia, term: term, agent: agent } }
@@ -19,13 +19,14 @@ describe DMMCrawler::Ranking do
       let(:attachments) { described_class.new(arguments).arts }
       let(:term) { '24' }
-      it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :informations, :rank, :affiliateable, :tags)) }
+      it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :rank, :affiliateable, :tags)) }
       it { is_expected.to all(satisfy { |art| art.all? { |_k, v| v != '' } }) }
     end
     context 'with not registered argument' do
       let(:attachments) { -> { described_class.new(arguments).arts } }
-      let(:term) { nil }
+      let(:term) { '24' }
+      let(:agent) { nil }
       it { is_expected.to raise_error(TypeError) }
     end

data/spec/spec_helper.rb CHANGED

@@ -3,6 +3,8 @@ require 'pry'
 RSpec.configure do |config|
   config.order = 'random'
+  config.filter_run :focus
+  config.run_all_when_everything_filtered = true
   config.expect_with :rspec do |rspec|
     rspec.syntax = :expect
   end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: dmm-crawler
 version: !ruby/object:Gem::Version
-  version: 0.3.5
+  version: 0.4.0
 platform: ruby
 authors:
 - Satoshi Ohmori
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2018-08-13 00:00:00.000000000 Z
+date: 2018-12-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rdmm
@@ -128,11 +128,16 @@ files:
 - doc/ja/README.md
 - lib/dmm-crawler.rb
 - lib/dmm-crawler/agent.rb
-- lib/dmm-crawler/attributes.rb
+- lib/dmm-crawler/attributes/adult_game_attributes.rb
+- lib/dmm-crawler/attributes/base_attributes.rb
+- lib/dmm-crawler/attributes/dojin_attributes.rb
 - lib/dmm-crawler/client.rb
-- lib/dmm-crawler/ranking.rb
+- lib/dmm-crawler/ranking/adult_game_ranking.rb
+- lib/dmm-crawler/ranking/base_ranking.rb
+- lib/dmm-crawler/ranking/dojin_ranking.rb
 - lib/dmm-crawler/version.rb
-- spec/dmm-crawler/ranking_spec.rb
+- spec/dmm-crawler/ranking/adult_game_ranking_spec.rb
+- spec/dmm-crawler/ranking/dojin_ranking_spec.rb
 - spec/spec_helper.rb
 homepage: https://github.com/sachin21/dmm-crawler
 licenses:
@@ -154,7 +159,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
       version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.7.6
+rubygems_version: 2.6.14.3
 signing_key:
 specification_version: 4
 summary: Show DMM and DMM.R18's crawled data

data/lib/dmm-crawler/attributes.rb DELETED

@@ -1,103 +0,0 @@
-module DMMCrawler
-  class Attributes
-    HTTP_STATUS_CODE_OF_SUCCESS = 200
-    def initialize(url, agent: Agent.instance.agent)
-      @page = agent.get(url)
-      @r_client = Rdmm::Client.new(affiliate_id: ENV['DMM_AFFILIATE_ID'], api_id: ENV['DMM_API_ID'])
-    end
-    def to_a
-      [
-        title,
-        title_link,
-        image_url,
-        submedia,
-        author,
-        informations,
-        affiliateable?,
-        tags
-      ]
-    end
-    def affiliateable?
-      @r_client.list_items(site: 'DMM.R18', keyword: title).body['result']['status'] == HTTP_STATUS_CODE_OF_SUCCESS
-    end
-    private
-    def title
-      if art_page?
-        @page.search('.productTitle__txt span').remove
-        @page.search('.productTitle__txt').text.strip
-      else
-        @page.search('.rank-name').first.text.strip
-      end
-    end
-    def title_link
-      if art_page?
-        @page.uri.to_s
-      else
-        File.join(BASE_URL, @page.search('.rank-name').first.search('a').first.attributes.first[1].value)
-      end
-    end
-    def image_url
-      attrs = @page.search('.productPreview__item img').first.attributes
-      if attrs['data-src']
-        attrs['data-src'].value
-      else
-        attrs['src'].value
-      end
-    end
-    def submedia
-      @page
-        .search('.productAttribute-listItem .c_icon_productGenre')
-        .first
-        .attributes['class']
-        .value
-        .gsub('c_icon_productGenre ', '')
-        .delete('-')
-    end
-    def author
-      @page.search('div.circleName__item').text.strip
-    end
-    def informations
-      keys = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__ttl'))
-      values = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__txt'))
-      information = keys.zip(values)
-      series = information.find { |array| array.first == 'シリーズ' }
-      if series
-        information = information.reject { |array| array.first == 'シリーズ' }
-        information.push(series)
-      end
-      information.map { |key, value| { key: key, value: value } }
-    end
-    def tags
-      if art_page?
-        @page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
-      else
-        @page.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
-      end
-    end
-    def extract_text(elements)
-      elements
-        .reject { |element| element.text.strip == 'ジャンル' }
-        .map { |element| element.children.text.strip }
-    end
-    def art_page?
-      @page.search('.rank-name').empty?
-    end
-  end
-end

data/lib/dmm-crawler/ranking.rb DELETED

@@ -1,54 +0,0 @@
-module DMMCrawler
-  class Ranking
-    def initialize(arguments)
-      @agent = discriminate_agent(arguments[:agent])
-      @term = discriminate_term(arguments[:term])
-      @submedia = arguments[:submedia]
-      @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
-    end
-    def arts
-      arts = page.search('.rank-rankListItem.fn-setPurchaseChange').take(10).map do |element|
-        sleep_each do
-          url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
-          Attributes.new(url, agent: @agent).to_a
-        end
-      end
-      arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, informations, affiliateable, tags), rank|
-        {
-          title: title,
-          title_link: title_link,
-          image_url: image_url,
-          submedia: submedia,
-          author: author,
-          informations: informations,
-          rank: rank,
-          affiliateable: affiliateable,
-          tags: tags
-        }
-      end
-    end
-    private
-    def page
-      @agent.get(@url)
-    end
-    def discriminate_term(term)
-      return term if %w[24 weekly monthly total].include?(term)
-      raise TypeError
-    end
-    def discriminate_agent(agent)
-      return agent if agent.is_a?(Mechanize)
-      raise TypeError
-    end
-    def sleep_each
-      sleep rand(0.7..1.3)
-      yield
-    end
-  end
-end