dmm-crawler 0.1.5 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b6d65185ef93c93354c8eb9967b6b60ca64650b2
4
- data.tar.gz: e9043312de49e3a70f54bf18d6c5fc64b1772a07
3
+ metadata.gz: bde3c371a800c5ea7438e38d62c21792771899c3
4
+ data.tar.gz: f446060552d5900de241ce4785201e9c612b72d1
5
5
  SHA512:
6
- metadata.gz: 38e501d44bbef30dbfe1efdadf5e696bcfd9d4cc7580c03bf7b9feff1eabef385b5a8751d45219ddba6da1c659f08120f203dc3cc0359d242696ee452fd5582f
7
- data.tar.gz: 48720a7e5b861dfc505da33779f8f1c43cb1b0b31b2ff385a8ed92a3fe696879bf98191ed60a3aae89eb8329782d963bb0f8f34c47768f2f237233530d9e2e10
6
+ metadata.gz: ea920b8c0258998de73cb1cb2973fe09c9f4272424751dee6c1a5ba27f0f327f379a8d13449f761ea5fdfe1d7e7d6e1e9e312a0a5d170d8a241907d27ea5e205
7
+ data.tar.gz: c425232a8d22c6f1a7b6e476211fb7c4b483952f04f9ddeeb30fe6eae1a2b73688c25bf7659e06f8601e11a4ded8ca700a35442a6adc2ec736badd27c9ee374b
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dmm-crawler (0.1.5)
4
+ dmm-crawler (0.2.0)
5
5
  mechanize
6
6
 
7
7
  GEM
@@ -10,7 +10,7 @@ GEM
10
10
  ast (2.3.0)
11
11
  coderay (1.1.1)
12
12
  diff-lcs (1.3)
13
- domain_name (0.5.20170223)
13
+ domain_name (0.5.20170404)
14
14
  unf (>= 0.0.5, < 1.0.0)
15
15
  http-cookie (1.0.3)
16
16
  domain_name (~> 0.5)
@@ -27,11 +27,11 @@ GEM
27
27
  mime-types (3.1)
28
28
  mime-types-data (~> 3.2015)
29
29
  mime-types-data (3.2016.0521)
30
- mini_portile2 (2.1.0)
30
+ mini_portile2 (2.2.0)
31
31
  net-http-digest_auth (1.4.1)
32
32
  net-http-persistent (2.9.4)
33
- nokogiri (1.7.0.1)
34
- mini_portile2 (~> 2.1.0)
33
+ nokogiri (1.8.0)
34
+ mini_portile2 (~> 2.2.0)
35
35
  ntlm-http (0.1.1)
36
36
  parser (2.4.0.0)
37
37
  ast (~> 2.2)
@@ -65,7 +65,7 @@ GEM
65
65
  slop (3.6.0)
66
66
  unf (0.1.4)
67
67
  unf_ext
68
- unf_ext (0.0.7.2)
68
+ unf_ext (0.0.7.4)
69
69
  unicode-display_width (1.1.3)
70
70
  webrobots (0.1.2)
71
71
 
@@ -81,4 +81,4 @@ DEPENDENCIES
81
81
  rubocop (~> 0.47)
82
82
 
83
83
  BUNDLED WITH
84
- 1.15.1
84
+ 1.16.0.pre.2
data/README.md CHANGED
@@ -15,6 +15,7 @@ gem 'dmm-crawler'
15
15
  ```
16
16
 
17
17
  ## Usage
18
+
18
19
  Run the following in the `.rb` file where you want to use the data to get the crawled results.
19
20
 
20
21
  ```ruby
@@ -22,14 +23,19 @@ require 'dmm-crawler'
22
23
 
23
24
  include DMMCrawler
24
25
 
25
- Ranking.new(term: '24', submedia: 'cg').arts
26
+ client = Client.new do |agent|
27
+ agent.ignore_bad_chunking = false
28
+ end
26
29
 
30
+ client.rankings(term: '24', submedia: 'cg')
27
31
  # =>
28
32
  # {
29
33
  # title: "title",
30
- # url: 'URL for title',
31
- # image_url: 'Link to title's main image',
32
- # tags: ['tag1', 'tag2']
34
+ # title_link: "title url",
35
+ # image_url: "Link to title's main image",
36
+ # submedia: "cg",
37
+ # informations: [{key: 'key', value: 'value'}],
38
+ # tags: ["tag1", "tag2"]
33
39
  # }
34
40
  ```
35
41
 
@@ -3,6 +3,7 @@
3
3
  # DMM Crawler
4
4
 
5
5
  ## DMM Crawlerとは
6
+
6
7
  DMM.R18のクロールしたデータを取得するgemです。現在、**同人**のランキングにのみ対応しております。
7
8
 
8
9
  ## インストール
@@ -14,6 +15,7 @@ gem 'dmm-crawler'
14
15
  ```
15
16
 
16
17
  ## 使い方
18
+
17
19
  データを使いたい`.rb`ファイルで以下を実行したらクロールしたデータが取得出来ます。
18
20
 
19
21
  ```ruby
@@ -21,14 +23,19 @@ require 'dmm-crawler'
21
23
 
22
24
  include DMMCrawler
23
25
 
24
- Ranking.new(term: '24', submedia: 'cg').arts
26
+ client = Client.new do |agent|
27
+ agent.ignore_bad_chunking = false
28
+ end
25
29
 
30
+ client.rankings(term: '24', submedia: 'cg')
26
31
  # =>
27
32
  # {
28
33
  # title: "タイトル",
29
- # url: '作品のURL',
30
- # image_url: '作品のメイン画像へのURL',
31
- # tags: ['タグ1', 'タグ2']
34
+ # title_link: "タイトルURL",
35
+ # image_url: "画像URL",
36
+ # submedia: "cg",
37
+ # informations: [{key: 'key', value: 'value'}],
38
+ # tags: ["タグ1", "タグ2"]
32
39
  # }
33
40
  ```
34
41
 
@@ -7,4 +7,5 @@ end
7
7
  require 'dmm-crawler/agent'
8
8
  require 'dmm-crawler/attributes'
9
9
  require 'dmm-crawler/ranking'
10
+ require 'dmm-crawler/client'
10
11
  require 'dmm-crawler/version'
@@ -1,10 +1,7 @@
1
1
  module DMMCrawler
2
2
  class Attributes
3
- def initialize(element, submedia = nil)
4
- @agent = Agent.instance.agent
5
- @element = element
6
-
7
- @submedia = submedia
3
+ def initialize(url)
4
+ @element = Agent.instance.agent.get(url)
8
5
  end
9
6
 
10
7
  def to_a
@@ -31,7 +28,13 @@ module DMMCrawler
31
28
 
32
29
  def image_url
33
30
  if art_page?
34
- @element.search('.productPreview__item img').last.attributes['data-src'].value
31
+ attrs = @element.search('.productPreview__item img').last.attributes
32
+
33
+ if attrs['data-src']
34
+ attrs['data-src'].value
35
+ else
36
+ attrs['src'].value
37
+ end
35
38
  else
36
39
  @element.search('img').last.attributes['src'].value
37
40
  end
@@ -46,8 +49,6 @@ module DMMCrawler
46
49
  end
47
50
 
48
51
  def submedia
49
- return @submedia if @submedia
50
-
51
52
  @element
52
53
  .search('.productAttribute-listItem .c_icon_productGenre')
53
54
  .first
@@ -0,0 +1,13 @@
1
+ module DMMCrawler
2
+ class Client
3
+ def initialize
4
+ @agent = Agent.instance.agent
5
+
6
+ yield @agent if block_given?
7
+ end
8
+
9
+ def rankings(arguments)
10
+ Ranking.new(arguments.merge!(agent: @agent)).arts
11
+ end
12
+ end
13
+ end
@@ -1,18 +1,17 @@
1
1
  module DMMCrawler
2
2
  class Ranking
3
3
  def initialize(arguments)
4
+ @agent = discriminate_agent(arguments[:agent])
4
5
  @term = discriminate_term(arguments[:term])
5
- @submedia = discriminate_submedia(arguments[:submedia])
6
+ @submedia = arguments[:submedia]
6
7
  @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
7
- @agent = Agent.instance.agent
8
8
  end
9
9
 
10
10
  def arts
11
11
  arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
12
12
  sleep 1
13
13
  url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
14
- page = @agent.get(url)
15
- Attributes.new(page, @submedia).to_a
14
+ Attributes.new(url).to_a
16
15
  end
17
16
 
18
17
  arts.map.with_index(1) do |(title, title_link, image_url, submedia, informations, tags), rank|
@@ -38,8 +37,8 @@ module DMMCrawler
38
37
  raise TypeError
39
38
  end
40
39
 
41
- def discriminate_submedia(submedia)
42
- return submedia if %w(all comic cg game voice).include?(submedia)
40
+ def discriminate_agent(agent)
41
+ return agent if agent.is_a?(Mechanize)
43
42
  raise TypeError
44
43
  end
45
44
  end
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.1.5'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
@@ -1,8 +1,9 @@
1
1
  describe DMMCrawler::Ranking do
2
2
  let(:attachments) { described_class.new(arguments).arts }
3
3
 
4
+ let(:agent) { DMMCrawler::Agent.instance.agent }
4
5
  let(:submedia) { 'cg' }
5
- let(:arguments) { { submedia: submedia, term: term } }
6
+ let(:arguments) { { submedia: submedia, term: term, agent: agent } }
6
7
 
7
8
  describe '#arts' do
8
9
  after { sleep 2 }
@@ -26,7 +27,7 @@ describe DMMCrawler::Ranking do
26
27
  context 'with not registered argument' do
27
28
  subject { -> { attachments } }
28
29
 
29
- let(:term) { 'hoge' }
30
+ let(:term) { nil }
30
31
 
31
32
  it { is_expected.to raise_error(TypeError) }
32
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-24 00:00:00.000000000 Z
11
+ date: 2017-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -116,6 +116,7 @@ files:
116
116
  - lib/dmm-crawler.rb
117
117
  - lib/dmm-crawler/agent.rb
118
118
  - lib/dmm-crawler/attributes.rb
119
+ - lib/dmm-crawler/client.rb
119
120
  - lib/dmm-crawler/ranking.rb
120
121
  - lib/dmm-crawler/version.rb
121
122
  - spec/dmm-crawler/ranking_spec.rb
@@ -140,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
141
  version: '0'
141
142
  requirements: []
142
143
  rubyforge_project:
143
- rubygems_version: 2.6.10
144
+ rubygems_version: 2.5.2
144
145
  signing_key:
145
146
  specification_version: 4
146
147
  summary: Show DMM and DMM.R18's crawled data