dmm-crawler 0.1.5 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b6d65185ef93c93354c8eb9967b6b60ca64650b2
4
- data.tar.gz: e9043312de49e3a70f54bf18d6c5fc64b1772a07
3
+ metadata.gz: bde3c371a800c5ea7438e38d62c21792771899c3
4
+ data.tar.gz: f446060552d5900de241ce4785201e9c612b72d1
5
5
  SHA512:
6
- metadata.gz: 38e501d44bbef30dbfe1efdadf5e696bcfd9d4cc7580c03bf7b9feff1eabef385b5a8751d45219ddba6da1c659f08120f203dc3cc0359d242696ee452fd5582f
7
- data.tar.gz: 48720a7e5b861dfc505da33779f8f1c43cb1b0b31b2ff385a8ed92a3fe696879bf98191ed60a3aae89eb8329782d963bb0f8f34c47768f2f237233530d9e2e10
6
+ metadata.gz: ea920b8c0258998de73cb1cb2973fe09c9f4272424751dee6c1a5ba27f0f327f379a8d13449f761ea5fdfe1d7e7d6e1e9e312a0a5d170d8a241907d27ea5e205
7
+ data.tar.gz: c425232a8d22c6f1a7b6e476211fb7c4b483952f04f9ddeeb30fe6eae1a2b73688c25bf7659e06f8601e11a4ded8ca700a35442a6adc2ec736badd27c9ee374b
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dmm-crawler (0.1.5)
4
+ dmm-crawler (0.2.0)
5
5
  mechanize
6
6
 
7
7
  GEM
@@ -10,7 +10,7 @@ GEM
10
10
  ast (2.3.0)
11
11
  coderay (1.1.1)
12
12
  diff-lcs (1.3)
13
- domain_name (0.5.20170223)
13
+ domain_name (0.5.20170404)
14
14
  unf (>= 0.0.5, < 1.0.0)
15
15
  http-cookie (1.0.3)
16
16
  domain_name (~> 0.5)
@@ -27,11 +27,11 @@ GEM
27
27
  mime-types (3.1)
28
28
  mime-types-data (~> 3.2015)
29
29
  mime-types-data (3.2016.0521)
30
- mini_portile2 (2.1.0)
30
+ mini_portile2 (2.2.0)
31
31
  net-http-digest_auth (1.4.1)
32
32
  net-http-persistent (2.9.4)
33
- nokogiri (1.7.0.1)
34
- mini_portile2 (~> 2.1.0)
33
+ nokogiri (1.8.0)
34
+ mini_portile2 (~> 2.2.0)
35
35
  ntlm-http (0.1.1)
36
36
  parser (2.4.0.0)
37
37
  ast (~> 2.2)
@@ -65,7 +65,7 @@ GEM
65
65
  slop (3.6.0)
66
66
  unf (0.1.4)
67
67
  unf_ext
68
- unf_ext (0.0.7.2)
68
+ unf_ext (0.0.7.4)
69
69
  unicode-display_width (1.1.3)
70
70
  webrobots (0.1.2)
71
71
 
@@ -81,4 +81,4 @@ DEPENDENCIES
81
81
  rubocop (~> 0.47)
82
82
 
83
83
  BUNDLED WITH
84
- 1.15.1
84
+ 1.16.0.pre.2
data/README.md CHANGED
@@ -15,6 +15,7 @@ gem 'dmm-crawler'
15
15
  ```
16
16
 
17
17
  ## Usage
18
+
18
19
  From the bot in invited Slack's room.
19
20
 
20
21
  ```ruby
@@ -22,14 +23,19 @@ require 'dmm-crawler'
22
23
 
23
24
  include DMMCrawler
24
25
 
25
- Ranking.new(term: '24', submedia: 'cg').arts
26
+ client = Client.new do |agent|
27
+ agent.ignore_bad_chunking = false
28
+ end
26
29
 
30
+ client.rankings(term: '24', submedia: 'cg')
27
31
  # =>
28
32
  # {
29
33
  # title: "title",
30
- # url: 'URL for title',
31
- # image_url: 'Link to title's main image',
32
- # tags: ['tag1', 'tag2']
34
+ # title_link: "title url",
35
+ # image_url: "Link to title"s main image",
36
+ # submedia: "cg",
37
+ # informations: [{key: 'key', value: 'value'}],
38
+ # tags: ["tag1", "tag2"]
33
39
  # }
34
40
  ```
35
41
 
@@ -3,6 +3,7 @@
3
3
  # DMM Crawler
4
4
 
5
5
  ## DMM Crawlerとは
6
+
6
7
  DMM.R18のクロールしたデータを取得するgemです。現在、**同人**のランキングにのみ対応しております。
7
8
 
8
9
  ## インストール
@@ -14,6 +15,7 @@ gem 'dmm-crawler'
14
15
  ```
15
16
 
16
17
  ## 使い方
18
+
17
19
  データを使いたい`.rb`ファイルで以下を実行したらクロールしたデータが取得出来ます。
18
20
 
19
21
  ```ruby
@@ -21,14 +23,19 @@ require 'dmm-crawler'
21
23
 
22
24
  include DMMCrawler
23
25
 
24
- Ranking.new(term: '24', submedia: 'cg').arts
26
+ client = Client.new do |agent|
27
+ agent.ignore_bad_chunking = false
28
+ end
25
29
 
30
+ client.rankings(term: '24', submedia: 'cg')
26
31
  # =>
27
32
  # {
28
33
  # title: "タイトル",
29
- # url: '作品のURL',
30
- # image_url: '作品のメイン画像へのURL',
31
- # tags: ['タグ1', 'タグ2']
34
+ # title_link: "タイトルURL",
35
+ # image_url: "画像URL",
36
+ # submedia: "cg",
37
+ # informations: [{key: 'key', value: 'value'}],
38
+ # tags: ["タグ1", "タグ2"]
32
39
  # }
33
40
  ```
34
41
 
@@ -7,4 +7,5 @@ end
7
7
  require 'dmm-crawler/agent'
8
8
  require 'dmm-crawler/attributes'
9
9
  require 'dmm-crawler/ranking'
10
+ require 'dmm-crawler/client'
10
11
  require 'dmm-crawler/version'
@@ -1,10 +1,7 @@
1
1
  module DMMCrawler
2
2
  class Attributes
3
- def initialize(element, submedia = nil)
4
- @agent = Agent.instance.agent
5
- @element = element
6
-
7
- @submedia = submedia
3
+ def initialize(url)
4
+ @element = Agent.instance.agent.get(url)
8
5
  end
9
6
 
10
7
  def to_a
@@ -31,7 +28,13 @@ module DMMCrawler
31
28
 
32
29
  def image_url
33
30
  if art_page?
34
- @element.search('.productPreview__item img').last.attributes['data-src'].value
31
+ attrs = @element.search('.productPreview__item img').last.attributes
32
+
33
+ if attrs['data-src']
34
+ attrs['data-src'].value
35
+ else
36
+ attrs['src'].value
37
+ end
35
38
  else
36
39
  @element.search('img').last.attributes['src'].value
37
40
  end
@@ -46,8 +49,6 @@ module DMMCrawler
46
49
  end
47
50
 
48
51
  def submedia
49
- return @submedia if @submedia
50
-
51
52
  @element
52
53
  .search('.productAttribute-listItem .c_icon_productGenre')
53
54
  .first
@@ -0,0 +1,13 @@
1
+ module DMMCrawler
2
+ class Client
3
+ def initialize
4
+ @agent = Agent.instance.agent
5
+
6
+ yield @agent if block_given?
7
+ end
8
+
9
+ def rankings(arguments)
10
+ Ranking.new(arguments.merge!(agent: @agent)).arts
11
+ end
12
+ end
13
+ end
@@ -1,18 +1,17 @@
1
1
  module DMMCrawler
2
2
  class Ranking
3
3
  def initialize(arguments)
4
+ @agent = discriminate_agent(arguments[:agent])
4
5
  @term = discriminate_term(arguments[:term])
5
- @submedia = discriminate_submedia(arguments[:submedia])
6
+ @submedia = arguments[:submedia]
6
7
  @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
7
- @agent = Agent.instance.agent
8
8
  end
9
9
 
10
10
  def arts
11
11
  arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
12
12
  sleep 1
13
13
  url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
14
- page = @agent.get(url)
15
- Attributes.new(page, @submedia).to_a
14
+ Attributes.new(url).to_a
16
15
  end
17
16
 
18
17
  arts.map.with_index(1) do |(title, title_link, image_url, submedia, informations, tags), rank|
@@ -38,8 +37,8 @@ module DMMCrawler
38
37
  raise TypeError
39
38
  end
40
39
 
41
- def discriminate_submedia(submedia)
42
- return submedia if %w(all comic cg game voice).include?(submedia)
40
+ def discriminate_agent(agent)
41
+ return agent if agent.is_a?(Mechanize)
43
42
  raise TypeError
44
43
  end
45
44
  end
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.1.5'.freeze
2
+ VERSION = '0.2.0'.freeze
3
3
  end
@@ -1,8 +1,9 @@
1
1
  describe DMMCrawler::Ranking do
2
2
  let(:attachments) { described_class.new(arguments).arts }
3
3
 
4
+ let(:agent) { DMMCrawler::Agent.instance.agent }
4
5
  let(:submedia) { 'cg' }
5
- let(:arguments) { { submedia: submedia, term: term } }
6
+ let(:arguments) { { submedia: submedia, term: term, agent: agent } }
6
7
 
7
8
  describe '#arts' do
8
9
  after { sleep 2 }
@@ -26,7 +27,7 @@ describe DMMCrawler::Ranking do
26
27
  context 'with not registered argument' do
27
28
  subject { -> { attachments } }
28
29
 
29
- let(:term) { 'hoge' }
30
+ let(:term) { nil }
30
31
 
31
32
  it { is_expected.to raise_error(TypeError) }
32
33
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.5
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-06-24 00:00:00.000000000 Z
11
+ date: 2017-09-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -116,6 +116,7 @@ files:
116
116
  - lib/dmm-crawler.rb
117
117
  - lib/dmm-crawler/agent.rb
118
118
  - lib/dmm-crawler/attributes.rb
119
+ - lib/dmm-crawler/client.rb
119
120
  - lib/dmm-crawler/ranking.rb
120
121
  - lib/dmm-crawler/version.rb
121
122
  - spec/dmm-crawler/ranking_spec.rb
@@ -140,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
140
141
  version: '0'
141
142
  requirements: []
142
143
  rubyforge_project:
143
- rubygems_version: 2.6.10
144
+ rubygems_version: 2.5.2
144
145
  signing_key:
145
146
  specification_version: 4
146
147
  summary: Show DMM and DMM.R18's crawled data