dmm-crawler 0.1.5 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -7
- data/README.md +10 -4
- data/doc/ja/README.md +11 -4
- data/lib/dmm-crawler.rb +1 -0
- data/lib/dmm-crawler/attributes.rb +9 -8
- data/lib/dmm-crawler/client.rb +13 -0
- data/lib/dmm-crawler/ranking.rb +5 -6
- data/lib/dmm-crawler/version.rb +1 -1
- data/spec/dmm-crawler/ranking_spec.rb +3 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bde3c371a800c5ea7438e38d62c21792771899c3
|
4
|
+
data.tar.gz: f446060552d5900de241ce4785201e9c612b72d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea920b8c0258998de73cb1cb2973fe09c9f4272424751dee6c1a5ba27f0f327f379a8d13449f761ea5fdfe1d7e7d6e1e9e312a0a5d170d8a241907d27ea5e205
|
7
|
+
data.tar.gz: c425232a8d22c6f1a7b6e476211fb7c4b483952f04f9ddeeb30fe6eae1a2b73688c25bf7659e06f8601e11a4ded8ca700a35442a6adc2ec736badd27c9ee374b
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
dmm-crawler (0.1.5)
|
4
|
+
dmm-crawler (0.2.0)
|
5
5
|
mechanize
|
6
6
|
|
7
7
|
GEM
|
@@ -10,7 +10,7 @@ GEM
|
|
10
10
|
ast (2.3.0)
|
11
11
|
coderay (1.1.1)
|
12
12
|
diff-lcs (1.3)
|
13
|
-
domain_name (0.5.
|
13
|
+
domain_name (0.5.20170404)
|
14
14
|
unf (>= 0.0.5, < 1.0.0)
|
15
15
|
http-cookie (1.0.3)
|
16
16
|
domain_name (~> 0.5)
|
@@ -27,11 +27,11 @@ GEM
|
|
27
27
|
mime-types (3.1)
|
28
28
|
mime-types-data (~> 3.2015)
|
29
29
|
mime-types-data (3.2016.0521)
|
30
|
-
mini_portile2 (2.
|
30
|
+
mini_portile2 (2.2.0)
|
31
31
|
net-http-digest_auth (1.4.1)
|
32
32
|
net-http-persistent (2.9.4)
|
33
|
-
nokogiri (1.
|
34
|
-
mini_portile2 (~> 2.
|
33
|
+
nokogiri (1.8.0)
|
34
|
+
mini_portile2 (~> 2.2.0)
|
35
35
|
ntlm-http (0.1.1)
|
36
36
|
parser (2.4.0.0)
|
37
37
|
ast (~> 2.2)
|
@@ -65,7 +65,7 @@ GEM
|
|
65
65
|
slop (3.6.0)
|
66
66
|
unf (0.1.4)
|
67
67
|
unf_ext
|
68
|
-
unf_ext (0.0.7.
|
68
|
+
unf_ext (0.0.7.4)
|
69
69
|
unicode-display_width (1.1.3)
|
70
70
|
webrobots (0.1.2)
|
71
71
|
|
@@ -81,4 +81,4 @@ DEPENDENCIES
|
|
81
81
|
rubocop (~> 0.47)
|
82
82
|
|
83
83
|
BUNDLED WITH
|
84
|
-
1.
|
84
|
+
1.16.0.pre.2
|
data/README.md
CHANGED
@@ -15,6 +15,7 @@ gem 'dmm-crawler'
|
|
15
15
|
```
|
16
16
|
|
17
17
|
## Usage
|
18
|
+
|
18
19
|
From the bot in invited Slack's room.
|
19
20
|
|
20
21
|
```ruby
|
@@ -22,14 +23,19 @@ require 'dmm-crawler'
|
|
22
23
|
|
23
24
|
include DMMCrawler
|
24
25
|
|
25
|
-
|
26
|
+
client = Client.new do |agent|
|
27
|
+
agent.ignore_bad_chunking = false
|
28
|
+
end
|
26
29
|
|
30
|
+
client.rankings(term: '24', submedia: 'cg')
|
27
31
|
# =>
|
28
32
|
# {
|
29
33
|
# title: "title",
|
30
|
-
#
|
31
|
-
# image_url:
|
32
|
-
#
|
34
|
+
# title_link: "title url",
|
35
|
+
# image_url: "Link to title's main image",
|
36
|
+
# submedia: "cg",
|
37
|
+
# informations: [{key: 'key', value: 'value'}],
|
38
|
+
# tags: ["tag1", "tag2"]
|
33
39
|
# }
|
34
40
|
```
|
35
41
|
|
data/doc/ja/README.md
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
# DMM Crawler
|
4
4
|
|
5
5
|
## DMM Crawlerとは
|
6
|
+
|
6
7
|
DMM.R18のクロールしたデータを取得するgemです。現在、**同人**のランキングにのみ対応しております。
|
7
8
|
|
8
9
|
## インストール
|
@@ -14,6 +15,7 @@ gem 'dmm-crawler'
|
|
14
15
|
```
|
15
16
|
|
16
17
|
## 使い方
|
18
|
+
|
17
19
|
データを使いたい`.rb`ファイルで以下を実行したらクロールしたデータが取得出来ます。
|
18
20
|
|
19
21
|
```ruby
|
@@ -21,14 +23,19 @@ require 'dmm-crawler'
|
|
21
23
|
|
22
24
|
include DMMCrawler
|
23
25
|
|
24
|
-
|
26
|
+
client = Client.new do |agent|
|
27
|
+
agent.ignore_bad_chunking = false
|
28
|
+
end
|
25
29
|
|
30
|
+
client.rankings(term: '24', submedia: 'cg')
|
26
31
|
# =>
|
27
32
|
# {
|
28
33
|
# title: "タイトル",
|
29
|
-
#
|
30
|
-
# image_url:
|
31
|
-
#
|
34
|
+
# title_link: "タイトルURL",
|
35
|
+
# image_url: "画像URL",
|
36
|
+
# submedia: "cg",
|
37
|
+
# informations: [{key: 'key', value: 'value'}],
|
38
|
+
# tags: ["タグ1", "タグ2"]
|
32
39
|
# }
|
33
40
|
```
|
34
41
|
|
data/lib/dmm-crawler/attributes.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
module DMMCrawler
|
2
2
|
class Attributes
|
3
|
-
def initialize(
|
4
|
-
@
|
5
|
-
@element = element
|
6
|
-
|
7
|
-
@submedia = submedia
|
3
|
+
def initialize(url)
|
4
|
+
@element = Agent.instance.agent.get(url)
|
8
5
|
end
|
9
6
|
|
10
7
|
def to_a
|
@@ -31,7 +28,13 @@ module DMMCrawler
|
|
31
28
|
|
32
29
|
def image_url
|
33
30
|
if art_page?
|
34
|
-
@element.search('.productPreview__item img').last.attributes
|
31
|
+
attrs = @element.search('.productPreview__item img').last.attributes
|
32
|
+
|
33
|
+
if attrs['data-src']
|
34
|
+
attrs['data-src'].value
|
35
|
+
else
|
36
|
+
attrs['src'].value
|
37
|
+
end
|
35
38
|
else
|
36
39
|
@element.search('img').last.attributes['src'].value
|
37
40
|
end
|
@@ -46,8 +49,6 @@ module DMMCrawler
|
|
46
49
|
end
|
47
50
|
|
48
51
|
def submedia
|
49
|
-
return @submedia if @submedia
|
50
|
-
|
51
52
|
@element
|
52
53
|
.search('.productAttribute-listItem .c_icon_productGenre')
|
53
54
|
.first
|
data/lib/dmm-crawler/ranking.rb
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
module DMMCrawler
|
2
2
|
class Ranking
|
3
3
|
def initialize(arguments)
|
4
|
+
@agent = discriminate_agent(arguments[:agent])
|
4
5
|
@term = discriminate_term(arguments[:term])
|
5
|
-
@submedia =
|
6
|
+
@submedia = arguments[:submedia]
|
6
7
|
@url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
|
7
|
-
@agent = Agent.instance.agent
|
8
8
|
end
|
9
9
|
|
10
10
|
def arts
|
11
11
|
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
|
12
12
|
sleep 1
|
13
13
|
url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
|
14
|
-
|
15
|
-
Attributes.new(page, @submedia).to_a
|
14
|
+
Attributes.new(url).to_a
|
16
15
|
end
|
17
16
|
|
18
17
|
arts.map.with_index(1) do |(title, title_link, image_url, submedia, informations, tags), rank|
|
@@ -38,8 +37,8 @@ module DMMCrawler
|
|
38
37
|
raise TypeError
|
39
38
|
end
|
40
39
|
|
41
|
-
def
|
42
|
-
return
|
40
|
+
def discriminate_agent(agent)
|
41
|
+
return agent if agent.is_a?(Mechanize)
|
43
42
|
raise TypeError
|
44
43
|
end
|
45
44
|
end
|
data/spec/dmm-crawler/ranking_spec.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
describe DMMCrawler::Ranking do
|
2
2
|
let(:attachments) { described_class.new(arguments).arts }
|
3
3
|
|
4
|
+
let(:agent) { DMMCrawler::Agent.instance.agent }
|
4
5
|
let(:submedia) { 'cg' }
|
5
|
-
let(:arguments) { { submedia: submedia, term: term } }
|
6
|
+
let(:arguments) { { submedia: submedia, term: term, agent: agent } }
|
6
7
|
|
7
8
|
describe '#arts' do
|
8
9
|
after { sleep 2 }
|
@@ -26,7 +27,7 @@ describe DMMCrawler::Ranking do
|
|
26
27
|
context 'with not registered argument' do
|
27
28
|
subject { -> { attachments } }
|
28
29
|
|
29
|
-
let(:term) {
|
30
|
+
let(:term) { nil }
|
30
31
|
|
31
32
|
it { is_expected.to raise_error(TypeError) }
|
32
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmm-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Ohmori
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -116,6 +116,7 @@ files:
|
|
116
116
|
- lib/dmm-crawler.rb
|
117
117
|
- lib/dmm-crawler/agent.rb
|
118
118
|
- lib/dmm-crawler/attributes.rb
|
119
|
+
- lib/dmm-crawler/client.rb
|
119
120
|
- lib/dmm-crawler/ranking.rb
|
120
121
|
- lib/dmm-crawler/version.rb
|
121
122
|
- spec/dmm-crawler/ranking_spec.rb
|
@@ -140,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
141
|
version: '0'
|
141
142
|
requirements: []
|
142
143
|
rubyforge_project:
|
143
|
-
rubygems_version: 2.
|
144
|
+
rubygems_version: 2.5.2
|
144
145
|
signing_key:
|
145
146
|
specification_version: 4
|
146
147
|
summary: Show DMM and DMM.R18's crawled data
|