dmm-crawler 0.1.5 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -7
- data/README.md +10 -4
- data/doc/ja/README.md +11 -4
- data/lib/dmm-crawler.rb +1 -0
- data/lib/dmm-crawler/attributes.rb +9 -8
- data/lib/dmm-crawler/client.rb +13 -0
- data/lib/dmm-crawler/ranking.rb +5 -6
- data/lib/dmm-crawler/version.rb +1 -1
- data/spec/dmm-crawler/ranking_spec.rb +3 -2
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bde3c371a800c5ea7438e38d62c21792771899c3
|
4
|
+
data.tar.gz: f446060552d5900de241ce4785201e9c612b72d1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea920b8c0258998de73cb1cb2973fe09c9f4272424751dee6c1a5ba27f0f327f379a8d13449f761ea5fdfe1d7e7d6e1e9e312a0a5d170d8a241907d27ea5e205
|
7
|
+
data.tar.gz: c425232a8d22c6f1a7b6e476211fb7c4b483952f04f9ddeeb30fe6eae1a2b73688c25bf7659e06f8601e11a4ded8ca700a35442a6adc2ec736badd27c9ee374b
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
dmm-crawler (0.1.5)
|
4
|
+
dmm-crawler (0.2.0)
|
5
5
|
mechanize
|
6
6
|
|
7
7
|
GEM
|
@@ -10,7 +10,7 @@ GEM
|
|
10
10
|
ast (2.3.0)
|
11
11
|
coderay (1.1.1)
|
12
12
|
diff-lcs (1.3)
|
13
|
-
domain_name (0.5.
|
13
|
+
domain_name (0.5.20170404)
|
14
14
|
unf (>= 0.0.5, < 1.0.0)
|
15
15
|
http-cookie (1.0.3)
|
16
16
|
domain_name (~> 0.5)
|
@@ -27,11 +27,11 @@ GEM
|
|
27
27
|
mime-types (3.1)
|
28
28
|
mime-types-data (~> 3.2015)
|
29
29
|
mime-types-data (3.2016.0521)
|
30
|
-
mini_portile2 (2.
|
30
|
+
mini_portile2 (2.2.0)
|
31
31
|
net-http-digest_auth (1.4.1)
|
32
32
|
net-http-persistent (2.9.4)
|
33
|
-
nokogiri (1.
|
34
|
-
mini_portile2 (~> 2.
|
33
|
+
nokogiri (1.8.0)
|
34
|
+
mini_portile2 (~> 2.2.0)
|
35
35
|
ntlm-http (0.1.1)
|
36
36
|
parser (2.4.0.0)
|
37
37
|
ast (~> 2.2)
|
@@ -65,7 +65,7 @@ GEM
|
|
65
65
|
slop (3.6.0)
|
66
66
|
unf (0.1.4)
|
67
67
|
unf_ext
|
68
|
-
unf_ext (0.0.7.
|
68
|
+
unf_ext (0.0.7.4)
|
69
69
|
unicode-display_width (1.1.3)
|
70
70
|
webrobots (0.1.2)
|
71
71
|
|
@@ -81,4 +81,4 @@ DEPENDENCIES
|
|
81
81
|
rubocop (~> 0.47)
|
82
82
|
|
83
83
|
BUNDLED WITH
|
84
|
-
1.
|
84
|
+
1.16.0.pre.2
|
data/README.md
CHANGED
@@ -15,6 +15,7 @@ gem 'dmm-crawler'
|
|
15
15
|
```
|
16
16
|
|
17
17
|
## Usage
|
18
|
+
|
18
19
|
From the bot in invited Slack's room.
|
19
20
|
|
20
21
|
```ruby
|
@@ -22,14 +23,19 @@ require 'dmm-crawler'
|
|
22
23
|
|
23
24
|
include DMMCrawler
|
24
25
|
|
25
|
-
|
26
|
+
client = Client.new do |agent|
|
27
|
+
agent.ignore_bad_chunking = false
|
28
|
+
end
|
26
29
|
|
30
|
+
client.rankings(term: '24', submedia: 'cg')
|
27
31
|
# =>
|
28
32
|
# {
|
29
33
|
# title: "title",
|
30
|
-
#
|
31
|
-
# image_url:
|
32
|
-
#
|
34
|
+
# title_link: "title url",
|
35
|
+
# image_url: "Link to title's main image",
|
36
|
+
# submedia: "cg",
|
37
|
+
# informations: [{key: 'key', value: 'value'}],
|
38
|
+
# tags: ["tag1", "tag2"]
|
33
39
|
# }
|
34
40
|
```
|
35
41
|
|
data/doc/ja/README.md
CHANGED
@@ -3,6 +3,7 @@
|
|
3
3
|
# DMM Crawler
|
4
4
|
|
5
5
|
## DMM Crawlerとは
|
6
|
+
|
6
7
|
DMM.R18のクロールしたデータを取得するgemです。現在、**同人**のランキングにのみ対応しております。
|
7
8
|
|
8
9
|
## インストール
|
@@ -14,6 +15,7 @@ gem 'dmm-crawler'
|
|
14
15
|
```
|
15
16
|
|
16
17
|
## 使い方
|
18
|
+
|
17
19
|
データを使いたい`.rb`ファイルで以下を実行したらクロールしたデータが取得出来ます。
|
18
20
|
|
19
21
|
```ruby
|
@@ -21,14 +23,19 @@ require 'dmm-crawler'
|
|
21
23
|
|
22
24
|
include DMMCrawler
|
23
25
|
|
24
|
-
|
26
|
+
client = Client.new do |agent|
|
27
|
+
agent.ignore_bad_chunking = false
|
28
|
+
end
|
25
29
|
|
30
|
+
client.rankings(term: '24', submedia: 'cg')
|
26
31
|
# =>
|
27
32
|
# {
|
28
33
|
# title: "タイトル",
|
29
|
-
#
|
30
|
-
# image_url:
|
31
|
-
#
|
34
|
+
# title_link: "タイトルURL",
|
35
|
+
# image_url: "画像URL",
|
36
|
+
# submedia: "cg",
|
37
|
+
# informations: [{key: 'key', value: 'value'}],
|
38
|
+
# tags: ["タグ1", "タグ2"]
|
32
39
|
# }
|
33
40
|
```
|
34
41
|
|
data/lib/dmm-crawler/attributes.rb
CHANGED
@@ -1,10 +1,7 @@
|
|
1
1
|
module DMMCrawler
|
2
2
|
class Attributes
|
3
|
-
def initialize(
|
4
|
-
@
|
5
|
-
@element = element
|
6
|
-
|
7
|
-
@submedia = submedia
|
3
|
+
def initialize(url)
|
4
|
+
@element = Agent.instance.agent.get(url)
|
8
5
|
end
|
9
6
|
|
10
7
|
def to_a
|
@@ -31,7 +28,13 @@ module DMMCrawler
|
|
31
28
|
|
32
29
|
def image_url
|
33
30
|
if art_page?
|
34
|
-
@element.search('.productPreview__item img').last.attributes
|
31
|
+
attrs = @element.search('.productPreview__item img').last.attributes
|
32
|
+
|
33
|
+
if attrs['data-src']
|
34
|
+
attrs['data-src'].value
|
35
|
+
else
|
36
|
+
attrs['src'].value
|
37
|
+
end
|
35
38
|
else
|
36
39
|
@element.search('img').last.attributes['src'].value
|
37
40
|
end
|
@@ -46,8 +49,6 @@ module DMMCrawler
|
|
46
49
|
end
|
47
50
|
|
48
51
|
def submedia
|
49
|
-
return @submedia if @submedia
|
50
|
-
|
51
52
|
@element
|
52
53
|
.search('.productAttribute-listItem .c_icon_productGenre')
|
53
54
|
.first
|
data/lib/dmm-crawler/ranking.rb
CHANGED
@@ -1,18 +1,17 @@
|
|
1
1
|
module DMMCrawler
|
2
2
|
class Ranking
|
3
3
|
def initialize(arguments)
|
4
|
+
@agent = discriminate_agent(arguments[:agent])
|
4
5
|
@term = discriminate_term(arguments[:term])
|
5
|
-
@submedia =
|
6
|
+
@submedia = arguments[:submedia]
|
6
7
|
@url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
|
7
|
-
@agent = Agent.instance.agent
|
8
8
|
end
|
9
9
|
|
10
10
|
def arts
|
11
11
|
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
|
12
12
|
sleep 1
|
13
13
|
url = File.join(BASE_URL, element.search('.rank-name a').first.attributes['href'].value)
|
14
|
-
|
15
|
-
Attributes.new(page, @submedia).to_a
|
14
|
+
Attributes.new(url).to_a
|
16
15
|
end
|
17
16
|
|
18
17
|
arts.map.with_index(1) do |(title, title_link, image_url, submedia, informations, tags), rank|
|
@@ -38,8 +37,8 @@ module DMMCrawler
|
|
38
37
|
raise TypeError
|
39
38
|
end
|
40
39
|
|
41
|
-
def
|
42
|
-
return
|
40
|
+
def discriminate_agent(agent)
|
41
|
+
return agent if agent.is_a?(Mechanize)
|
43
42
|
raise TypeError
|
44
43
|
end
|
45
44
|
end
|
data/spec/dmm-crawler/ranking_spec.rb
CHANGED
@@ -1,8 +1,9 @@
|
|
1
1
|
describe DMMCrawler::Ranking do
|
2
2
|
let(:attachments) { described_class.new(arguments).arts }
|
3
3
|
|
4
|
+
let(:agent) { DMMCrawler::Agent.instance.agent }
|
4
5
|
let(:submedia) { 'cg' }
|
5
|
-
let(:arguments) { { submedia: submedia, term: term } }
|
6
|
+
let(:arguments) { { submedia: submedia, term: term, agent: agent } }
|
6
7
|
|
7
8
|
describe '#arts' do
|
8
9
|
after { sleep 2 }
|
@@ -26,7 +27,7 @@ describe DMMCrawler::Ranking do
|
|
26
27
|
context 'with not registered argument' do
|
27
28
|
subject { -> { attachments } }
|
28
29
|
|
29
|
-
let(:term) {
|
30
|
+
let(:term) { nil }
|
30
31
|
|
31
32
|
it { is_expected.to raise_error(TypeError) }
|
32
33
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmm-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.5
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Ohmori
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-
|
11
|
+
date: 2017-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -116,6 +116,7 @@ files:
|
|
116
116
|
- lib/dmm-crawler.rb
|
117
117
|
- lib/dmm-crawler/agent.rb
|
118
118
|
- lib/dmm-crawler/attributes.rb
|
119
|
+
- lib/dmm-crawler/client.rb
|
119
120
|
- lib/dmm-crawler/ranking.rb
|
120
121
|
- lib/dmm-crawler/version.rb
|
121
122
|
- spec/dmm-crawler/ranking_spec.rb
|
@@ -140,7 +141,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
140
141
|
version: '0'
|
141
142
|
requirements: []
|
142
143
|
rubyforge_project:
|
143
|
-
rubygems_version: 2.
|
144
|
+
rubygems_version: 2.5.2
|
144
145
|
signing_key:
|
145
146
|
specification_version: 4
|
146
147
|
summary: Show DMM and DMM.R18's crawled data
|