dmm-crawler 0.0.5 → 0.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +1 -1
- data/lib/dmm-crawler/agent.rb +8 -5
- data/lib/dmm-crawler/ranking.rb +50 -9
- data/lib/dmm-crawler/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9348e5bbc1d9fdd3687fa75aad5f99c5ddd26eef
|
4
|
+
data.tar.gz: acda09a89a2d16b311422d54e61f5df157b0b2f4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: be3d69e605d26e625eff51f373e5c6cbf4f9ac0182f93ad981d56fb366e441a4c062b0e0b187725679589c75bf90c11cdcc019385b6faa582bd5612011aa8332
|
7
|
+
data.tar.gz: 5695dd753f88ba79580a6258e40b00d6e98b37f031717831966c398d0a2751dac04b7f001db94dc928d5d2e2b30c9500c2a7143c09eb06042cce145859e36830
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/dmm-crawler/agent.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
|
+
require 'singleton'
|
2
|
+
|
1
3
|
module DMMCrawler
|
2
4
|
class Agent
|
3
|
-
|
5
|
+
include Singleton
|
4
6
|
|
5
|
-
def
|
6
|
-
|
7
|
-
|
8
|
-
|
7
|
+
def agent
|
8
|
+
agent = ::Mechanize.new
|
9
|
+
agent.request_headers = { 'Accept-Encoding' => '' }
|
10
|
+
agent.ignore_bad_chunking = true
|
11
|
+
agent
|
9
12
|
end
|
10
13
|
end
|
11
14
|
end
|
data/lib/dmm-crawler/ranking.rb
CHANGED
@@ -5,19 +5,13 @@ module DMMCrawler
|
|
5
5
|
def initialize(arguments)
|
6
6
|
@term = discriminate_term(arguments[:term])
|
7
7
|
@submedia = discriminate_submedia(arguments[:submedia])
|
8
|
-
@url = "
|
9
|
-
@agent = Agent.
|
8
|
+
@url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
|
9
|
+
@agent = Agent.instance.agent
|
10
10
|
end
|
11
11
|
|
12
12
|
def arts
|
13
13
|
arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
|
14
|
-
|
15
|
-
element.search('.rank-name').first.text.strip,
|
16
|
-
element.search('img').last.attributes['src'].value,
|
17
|
-
"#{BASE_URL}#{element.search('.rank-name').first.search('a').first.attributes.first[1].value}",
|
18
|
-
element.search('.rank-desc').text,
|
19
|
-
element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
20
|
-
]
|
14
|
+
Attribute.new(element).to_a
|
21
15
|
end
|
22
16
|
|
23
17
|
arts.map.with_index(1) do |(title, image_url, title_link, description, tags), rank|
|
@@ -31,6 +25,53 @@ module DMMCrawler
|
|
31
25
|
end
|
32
26
|
end
|
33
27
|
|
28
|
+
class Attribute
|
29
|
+
def initialize(element)
|
30
|
+
@element = element
|
31
|
+
@agent = Agent.instance.agent
|
32
|
+
end
|
33
|
+
|
34
|
+
def to_a
|
35
|
+
[
|
36
|
+
title,
|
37
|
+
title_link,
|
38
|
+
image_url,
|
39
|
+
description,
|
40
|
+
tags
|
41
|
+
]
|
42
|
+
end
|
43
|
+
|
44
|
+
private
|
45
|
+
|
46
|
+
def title
|
47
|
+
@element.search('.rank-name').first.text.strip
|
48
|
+
end
|
49
|
+
|
50
|
+
def image_url
|
51
|
+
@element.search('img').last.attributes['src'].value
|
52
|
+
end
|
53
|
+
|
54
|
+
def title_link
|
55
|
+
File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
|
56
|
+
end
|
57
|
+
|
58
|
+
def description
|
59
|
+
@element.search('.rank-desc').text.nil? ? fetch_description : @element.search('.rank-desc').text
|
60
|
+
end
|
61
|
+
|
62
|
+
def fetch_description
|
63
|
+
url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
|
64
|
+
page = @agent.get(url)
|
65
|
+
page.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '')
|
66
|
+
end
|
67
|
+
|
68
|
+
def tags
|
69
|
+
@element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
|
70
|
+
end
|
71
|
+
end
|
72
|
+
|
73
|
+
private_constant :Attribute
|
74
|
+
|
34
75
|
private
|
35
76
|
|
36
77
|
def page
|
data/lib/dmm-crawler/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: dmm-crawler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.5
|
4
|
+
version: 0.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Satoshi Ohmori
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05-
|
11
|
+
date: 2017-05-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: mechanize
|
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
139
139
|
version: '0'
|
140
140
|
requirements: []
|
141
141
|
rubyforge_project:
|
142
|
-
rubygems_version: 2.
|
142
|
+
rubygems_version: 2.6.12
|
143
143
|
signing_key:
|
144
144
|
specification_version: 4
|
145
145
|
summary: Show DMM and DMM.R18's crawled data
|