dmm-crawler 0.0.5 → 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 99738f48c076aa4e2569f97fab7c9ad20500aa0f
4
- data.tar.gz: aaa5cf428459e5310449a03cfce45fcf87310de9
3
+ metadata.gz: 9348e5bbc1d9fdd3687fa75aad5f99c5ddd26eef
4
+ data.tar.gz: acda09a89a2d16b311422d54e61f5df157b0b2f4
5
5
  SHA512:
6
- metadata.gz: 58ca9321e140ba4327b2e9d49a062fe37a23a0c0feeb2d8c914054f1f9888fb151ffa6e87f3977268f63c3e024ad380275c011107b99f9189f70474a83600162
7
- data.tar.gz: f8b08887860332a742ab5301edaf48361a72e2cd85da2e6a7e5a915978e00712c65600f64e00d1e0791cae3ce1102f0d7f495b2512e140518bd27e30d92ce9f3
6
+ metadata.gz: be3d69e605d26e625eff51f373e5c6cbf4f9ac0182f93ad981d56fb366e441a4c062b0e0b187725679589c75bf90c11cdcc019385b6faa582bd5612011aa8332
7
+ data.tar.gz: 5695dd753f88ba79580a6258e40b00d6e98b37f031717831966c398d0a2751dac04b7f001db94dc928d5d2e2b30c9500c2a7143c09eb06042cce145859e36830
@@ -1,5 +1,9 @@
1
1
  # Change logs
2
2
 
3
+ ## 0.0.6
4
+ - Fix the bug where description is empty.
5
+ - Refactor code related to attributes for the art.
6
+
3
7
  ## 0.0.5
4
8
  - Add a function to fetch description.
5
9
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dmm-crawler (0.0.5)
4
+ dmm-crawler (0.0.6)
5
5
  mechanize
6
6
 
7
7
  GEM
@@ -1,11 +1,14 @@
1
+ require 'singleton'
2
+
1
3
  module DMMCrawler
2
4
  class Agent
3
- attr_accessor :agent
5
+ include Singleton
4
6
 
5
- def initialize
6
- @agent = ::Mechanize.new
7
- @agent.request_headers = { 'Accept-Encoding' => '' }
8
- @agent.ignore_bad_chunking = true
7
+ def agent
8
+ agent = ::Mechanize.new
9
+ agent.request_headers = { 'Accept-Encoding' => '' }
10
+ agent.ignore_bad_chunking = true
11
+ agent
9
12
  end
10
13
  end
11
14
  end
@@ -5,19 +5,13 @@ module DMMCrawler
5
5
  def initialize(arguments)
6
6
  @term = discriminate_term(arguments[:term])
7
7
  @submedia = discriminate_submedia(arguments[:submedia])
8
- @url = "#{BASE_URL}/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}"
9
- @agent = Agent.new.agent
8
+ @url = File.join(BASE_URL, "/dc/doujin/-/ranking-all/=/sort=popular/submedia=#{@submedia}/term=#{@term}")
9
+ @agent = Agent.instance.agent
10
10
  end
11
11
 
12
12
  def arts
13
13
  arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
14
- [
15
- element.search('.rank-name').first.text.strip,
16
- element.search('img').last.attributes['src'].value,
17
- "#{BASE_URL}#{element.search('.rank-name').first.search('a').first.attributes.first[1].value}",
18
- element.search('.rank-desc').text,
19
- element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
20
- ]
14
+ Attribute.new(element).to_a
21
15
  end
22
16
 
23
17
  arts.map.with_index(1) do |(title, image_url, title_link, description, tags), rank|
@@ -31,6 +25,53 @@ module DMMCrawler
31
25
  end
32
26
  end
33
27
 
28
+ class Attribute
29
+ def initialize(element)
30
+ @element = element
31
+ @agent = Agent.instance.agent
32
+ end
33
+
34
+ def to_a
35
+ [
36
+ title,
37
+ title_link,
38
+ image_url,
39
+ description,
40
+ tags
41
+ ]
42
+ end
43
+
44
+ private
45
+
46
+ def title
47
+ @element.search('.rank-name').first.text.strip
48
+ end
49
+
50
+ def image_url
51
+ @element.search('img').last.attributes['src'].value
52
+ end
53
+
54
+ def title_link
55
+ File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
56
+ end
57
+
58
+ def description
59
+ @element.search('.rank-desc').text.nil? ? fetch_description : @element.search('.rank-desc').text
60
+ end
61
+
62
+ def fetch_description
63
+ url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
64
+ page = @agent.get(url)
65
+ page.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '')
66
+ end
67
+
68
+ def tags
69
+ @element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
70
+ end
71
+ end
72
+
73
+ private_constant :Attribute
74
+
34
75
  private
35
76
 
36
77
  def page
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.0.5'.freeze
2
+ VERSION = '0.0.6'.freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-05-20 00:00:00.000000000 Z
11
+ date: 2017-05-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: mechanize
@@ -139,7 +139,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
139
139
  version: '0'
140
140
  requirements: []
141
141
  rubyforge_project:
142
- rubygems_version: 2.5.1
142
+ rubygems_version: 2.6.12
143
143
  signing_key:
144
144
  specification_version: 4
145
145
  summary: Show DMM and DMM.R18's crawled data