dmm-crawler 0.2.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bde3c371a800c5ea7438e38d62c21792771899c3
4
- data.tar.gz: f446060552d5900de241ce4785201e9c612b72d1
3
+ metadata.gz: 0fbdb558032e848d11224c9030642f04dfee44bf
4
+ data.tar.gz: ccb2cc27b9aafe47ca5331ae092889345fd177f9
5
5
  SHA512:
6
- metadata.gz: ea920b8c0258998de73cb1cb2973fe09c9f4272424751dee6c1a5ba27f0f327f379a8d13449f761ea5fdfe1d7e7d6e1e9e312a0a5d170d8a241907d27ea5e205
7
- data.tar.gz: c425232a8d22c6f1a7b6e476211fb7c4b483952f04f9ddeeb30fe6eae1a2b73688c25bf7659e06f8601e11a4ded8ca700a35442a6adc2ec736badd27c9ee374b
6
+ metadata.gz: 72f5cd5902904c4fe4100fbacf5374bc28070406112d5ca82a5abd909a5eb3c5d7882b77ca905ce60254c9a8b0daf56d74a7df7fc7c7ae77d54c631cad0e50d0
7
+ data.tar.gz: 70f8c48093ab24a88c1923312db6c6e2131c2704c04f1d6d20ac603d90b7efa9df1b237313cdfb6be1357064aec89a1c15c4fe831112ae78ea8b4ef8a39be200
@@ -1,5 +1,17 @@
1
1
  # Change logs
2
2
 
3
+ ## 0.2.1
4
+ - Add an attribute for author.
5
+
6
+ ## 0.2.0
7
+ - Make optional configuration settable for Mechanize.
8
+ - Fix a bug where data-src is not found
9
+ - Do not fix types of submedia.
10
+ - Update `README.md`.
11
+
12
+ ### Breaking Changes
13
+ - Do not use passed value of submedia type.
14
+
3
15
  ## 0.1.5
4
16
  - Do not crawl columns related to description.
5
17
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dmm-crawler (0.2.0)
4
+ dmm-crawler (0.2.1)
5
5
  mechanize
6
6
 
7
7
  GEM
@@ -27,11 +27,11 @@ GEM
27
27
  mime-types (3.1)
28
28
  mime-types-data (~> 3.2015)
29
29
  mime-types-data (3.2016.0521)
30
- mini_portile2 (2.2.0)
30
+ mini_portile2 (2.3.0)
31
31
  net-http-digest_auth (1.4.1)
32
32
  net-http-persistent (2.9.4)
33
- nokogiri (1.8.0)
34
- mini_portile2 (~> 2.2.0)
33
+ nokogiri (1.8.1)
34
+ mini_portile2 (~> 2.3.0)
35
35
  ntlm-http (0.1.1)
36
36
  parser (2.4.0.0)
37
37
  ast (~> 2.2)
@@ -1,7 +1,7 @@
1
1
  module DMMCrawler
2
2
  class Attributes
3
3
  def initialize(url)
4
- @element = Agent.instance.agent.get(url)
4
+ @page = Agent.instance.agent.get(url)
5
5
  end
6
6
 
7
7
  def to_a
@@ -10,6 +10,7 @@ module DMMCrawler
10
10
  title_link,
11
11
  image_url,
12
12
  submedia,
13
+ author,
13
14
  informations,
14
15
  tags
15
16
  ]
@@ -19,16 +20,24 @@ module DMMCrawler
19
20
 
20
21
  def title
21
22
  if art_page?
22
- @element.search('.productTitle__txt span').remove
23
- @element.search('.productTitle__txt').text.strip
23
+ @page.search('.productTitle__txt span').remove
24
+ @page.search('.productTitle__txt').text.strip
24
25
  else
25
- @element.search('.rank-name').first.text.strip
26
+ @page.search('.rank-name').first.text.strip
27
+ end
28
+ end
29
+
30
+ def title_link
31
+ if art_page?
32
+ @page.uri.to_s
33
+ else
34
+ File.join(BASE_URL, @page.search('.rank-name').first.search('a').first.attributes.first[1].value)
26
35
  end
27
36
  end
28
37
 
29
38
  def image_url
30
39
  if art_page?
31
- attrs = @element.search('.productPreview__item img').last.attributes
40
+ attrs = @page.search('.productPreview__item img').last.attributes
32
41
 
33
42
  if attrs['data-src']
34
43
  attrs['data-src'].value
@@ -36,20 +45,12 @@ module DMMCrawler
36
45
  attrs['src'].value
37
46
  end
38
47
  else
39
- @element.search('img').last.attributes['src'].value
40
- end
41
- end
42
-
43
- def title_link
44
- if art_page?
45
- @element.uri.to_s
46
- else
47
- File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
48
+ @page.search('img').last.attributes['src'].value
48
49
  end
49
50
  end
50
51
 
51
52
  def submedia
52
- @element
53
+ @page
53
54
  .search('.productAttribute-listItem .c_icon_productGenre')
54
55
  .first
55
56
  .attributes['class']
@@ -58,9 +59,13 @@ module DMMCrawler
58
59
  .delete('-')
59
60
  end
60
61
 
62
+ def author
63
+ @page.search('p.circleProductTitle__main').text.gsub('作品一覧', '')
64
+ end
65
+
61
66
  def informations
62
- keys = extract_text(@element.search('.m-productInformation .productInformation__item .informationList__ttl'))
63
- values = extract_text(@element.search('.m-productInformation .productInformation__item .informationList__txt'))
67
+ keys = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__ttl'))
68
+ values = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__txt'))
64
69
 
65
70
  information = keys.zip(values)
66
71
  series = information.find { |array| array.first == 'シリーズ' }
@@ -73,22 +78,22 @@ module DMMCrawler
73
78
  information.map { |key, value| { key: key, value: value } }
74
79
  end
75
80
 
76
- def extract_text(elements)
77
- elements
78
- .select { |element| element.text.strip != 'ジャンル' }
79
- .map { |element| element.children.text.strip }
80
- end
81
-
82
81
  def tags
83
82
  if art_page?
84
- @element.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
83
+ @page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
85
84
  else
86
- @element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
85
+ @page.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
87
86
  end
88
87
  end
89
88
 
89
+ def extract_text(elements)
90
+ elements
91
+ .select { |element| element.text.strip != 'ジャンル' }
92
+ .map { |element| element.children.text.strip }
93
+ end
94
+
90
95
  def art_page?
91
- @element.search('.rank-name').empty?
96
+ @page.search('.rank-name').empty?
92
97
  end
93
98
  end
94
99
  end
@@ -14,12 +14,13 @@ module DMMCrawler
14
14
  Attributes.new(url).to_a
15
15
  end
16
16
 
17
- arts.map.with_index(1) do |(title, title_link, image_url, submedia, informations, tags), rank|
17
+ arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, informations, tags), rank|
18
18
  {
19
19
  title: "#{rank}位: #{title}",
20
20
  title_link: title_link,
21
21
  image_url: image_url,
22
22
  submedia: submedia,
23
+ author: author,
23
24
  informations: informations,
24
25
  tags: tags
25
26
  }
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.2.0'.freeze
2
+ VERSION = '0.2.1'.freeze
3
3
  end
@@ -21,7 +21,7 @@ describe DMMCrawler::Ranking do
21
21
 
22
22
  let(:term) { '24' }
23
23
 
24
- it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :informations, :tags)) }
24
+ it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :informations, :tags)) }
25
25
  end
26
26
 
27
27
  context 'with not registered argument' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori