dmm-crawler 0.2.0 → 0.2.1

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: bde3c371a800c5ea7438e38d62c21792771899c3
4
- data.tar.gz: f446060552d5900de241ce4785201e9c612b72d1
3
+ metadata.gz: 0fbdb558032e848d11224c9030642f04dfee44bf
4
+ data.tar.gz: ccb2cc27b9aafe47ca5331ae092889345fd177f9
5
5
  SHA512:
6
- metadata.gz: ea920b8c0258998de73cb1cb2973fe09c9f4272424751dee6c1a5ba27f0f327f379a8d13449f761ea5fdfe1d7e7d6e1e9e312a0a5d170d8a241907d27ea5e205
7
- data.tar.gz: c425232a8d22c6f1a7b6e476211fb7c4b483952f04f9ddeeb30fe6eae1a2b73688c25bf7659e06f8601e11a4ded8ca700a35442a6adc2ec736badd27c9ee374b
6
+ metadata.gz: 72f5cd5902904c4fe4100fbacf5374bc28070406112d5ca82a5abd909a5eb3c5d7882b77ca905ce60254c9a8b0daf56d74a7df7fc7c7ae77d54c631cad0e50d0
7
+ data.tar.gz: 70f8c48093ab24a88c1923312db6c6e2131c2704c04f1d6d20ac603d90b7efa9df1b237313cdfb6be1357064aec89a1c15c4fe831112ae78ea8b4ef8a39be200
@@ -1,5 +1,17 @@
1
1
  # Change logs
2
2
 
3
+ ## 0.2.1
4
+ - Add an attribute for author.
5
+
6
+ ## 0.2.0
7
+ - Make optional configuration settable for Mechanize.
8
+ - Fix a bug where data-src is not found.
9
+ - Do not fix types of submedia.
10
+ - Update `README.md`.
11
+
12
+ ### Breaking Changes
13
+ - Do not use passed value of submedia type.
14
+
3
15
  ## 0.1.5
4
16
  - Do not crawl columns related to description.
5
17
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dmm-crawler (0.2.0)
4
+ dmm-crawler (0.2.1)
5
5
  mechanize
6
6
 
7
7
  GEM
@@ -27,11 +27,11 @@ GEM
27
27
  mime-types (3.1)
28
28
  mime-types-data (~> 3.2015)
29
29
  mime-types-data (3.2016.0521)
30
- mini_portile2 (2.2.0)
30
+ mini_portile2 (2.3.0)
31
31
  net-http-digest_auth (1.4.1)
32
32
  net-http-persistent (2.9.4)
33
- nokogiri (1.8.0)
34
- mini_portile2 (~> 2.2.0)
33
+ nokogiri (1.8.1)
34
+ mini_portile2 (~> 2.3.0)
35
35
  ntlm-http (0.1.1)
36
36
  parser (2.4.0.0)
37
37
  ast (~> 2.2)
@@ -1,7 +1,7 @@
1
1
  module DMMCrawler
2
2
  class Attributes
3
3
  def initialize(url)
4
- @element = Agent.instance.agent.get(url)
4
+ @page = Agent.instance.agent.get(url)
5
5
  end
6
6
 
7
7
  def to_a
@@ -10,6 +10,7 @@ module DMMCrawler
10
10
  title_link,
11
11
  image_url,
12
12
  submedia,
13
+ author,
13
14
  informations,
14
15
  tags
15
16
  ]
@@ -19,16 +20,24 @@ module DMMCrawler
19
20
 
20
21
  def title
21
22
  if art_page?
22
- @element.search('.productTitle__txt span').remove
23
- @element.search('.productTitle__txt').text.strip
23
+ @page.search('.productTitle__txt span').remove
24
+ @page.search('.productTitle__txt').text.strip
24
25
  else
25
- @element.search('.rank-name').first.text.strip
26
+ @page.search('.rank-name').first.text.strip
27
+ end
28
+ end
29
+
30
+ def title_link
31
+ if art_page?
32
+ @page.uri.to_s
33
+ else
34
+ File.join(BASE_URL, @page.search('.rank-name').first.search('a').first.attributes.first[1].value)
26
35
  end
27
36
  end
28
37
 
29
38
  def image_url
30
39
  if art_page?
31
- attrs = @element.search('.productPreview__item img').last.attributes
40
+ attrs = @page.search('.productPreview__item img').last.attributes
32
41
 
33
42
  if attrs['data-src']
34
43
  attrs['data-src'].value
@@ -36,20 +45,12 @@ module DMMCrawler
36
45
  attrs['src'].value
37
46
  end
38
47
  else
39
- @element.search('img').last.attributes['src'].value
40
- end
41
- end
42
-
43
- def title_link
44
- if art_page?
45
- @element.uri.to_s
46
- else
47
- File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
48
+ @page.search('img').last.attributes['src'].value
48
49
  end
49
50
  end
50
51
 
51
52
  def submedia
52
- @element
53
+ @page
53
54
  .search('.productAttribute-listItem .c_icon_productGenre')
54
55
  .first
55
56
  .attributes['class']
@@ -58,9 +59,13 @@ module DMMCrawler
58
59
  .delete('-')
59
60
  end
60
61
 
62
+ def author
63
+ @page.search('p.circleProductTitle__main').text.gsub('作品一覧', '')
64
+ end
65
+
61
66
  def informations
62
- keys = extract_text(@element.search('.m-productInformation .productInformation__item .informationList__ttl'))
63
- values = extract_text(@element.search('.m-productInformation .productInformation__item .informationList__txt'))
67
+ keys = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__ttl'))
68
+ values = extract_text(@page.search('.m-productInformation .productInformation__item .informationList__txt'))
64
69
 
65
70
  information = keys.zip(values)
66
71
  series = information.find { |array| array.first == 'シリーズ' }
@@ -73,22 +78,22 @@ module DMMCrawler
73
78
  information.map { |key, value| { key: key, value: value } }
74
79
  end
75
80
 
76
- def extract_text(elements)
77
- elements
78
- .select { |element| element.text.strip != 'ジャンル' }
79
- .map { |element| element.children.text.strip }
80
- end
81
-
82
81
  def tags
83
82
  if art_page?
84
- @element.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
83
+ @page.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
85
84
  else
86
- @element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
85
+ @page.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
87
86
  end
88
87
  end
89
88
 
89
+ def extract_text(elements)
90
+ elements
91
+ .select { |element| element.text.strip != 'ジャンル' }
92
+ .map { |element| element.children.text.strip }
93
+ end
94
+
90
95
  def art_page?
91
- @element.search('.rank-name').empty?
96
+ @page.search('.rank-name').empty?
92
97
  end
93
98
  end
94
99
  end
@@ -14,12 +14,13 @@ module DMMCrawler
14
14
  Attributes.new(url).to_a
15
15
  end
16
16
 
17
- arts.map.with_index(1) do |(title, title_link, image_url, submedia, informations, tags), rank|
17
+ arts.map.with_index(1) do |(title, title_link, image_url, submedia, author, informations, tags), rank|
18
18
  {
19
19
  title: "#{rank}位: #{title}",
20
20
  title_link: title_link,
21
21
  image_url: image_url,
22
22
  submedia: submedia,
23
+ author: author,
23
24
  informations: informations,
24
25
  tags: tags
25
26
  }
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.2.0'.freeze
2
+ VERSION = '0.2.1'.freeze
3
3
  end
@@ -21,7 +21,7 @@ describe DMMCrawler::Ranking do
21
21
 
22
22
  let(:term) { '24' }
23
23
 
24
- it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :informations, :tags)) }
24
+ it { is_expected.to all(include(:title, :title_link, :image_url, :submedia, :author, :informations, :tags)) }
25
25
  end
26
26
 
27
27
  context 'with not registered argument' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori