dmm-crawler 0.1.1 → 0.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: de55b4e7acaeea3d391451c3d2c7f52c70ee69f2
4
- data.tar.gz: 78e43451a03c47801ad2d79389ef873df6055822
3
+ metadata.gz: '06925a3fd6d470aaaf623d6b2da213c5b8ff631f'
4
+ data.tar.gz: 40dfc7eb3824eade889b6218d8c2042c4c62ae02
5
5
  SHA512:
6
- metadata.gz: 007f0a66ef96674827c9dbdee83bec84304e6932ecf3f9821a2c377a2904ac288cbfcbe56026c580a62d92a734af1dcf8d57bf4e19b4575e50f5b4051a7a915d
7
- data.tar.gz: cf63c892f60a777ce5ede4c2fb1746809eddbed0c273112e3120067fc800d94b810ff17fd3c7b0d5ae91ba6fe294e39889b8fd042293dadb49b906db481be339
6
+ metadata.gz: 32e43bd62c3af54c67b60eda38d7455f2209a9d3b2351b7893b94ec9329964c0a38c1ac9975fb3d6b720d2cd5d8be3c0c00a3d927c6021b1c2a9849f64bc3520
7
+ data.tar.gz: 42b519cd6268171d10634953c9392b5fdd1b220067dd7ca36c2e8ca1b2496c8d3b147026eb965417c52d7bde2a656caf0d39679d5c3a0fe18b79653f82008767
@@ -1,5 +1,10 @@
1
1
  # Change logs
2
2
 
3
+ ## 0.1.2
4
+ - Extract Attributes klass from Ranking klass.
5
+ - Support art page.
6
+ - Add an attribute for submedia.
7
+
3
8
  ## 0.1.1
4
9
  - Convert double quotes to single quotes for description_raw.
5
10
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dmm-crawler (0.1.1)
4
+ dmm-crawler (0.1.2)
5
5
  mechanize
6
6
 
7
7
  GEM
@@ -1,8 +1,10 @@
1
1
  require 'mechanize'
2
2
 
3
3
  module DMMCrawler
4
+ BASE_URL = 'http://www.dmm.co.jp'.freeze
4
5
  end
5
6
 
6
7
  require 'dmm-crawler/agent'
8
+ require 'dmm-crawler/attributes'
7
9
  require 'dmm-crawler/ranking'
8
10
  require 'dmm-crawler/version'
@@ -0,0 +1,89 @@
1
+ module DMMCrawler
2
+ class Attributes
3
+ def initialize(element, submedia = nil)
4
+ @agent = Agent.instance.agent
5
+ @element = element
6
+
7
+ @submedia = submedia
8
+ end
9
+
10
+ def to_a
11
+ [
12
+ title,
13
+ title_link,
14
+ image_url,
15
+ description,
16
+ description_raw,
17
+ submedia,
18
+ tags
19
+ ]
20
+ end
21
+
22
+ private
23
+
24
+ def title
25
+ if art_page?
26
+ @element.search('.productTitle__txt span').remove
27
+ @element.search('.productTitle__txt').text.strip
28
+ else
29
+ @element.search('.rank-name').first.text.strip
30
+ end
31
+ end
32
+
33
+ def image_url
34
+ if art_page?
35
+ @element.search('.productPreview__item img').last.attributes['data-src'].value
36
+ else
37
+ @element.search('img').last.attributes['src'].value
38
+ end
39
+ end
40
+
41
+ def title_link
42
+ if art_page?
43
+ @element.uri.to_s
44
+ else
45
+ File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
46
+ end
47
+ end
48
+
49
+ def description
50
+ if art_page?
51
+ @element.search('.summary .summary__txt').text
52
+ else
53
+ @element.search('.rank-desc').text
54
+ end
55
+ end
56
+
57
+ def description_raw
58
+ if art_page?
59
+ @element.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
60
+ else
61
+ @element.search('.rank-desc').to_s.tr('"', "'")
62
+ end
63
+ end
64
+
65
+ def submedia
66
+ return @submedia if @submedia
67
+
68
+ @element
69
+ .search('.productAttribute-listItem .c_icon_productGenre')
70
+ .first
71
+ .attributes['class']
72
+ .value
73
+ .gsub('c_icon_productGenre ', '')
74
+ .delete('-')
75
+ end
76
+
77
+ def tags
78
+ if art_page?
79
+ @element.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
80
+ else
81
+ @element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
82
+ end
83
+ end
84
+
85
+ def art_page?
86
+ @element.search('.rank-name').empty?
87
+ end
88
+ end
89
+ end
@@ -1,7 +1,5 @@
1
1
  module DMMCrawler
2
2
  class Ranking
3
- BASE_URL = 'http://www.dmm.co.jp'.freeze
4
-
5
3
  def initialize(arguments)
6
4
  @term = discriminate_term(arguments[:term])
7
5
  @submedia = discriminate_submedia(arguments[:submedia])
@@ -11,79 +9,22 @@ module DMMCrawler
11
9
 
12
10
  def arts
13
11
  arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
14
- Attribute.new(element).to_a
12
+ Attributes.new(element, @submedia).to_a
15
13
  end
16
14
 
17
- arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, tags), rank|
15
+ arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, submedia, tags), rank|
18
16
  {
19
17
  title: "#{rank}位: #{title}",
20
18
  title_link: title_link,
21
19
  image_url: image_url,
22
20
  description: description,
23
21
  description_raw: description_raw,
22
+ submedia: submedia,
24
23
  tags: tags
25
24
  }
26
25
  end
27
26
  end
28
27
 
29
- class Attribute
30
- def initialize(element)
31
- @element = element
32
- @agent = Agent.instance.agent
33
- end
34
-
35
- def to_a
36
- [
37
- title,
38
- title_link,
39
- image_url,
40
- description,
41
- description_raw,
42
- tags
43
- ]
44
- end
45
-
46
- private
47
-
48
- def title
49
- @element.search('.rank-name').first.text.strip
50
- end
51
-
52
- def image_url
53
- @element.search('img').last.attributes['src'].value
54
- end
55
-
56
- def title_link
57
- File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
58
- end
59
-
60
- def description
61
- @element.search('.rank-desc').text.nil? ? fetch_description : @element.search('.rank-desc').text
62
- end
63
-
64
- def description_raw
65
- @element.search('.rank-desc').text.nil? ? fetch_description_raw : @element.search('.rank-desc').to_s.tr('"', "'")
66
- end
67
-
68
- def fetch_description
69
- url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
70
- page = @agent.get(url)
71
- page.search('.summary .summary__txt').text
72
- end
73
-
74
- def fetch_description_raw
75
- url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
76
- page = @agent.get(url)
77
- page.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
78
- end
79
-
80
- def tags
81
- @element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
82
- end
83
- end
84
-
85
- private_constant :Attribute
86
-
87
28
  private
88
29
 
89
30
  def page
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.1.1'.freeze
2
+ VERSION = '0.1.2'.freeze
3
3
  end
@@ -21,7 +21,7 @@ describe DMMCrawler::Ranking do
21
21
  let(:term) { '24' }
22
22
 
23
23
  it { is_expected.not_to be_empty }
24
- it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :tags)) }
24
+ it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :submedia, :tags)) }
25
25
  end
26
26
 
27
27
  context 'with not registered argument' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori
@@ -115,6 +115,7 @@ files:
115
115
  - doc/ja/README.md
116
116
  - lib/dmm-crawler.rb
117
117
  - lib/dmm-crawler/agent.rb
118
+ - lib/dmm-crawler/attributes.rb
118
119
  - lib/dmm-crawler/ranking.rb
119
120
  - lib/dmm-crawler/version.rb
120
121
  - spec/dmm-crawler/ranking_spec.rb