dmm-crawler 0.1.1 → 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: de55b4e7acaeea3d391451c3d2c7f52c70ee69f2
4
- data.tar.gz: 78e43451a03c47801ad2d79389ef873df6055822
3
+ metadata.gz: '06925a3fd6d470aaaf623d6b2da213c5b8ff631f'
4
+ data.tar.gz: 40dfc7eb3824eade889b6218d8c2042c4c62ae02
5
5
  SHA512:
6
- metadata.gz: 007f0a66ef96674827c9dbdee83bec84304e6932ecf3f9821a2c377a2904ac288cbfcbe56026c580a62d92a734af1dcf8d57bf4e19b4575e50f5b4051a7a915d
7
- data.tar.gz: cf63c892f60a777ce5ede4c2fb1746809eddbed0c273112e3120067fc800d94b810ff17fd3c7b0d5ae91ba6fe294e39889b8fd042293dadb49b906db481be339
6
+ metadata.gz: 32e43bd62c3af54c67b60eda38d7455f2209a9d3b2351b7893b94ec9329964c0a38c1ac9975fb3d6b720d2cd5d8be3c0c00a3d927c6021b1c2a9849f64bc3520
7
+ data.tar.gz: 42b519cd6268171d10634953c9392b5fdd1b220067dd7ca36c2e8ca1b2496c8d3b147026eb965417c52d7bde2a656caf0d39679d5c3a0fe18b79653f82008767
@@ -1,5 +1,10 @@
1
1
  # Change logs
2
2
 
3
+ ## 0.1.2
4
+ - Extract Attributes class from Ranking class.
5
+ - Support art page.
6
+ - Add an attribute for submedia.
7
+
3
8
  ## 0.1.1
4
9
  - Convert double quotes to single quotes for description_raw.
5
10
 
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- dmm-crawler (0.1.1)
4
+ dmm-crawler (0.1.2)
5
5
  mechanize
6
6
 
7
7
  GEM
@@ -1,8 +1,10 @@
1
1
  require 'mechanize'
2
2
 
3
3
  module DMMCrawler
4
+ BASE_URL = 'http://www.dmm.co.jp'.freeze
4
5
  end
5
6
 
6
7
  require 'dmm-crawler/agent'
8
+ require 'dmm-crawler/attributes'
7
9
  require 'dmm-crawler/ranking'
8
10
  require 'dmm-crawler/version'
@@ -0,0 +1,89 @@
1
+ module DMMCrawler
2
+ class Attributes
3
+ def initialize(element, submedia = nil)
4
+ @agent = Agent.instance.agent
5
+ @element = element
6
+
7
+ @submedia = submedia
8
+ end
9
+
10
+ def to_a
11
+ [
12
+ title,
13
+ title_link,
14
+ image_url,
15
+ description,
16
+ description_raw,
17
+ submedia,
18
+ tags
19
+ ]
20
+ end
21
+
22
+ private
23
+
24
+ def title
25
+ if art_page?
26
+ @element.search('.productTitle__txt span').remove
27
+ @element.search('.productTitle__txt').text.strip
28
+ else
29
+ @element.search('.rank-name').first.text.strip
30
+ end
31
+ end
32
+
33
+ def image_url
34
+ if art_page?
35
+ @element.search('.productPreview__item img').last.attributes['data-src'].value
36
+ else
37
+ @element.search('img').last.attributes['src'].value
38
+ end
39
+ end
40
+
41
+ def title_link
42
+ if art_page?
43
+ @element.uri.to_s
44
+ else
45
+ File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
46
+ end
47
+ end
48
+
49
+ def description
50
+ if art_page?
51
+ @element.search('.summary .summary__txt').text
52
+ else
53
+ @element.search('.rank-desc').text
54
+ end
55
+ end
56
+
57
+ def description_raw
58
+ if art_page?
59
+ @element.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
60
+ else
61
+ @element.search('.rank-desc').to_s.tr('"', "'")
62
+ end
63
+ end
64
+
65
+ def submedia
66
+ return @submedia if @submedia
67
+
68
+ @element
69
+ .search('.productAttribute-listItem .c_icon_productGenre')
70
+ .first
71
+ .attributes['class']
72
+ .value
73
+ .gsub('c_icon_productGenre ', '')
74
+ .delete('-')
75
+ end
76
+
77
+ def tags
78
+ if art_page?
79
+ @element.search('.genreTagList .genreTagList__item a').map { |e| e.text.strip }
80
+ else
81
+ @element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
82
+ end
83
+ end
84
+
85
+ def art_page?
86
+ @element.search('.rank-name').empty?
87
+ end
88
+ end
89
+ end
@@ -1,7 +1,5 @@
1
1
  module DMMCrawler
2
2
  class Ranking
3
- BASE_URL = 'http://www.dmm.co.jp'.freeze
4
-
5
3
  def initialize(arguments)
6
4
  @term = discriminate_term(arguments[:term])
7
5
  @submedia = discriminate_submedia(arguments[:submedia])
@@ -11,79 +9,22 @@ module DMMCrawler
11
9
 
12
10
  def arts
13
11
  arts = page.search('.rank-rankListItem.fn-setPurchaseChange').map do |element|
14
- Attribute.new(element).to_a
12
+ Attributes.new(element, @submedia).to_a
15
13
  end
16
14
 
17
- arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, tags), rank|
15
+ arts.map.with_index(1) do |(title, title_link, image_url, description, description_raw, submedia, tags), rank|
18
16
  {
19
17
  title: "#{rank}位: #{title}",
20
18
  title_link: title_link,
21
19
  image_url: image_url,
22
20
  description: description,
23
21
  description_raw: description_raw,
22
+ submedia: submedia,
24
23
  tags: tags
25
24
  }
26
25
  end
27
26
  end
28
27
 
29
- class Attribute
30
- def initialize(element)
31
- @element = element
32
- @agent = Agent.instance.agent
33
- end
34
-
35
- def to_a
36
- [
37
- title,
38
- title_link,
39
- image_url,
40
- description,
41
- description_raw,
42
- tags
43
- ]
44
- end
45
-
46
- private
47
-
48
- def title
49
- @element.search('.rank-name').first.text.strip
50
- end
51
-
52
- def image_url
53
- @element.search('img').last.attributes['src'].value
54
- end
55
-
56
- def title_link
57
- File.join(BASE_URL, @element.search('.rank-name').first.search('a').first.attributes.first[1].value)
58
- end
59
-
60
- def description
61
- @element.search('.rank-desc').text.nil? ? fetch_description : @element.search('.rank-desc').text
62
- end
63
-
64
- def description_raw
65
- @element.search('.rank-desc').text.nil? ? fetch_description_raw : @element.search('.rank-desc').to_s.tr('"', "'")
66
- end
67
-
68
- def fetch_description
69
- url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
70
- page = @agent.get(url)
71
- page.search('.summary .summary__txt').text
72
- end
73
-
74
- def fetch_description_raw
75
- url = File.join(BASE_URL, @element.search('.rank-name a').first.attributes['href'].value)
76
- page = @agent.get(url)
77
- page.search('.summary .summary__txt').to_s.gsub(/\sclass=".*"/, '').tr('"', "'")
78
- end
79
-
80
- def tags
81
- @element.search('.rank-labelListItem').map { |e| e.search('a').text.strip }
82
- end
83
- end
84
-
85
- private_constant :Attribute
86
-
87
28
  private
88
29
 
89
30
  def page
@@ -1,3 +1,3 @@
1
1
  module DMMCrawler
2
- VERSION = '0.1.1'.freeze
2
+ VERSION = '0.1.2'.freeze
3
3
  end
@@ -21,7 +21,7 @@ describe DMMCrawler::Ranking do
21
21
  let(:term) { '24' }
22
22
 
23
23
  it { is_expected.not_to be_empty }
24
- it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :tags)) }
24
+ it { is_expected.to all(include(:title, :title_link, :image_url, :description, :description_raw, :submedia, :tags)) }
25
25
  end
26
26
 
27
27
  context 'with not registered argument' do
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: dmm-crawler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Satoshi Ohmori
@@ -115,6 +115,7 @@ files:
115
115
  - doc/ja/README.md
116
116
  - lib/dmm-crawler.rb
117
117
  - lib/dmm-crawler/agent.rb
118
+ - lib/dmm-crawler/attributes.rb
118
119
  - lib/dmm-crawler/ranking.rb
119
120
  - lib/dmm-crawler/version.rb
120
121
  - spec/dmm-crawler/ranking_spec.rb