harunica 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ee4c6d19662c8be0e7fd0ffe472f9b3426f6d35d
4
- data.tar.gz: b30bac0a332e19a8a5e22fd8ff848065836bd7ee
3
+ metadata.gz: cc57b37d7333f8c6ffc61d6ea007c3f531fb1d37
4
+ data.tar.gz: e0b398ca645b1fafb851d7c71fb587bbdbb89c4c
5
5
  SHA512:
6
- metadata.gz: d74519046fd2672bc76784914f0720dbe8bee6c3ae0bc1f8d27a37d52ec26f6491c16c06b197ba3ed0425710972bbf17485113ed9b62c67728e8433d907ab7fa
7
- data.tar.gz: 61284f26b82837aeb2a9329ffcf39e0ccaed18f4b3a7d7d153418a5e534992cb099fc319f04a20b95b02685a1085f303740a50faf723aff73cdf38427d8e0bd7
6
+ metadata.gz: c564b55142637589809e7c135a3dbb2f294129ffe25dba7e2cbcc50efd8201f906063b41bb71cd6fbc7f9c67e1fa3d97a630fd8281790e36e539eea5370a6b37
7
+ data.tar.gz: 416d39557acfcb5951fb6b15453d60f4e032c5f626e322c9fa67e9a9b962c18479d3995beca13b9500d5a8737c8ede56d478a5af0ab26f718a30d41825e68094
data/harunica.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "bundler", "~> 1.10"
25
25
  spec.add_development_dependency "rake", "~> 10.0"
26
26
  spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "glint"
27
28
 
28
29
  spec.add_dependency "nokogiri"
29
30
  end
data/lib/harunica/scraping/html.rb CHANGED
@@ -1,13 +1,17 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
1
4
  module Harunica
2
5
  module Scraping
3
6
  class Html
4
- def initialize(charset, text)
5
- @charset = charset
6
- @text = text
7
+ attr_reader :url
8
+
9
+ def initialize(url)
10
+ @url = Url.new(url)
7
11
  end
8
12
 
9
13
  def doc
10
- @doc ||= ::Nokogiri::HTML.parse(@text, nil, @charset)
14
+ @doc ||= ::Nokogiri::HTML(::Kernel.open(url, &:read), nil, 'utf-8')
11
15
  end
12
16
  end
13
17
  end
data/lib/harunica/scraping/list_page.rb CHANGED
@@ -1,4 +1,3 @@
1
- require 'open-uri'
2
1
  require 'nokogiri'
3
2
 
4
3
  module Harunica
@@ -23,31 +22,44 @@ module Harunica
23
22
  'a.pagerBtn.switchingBtn'
24
23
  ]
25
24
 
26
- attr_reader :url
25
+ attr_reader :html
27
26
 
28
- def initialize(url = INITIAL_URL)
29
- @url = url
27
+ def initialize(html)
28
+ @html = html || Html.new(url: INITIAL_URL)
30
29
  end
31
30
 
32
- def html
33
- @html ||= ::Kernel.open(url) { |f| Html.new(f.charset, f.read) }
31
+ def video_pages
32
+ @video_pages ||= html.doc.css(VIDEO_SELECTORS.join(' ')).map do |e|
33
+ url = html.url.base + e.css('.itemContent .itemTitle a')[0].attr('href')
34
+ VideoPage.new(Html.new(url))
35
+ end
34
36
  end
35
37
 
36
- def videos
37
- @videos ||= html.doc.css(VIDEO_SELECTORS.join(' '))
38
+ # 現在のページ以降のすべてのページの検索結果に含まれる動画のリストを取得する.
39
+ def all_video_pages
40
+ current = self
41
+ a = []
42
+ while current
43
+ a += current.video_pages
44
+ current = current.next
45
+ end
46
+ a
38
47
  end
39
48
 
40
49
  def next_link
41
- @next_link ||= html.doc.css(NEXT_LINK_SELECTOR.join(' ')).select { |e| e.content == '次へ' }
50
+ @next_link ||= begin
51
+ e = html.doc.css(NEXT_LINK_SELECTOR.join(' ')).select { |e| e.content == '次へ' }[0]
52
+ e && e.attr('href')
53
+ end
42
54
  end
43
55
 
44
56
  def next?
45
- !next_link.empty?
57
+ !!next_link
46
58
  end
47
59
 
48
60
  def next
49
61
  if next?
50
- self.class.new(next_link[0].attr('href'))
62
+ self.class.new(Html.new(next_link))
51
63
  else
52
64
  nil
53
65
  end
data/lib/harunica/scraping/url.rb ADDED
@@ -0,0 +1,16 @@
1
+ module Harunica
2
+ module Scraping
3
+ class Url < ::String
4
+ def initialize(s)
5
+ super s.to_s
6
+ end
7
+
8
+ def base
9
+ @base ||= ::URI.split(self).instance_exec do
10
+ base = "#{self[0]}://#{self[2]}"
11
+ Url.new(self[3] ? "#{base}:#{self[3]}" : base)
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
data/lib/harunica/scraping/video_page.rb ADDED
@@ -0,0 +1,23 @@
1
+ module Harunica
2
+ module Scraping
3
+ class VideoPage
4
+ AUTHOR_SELECTORS = [
5
+ '#PAGEBODY',
6
+ '.wAdjust',
7
+ '.score-wrap',
8
+ '.score-item[itemprop="author"]',
9
+ 'strong[itemprop="name"]'
10
+ ]
11
+
12
+ attr_reader :html
13
+
14
+ def initialize(html)
15
+ @html = html
16
+ end
17
+
18
+ def author
19
+ @author ||= html.doc.css(AUTHOR_SELECTORS.join(' '))[0].content
20
+ end
21
+ end
22
+ end
23
+ end
data/lib/harunica/scraping.rb CHANGED
@@ -2,6 +2,7 @@ module Harunica
2
2
  module Scraping
3
3
  autoload :Html, __dir__ + '/scraping/html'
4
4
  autoload :ListPage, __dir__ + '/scraping/list_page'
5
- autoload :Videoset, __dir__ + '/scraping/videoset'
5
+ autoload :Url, __dir__ + '/scraping/url'
6
+ autoload :VideoPage, __dir__ + '/scraping/video_page'
6
7
  end
7
8
  end
data/lib/harunica/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Harunica
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harunica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - mosop
@@ -53,6 +53,20 @@ dependencies:
53
53
  - - ">="
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: glint
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
56
70
  - !ruby/object:Gem::Dependency
57
71
  name: nokogiri
58
72
  requirement: !ruby/object:Gem::Requirement
@@ -88,7 +102,8 @@ files:
88
102
  - lib/harunica/scraping.rb
89
103
  - lib/harunica/scraping/html.rb
90
104
  - lib/harunica/scraping/list_page.rb
91
- - lib/harunica/scraping/videoset.rb
105
+ - lib/harunica/scraping/url.rb
106
+ - lib/harunica/scraping/video_page.rb
92
107
  - lib/harunica/version.rb
93
108
  homepage: https://github.com/mosop/harunica-gem
94
109
  licenses:
data/lib/harunica/scraping/videoset.rb DELETED
@@ -1,16 +0,0 @@
1
- module Harunica
2
- module Scraping
3
- class Videoset
4
- def items
5
- @videos ||= begin
6
- page = ListPage.new
7
- a = page.videos
8
- while page = page.next
9
- a += page.videos
10
- end
11
- a
12
- end
13
- end
14
- end
15
- end
16
- end