harunica 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ee4c6d19662c8be0e7fd0ffe472f9b3426f6d35d
4
- data.tar.gz: b30bac0a332e19a8a5e22fd8ff848065836bd7ee
3
+ metadata.gz: cc57b37d7333f8c6ffc61d6ea007c3f531fb1d37
4
+ data.tar.gz: e0b398ca645b1fafb851d7c71fb587bbdbb89c4c
5
5
  SHA512:
6
- metadata.gz: d74519046fd2672bc76784914f0720dbe8bee6c3ae0bc1f8d27a37d52ec26f6491c16c06b197ba3ed0425710972bbf17485113ed9b62c67728e8433d907ab7fa
7
- data.tar.gz: 61284f26b82837aeb2a9329ffcf39e0ccaed18f4b3a7d7d153418a5e534992cb099fc319f04a20b95b02685a1085f303740a50faf723aff73cdf38427d8e0bd7
6
+ metadata.gz: c564b55142637589809e7c135a3dbb2f294129ffe25dba7e2cbcc50efd8201f906063b41bb71cd6fbc7f9c67e1fa3d97a630fd8281790e36e539eea5370a6b37
7
+ data.tar.gz: 416d39557acfcb5951fb6b15453d60f4e032c5f626e322c9fa67e9a9b962c18479d3995beca13b9500d5a8737c8ede56d478a5af0ab26f718a30d41825e68094
data/harunica.gemspec CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
24
24
  spec.add_development_dependency "bundler", "~> 1.10"
25
25
  spec.add_development_dependency "rake", "~> 10.0"
26
26
  spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "glint"
27
28
 
28
29
  spec.add_dependency "nokogiri"
29
30
  end
@@ -1,13 +1,17 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+
1
4
  module Harunica
2
5
  module Scraping
3
6
  class Html
4
- def initialize(charset, text)
5
- @charset = charset
6
- @text = text
7
+ attr_reader :url
8
+
9
+ def initialize(url)
10
+ @url = Url.new(url)
7
11
  end
8
12
 
9
13
  def doc
10
- @doc ||= ::Nokogiri::HTML.parse(@text, nil, @charset)
14
+ @doc ||= ::Nokogiri::HTML(::Kernel.open(url, &:read), nil, 'utf-8')
11
15
  end
12
16
  end
13
17
  end
@@ -1,4 +1,3 @@
1
- require 'open-uri'
2
1
  require 'nokogiri'
3
2
 
4
3
  module Harunica
@@ -23,31 +22,44 @@ module Harunica
23
22
  'a.pagerBtn.switchingBtn'
24
23
  ]
25
24
 
26
- attr_reader :url
25
+ attr_reader :html
27
26
 
28
- def initialize(url = INITIAL_URL)
29
- @url = url
27
+ def initialize(html)
28
+ @html = html || Html.new(url: INITIAL_URL)
30
29
  end
31
30
 
32
- def html
33
- @html ||= ::Kernel.open(url) { |f| Html.new(f.charset, f.read) }
31
+ def video_pages
32
+ @video_pages ||= html.doc.css(VIDEO_SELECTORS.join(' ')).map do |e|
33
+ url = html.url.base + e.css('.itemContent .itemTitle a')[0].attr('href')
34
+ VideoPage.new(Html.new(url))
35
+ end
34
36
  end
35
37
 
36
- def videos
37
- @videos ||= html.doc.css(VIDEO_SELECTORS.join(' '))
38
+ # 現在のページ以降のすべてのページの検索結果に含まれる動画のリストを取得する.
39
+ def all_video_pages
40
+ current = self
41
+ a = []
42
+ while current
43
+ a += current.video_pages
44
+ current = current.next
45
+ end
46
+ a
38
47
  end
39
48
 
40
49
  def next_link
41
- @next_link ||= html.doc.css(NEXT_LINK_SELECTOR.join(' ')).select { |e| e.content == '次へ' }
50
+ @next_link ||= begin
51
+ e = html.doc.css(NEXT_LINK_SELECTOR.join(' ')).select { |e| e.content == '次へ' }[0]
52
+ e && e.attr('href')
53
+ end
42
54
  end
43
55
 
44
56
  def next?
45
- !next_link.empty?
57
+ !!next_link
46
58
  end
47
59
 
48
60
  def next
49
61
  if next?
50
- self.class.new(next_link[0].attr('href'))
62
+ self.class.new(Html.new(next_link))
51
63
  else
52
64
  nil
53
65
  end
@@ -0,0 +1,16 @@
1
+ module Harunica
2
+ module Scraping
3
+ class Url < ::String
4
+ def initialize(s)
5
+ super s.to_s
6
+ end
7
+
8
+ def base
9
+ @base ||= ::URI.split(self).instance_exec do
10
+ base = "#{self[0]}://#{self[2]}"
11
+ Url.new(self[3] ? "#{base}:#{self[3]}" : base)
12
+ end
13
+ end
14
+ end
15
+ end
16
+ end
@@ -0,0 +1,23 @@
1
+ module Harunica
2
+ module Scraping
3
+ class VideoPage
4
+ AUTHOR_SELECTORS = [
5
+ '#PAGEBODY',
6
+ '.wAdjust',
7
+ '.score-wrap',
8
+ '.score-item[itemprop="author"]',
9
+ 'strong[itemprop="name"]'
10
+ ]
11
+
12
+ attr_reader :html
13
+
14
+ def initialize(html)
15
+ @html = html
16
+ end
17
+
18
+ def author
19
+ @author ||= html.doc.css(AUTHOR_SELECTORS.join(' '))[0].content
20
+ end
21
+ end
22
+ end
23
+ end
@@ -2,6 +2,7 @@ module Harunica
2
2
  module Scraping
3
3
  autoload :Html, __dir__ + '/scraping/html'
4
4
  autoload :ListPage, __dir__ + '/scraping/list_page'
5
- autoload :Videoset, __dir__ + '/scraping/videoset'
5
+ autoload :Url, __dir__ + '/scraping/url'
6
+ autoload :VideoPage, __dir__ + '/scraping/video_page'
6
7
  end
7
8
  end
@@ -1,3 +1,3 @@
1
1
  module Harunica
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: harunica
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - mosop
@@ -53,6 +53,20 @@ dependencies:
53
53
  - - ">="
54
54
  - !ruby/object:Gem::Version
55
55
  version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: glint
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: '0'
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - ">="
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
56
70
  - !ruby/object:Gem::Dependency
57
71
  name: nokogiri
58
72
  requirement: !ruby/object:Gem::Requirement
@@ -88,7 +102,8 @@ files:
88
102
  - lib/harunica/scraping.rb
89
103
  - lib/harunica/scraping/html.rb
90
104
  - lib/harunica/scraping/list_page.rb
91
- - lib/harunica/scraping/videoset.rb
105
+ - lib/harunica/scraping/url.rb
106
+ - lib/harunica/scraping/video_page.rb
92
107
  - lib/harunica/version.rb
93
108
  homepage: https://github.com/mosop/harunica-gem
94
109
  licenses:
@@ -1,16 +0,0 @@
1
- module Harunica
2
- module Scraping
3
- class Videoset
4
- def items
5
- @videos ||= begin
6
- page = ListPage.new
7
- a = page.videos
8
- while page = page.next
9
- a += page.videos
10
- end
11
- a
12
- end
13
- end
14
- end
15
- end
16
- end