harunica 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/harunica.gemspec +1 -0
- data/lib/harunica/scraping/html.rb +8 -4
- data/lib/harunica/scraping/list_page.rb +23 -11
- data/lib/harunica/scraping/url.rb +16 -0
- data/lib/harunica/scraping/video_page.rb +23 -0
- data/lib/harunica/scraping.rb +2 -1
- data/lib/harunica/version.rb +1 -1
- metadata +17 -2
- data/lib/harunica/scraping/videoset.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cc57b37d7333f8c6ffc61d6ea007c3f531fb1d37
|
4
|
+
data.tar.gz: e0b398ca645b1fafb851d7c71fb587bbdbb89c4c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c564b55142637589809e7c135a3dbb2f294129ffe25dba7e2cbcc50efd8201f906063b41bb71cd6fbc7f9c67e1fa3d97a630fd8281790e36e539eea5370a6b37
|
7
|
+
data.tar.gz: 416d39557acfcb5951fb6b15453d60f4e032c5f626e322c9fa67e9a9b962c18479d3995beca13b9500d5a8737c8ede56d478a5af0ab26f718a30d41825e68094
|
data/harunica.gemspec
CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "bundler", "~> 1.10"
|
25
25
|
spec.add_development_dependency "rake", "~> 10.0"
|
26
26
|
spec.add_development_dependency "rspec"
|
27
|
+
spec.add_development_dependency "glint"
|
27
28
|
|
28
29
|
spec.add_dependency "nokogiri"
|
29
30
|
end
|
data/lib/harunica/scraping/html.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
1
4
|
module Harunica
|
2
5
|
module Scraping
|
3
6
|
class Html
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
+
attr_reader :url
|
8
|
+
|
9
|
+
def initialize(url)
|
10
|
+
@url = Url.new(url)
|
7
11
|
end
|
8
12
|
|
9
13
|
def doc
|
10
|
-
@doc ||= ::Nokogiri::HTML.
|
14
|
+
@doc ||= ::Nokogiri::HTML(::Kernel.open(url, &:read), nil, 'utf-8')
|
11
15
|
end
|
12
16
|
end
|
13
17
|
end
|
data/lib/harunica/scraping/list_page.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'open-uri'
|
2
1
|
require 'nokogiri'
|
3
2
|
|
4
3
|
module Harunica
|
@@ -23,31 +22,44 @@ module Harunica
|
|
23
22
|
'a.pagerBtn.switchingBtn'
|
24
23
|
]
|
25
24
|
|
26
|
-
attr_reader :
|
25
|
+
attr_reader :html
|
27
26
|
|
28
|
-
def initialize(
|
29
|
-
@
|
27
|
+
def initialize(html)
|
28
|
+
@html = html || Html.new(url: INITIAL_URL)
|
30
29
|
end
|
31
30
|
|
32
|
-
def
|
33
|
-
@
|
31
|
+
def video_pages
|
32
|
+
@video_pages ||= html.doc.css(VIDEO_SELECTORS.join(' ')).map do |e|
|
33
|
+
url = html.url.base + e.css('.itemContent .itemTitle a')[0].attr('href')
|
34
|
+
VideoPage.new(Html.new(url))
|
35
|
+
end
|
34
36
|
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
+
# 現在のページ以降のすべてのページの検索結果に含まれる動画のリストを取得する.
|
39
|
+
def all_video_pages
|
40
|
+
current = self
|
41
|
+
a = []
|
42
|
+
while current
|
43
|
+
a += current.video_pages
|
44
|
+
current = current.next
|
45
|
+
end
|
46
|
+
a
|
38
47
|
end
|
39
48
|
|
40
49
|
def next_link
|
41
|
-
@next_link ||=
|
50
|
+
@next_link ||= begin
|
51
|
+
e = html.doc.css(NEXT_LINK_SELECTOR.join(' ')).select { |e| e.content == '次へ' }[0]
|
52
|
+
e && e.attr('href')
|
53
|
+
end
|
42
54
|
end
|
43
55
|
|
44
56
|
def next?
|
45
|
-
|
57
|
+
!!next_link
|
46
58
|
end
|
47
59
|
|
48
60
|
def next
|
49
61
|
if next?
|
50
|
-
self.class.new(
|
62
|
+
self.class.new(Html.new(next_link))
|
51
63
|
else
|
52
64
|
nil
|
53
65
|
end
|
data/lib/harunica/scraping/url.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module Harunica
|
2
|
+
module Scraping
|
3
|
+
class Url < ::String
|
4
|
+
def initialize(s)
|
5
|
+
super s.to_s
|
6
|
+
end
|
7
|
+
|
8
|
+
def base
|
9
|
+
@base ||= ::URI.split(self).instance_exec do
|
10
|
+
base = "#{self[0]}://#{self[2]}"
|
11
|
+
Url.new(self[3] ? "#{base}:#{self[3]}" : base)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/harunica/scraping/video_page.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Harunica
|
2
|
+
module Scraping
|
3
|
+
class VideoPage
|
4
|
+
AUTHOR_SELECTORS = [
|
5
|
+
'#PAGEBODY',
|
6
|
+
'.wAdjust',
|
7
|
+
'.score-wrap',
|
8
|
+
'.score-item[itemprop="author"]',
|
9
|
+
'strong[itemprop="name"]'
|
10
|
+
]
|
11
|
+
|
12
|
+
attr_reader :html
|
13
|
+
|
14
|
+
def initialize(html)
|
15
|
+
@html = html
|
16
|
+
end
|
17
|
+
|
18
|
+
def author
|
19
|
+
@author ||= html.doc.css(AUTHOR_SELECTORS.join(' '))[0].content
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/harunica/scraping.rb
CHANGED
@@ -2,6 +2,7 @@ module Harunica
|
|
2
2
|
module Scraping
|
3
3
|
autoload :Html, __dir__ + '/scraping/html'
|
4
4
|
autoload :ListPage, __dir__ + '/scraping/list_page'
|
5
|
-
autoload :
|
5
|
+
autoload :Url, __dir__ + '/scraping/url'
|
6
|
+
autoload :VideoPage, __dir__ + '/scraping/video_page'
|
6
7
|
end
|
7
8
|
end
|
data/lib/harunica/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harunica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosop
|
@@ -53,6 +53,20 @@ dependencies:
|
|
53
53
|
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: glint
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
56
70
|
- !ruby/object:Gem::Dependency
|
57
71
|
name: nokogiri
|
58
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -88,7 +102,8 @@ files:
|
|
88
102
|
- lib/harunica/scraping.rb
|
89
103
|
- lib/harunica/scraping/html.rb
|
90
104
|
- lib/harunica/scraping/list_page.rb
|
91
|
-
- lib/harunica/scraping/videoset.rb
|
105
|
+
- lib/harunica/scraping/url.rb
|
106
|
+
- lib/harunica/scraping/video_page.rb
|
92
107
|
- lib/harunica/version.rb
|
93
108
|
homepage: https://github.com/mosop/harunica-gem
|
94
109
|
licenses:
|