harunica 0.0.1 → 0.0.2
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the public registry.
- checksums.yaml +4 -4
- data/harunica.gemspec +1 -0
- data/lib/harunica/scraping/html.rb +8 -4
- data/lib/harunica/scraping/list_page.rb +23 -11
- data/lib/harunica/scraping/url.rb +16 -0
- data/lib/harunica/scraping/video_page.rb +23 -0
- data/lib/harunica/scraping.rb +2 -1
- data/lib/harunica/version.rb +1 -1
- metadata +17 -2
- data/lib/harunica/scraping/videoset.rb +0 -16
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: cc57b37d7333f8c6ffc61d6ea007c3f531fb1d37
|
4
|
+
data.tar.gz: e0b398ca645b1fafb851d7c71fb587bbdbb89c4c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c564b55142637589809e7c135a3dbb2f294129ffe25dba7e2cbcc50efd8201f906063b41bb71cd6fbc7f9c67e1fa3d97a630fd8281790e36e539eea5370a6b37
|
7
|
+
data.tar.gz: 416d39557acfcb5951fb6b15453d60f4e032c5f626e322c9fa67e9a9b962c18479d3995beca13b9500d5a8737c8ede56d478a5af0ab26f718a30d41825e68094
|
data/harunica.gemspec
CHANGED
@@ -24,6 +24,7 @@ Gem::Specification.new do |spec|
|
|
24
24
|
spec.add_development_dependency "bundler", "~> 1.10"
|
25
25
|
spec.add_development_dependency "rake", "~> 10.0"
|
26
26
|
spec.add_development_dependency "rspec"
|
27
|
+
spec.add_development_dependency "glint"
|
27
28
|
|
28
29
|
spec.add_dependency "nokogiri"
|
29
30
|
end
|
data/lib/harunica/scraping/html.rb
CHANGED
@@ -1,13 +1,17 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
|
1
4
|
module Harunica
|
2
5
|
module Scraping
|
3
6
|
class Html
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
+
attr_reader :url
|
8
|
+
|
9
|
+
def initialize(url)
|
10
|
+
@url = Url.new(url)
|
7
11
|
end
|
8
12
|
|
9
13
|
def doc
|
10
|
-
@doc ||= ::Nokogiri::HTML.
|
14
|
+
@doc ||= ::Nokogiri::HTML(::Kernel.open(url, &:read), nil, 'utf-8')
|
11
15
|
end
|
12
16
|
end
|
13
17
|
end
|
data/lib/harunica/scraping/list_page.rb
CHANGED
@@ -1,4 +1,3 @@
|
|
1
|
-
require 'open-uri'
|
2
1
|
require 'nokogiri'
|
3
2
|
|
4
3
|
module Harunica
|
@@ -23,31 +22,44 @@ module Harunica
|
|
23
22
|
'a.pagerBtn.switchingBtn'
|
24
23
|
]
|
25
24
|
|
26
|
-
attr_reader :
|
25
|
+
attr_reader :html
|
27
26
|
|
28
|
-
def initialize(
|
29
|
-
@
|
27
|
+
def initialize(html)
|
28
|
+
@html = html || Html.new(url: INITIAL_URL)
|
30
29
|
end
|
31
30
|
|
32
|
-
def
|
33
|
-
@
|
31
|
+
def video_pages
|
32
|
+
@video_pages ||= html.doc.css(VIDEO_SELECTORS.join(' ')).map do |e|
|
33
|
+
url = html.url.base + e.css('.itemContent .itemTitle a')[0].attr('href')
|
34
|
+
VideoPage.new(Html.new(url))
|
35
|
+
end
|
34
36
|
end
|
35
37
|
|
36
|
-
|
37
|
-
|
38
|
+
# 現在のページ以降のすべてのページの検索結果に含まれる動画のリストを取得する.
|
39
|
+
def all_video_pages
|
40
|
+
current = self
|
41
|
+
a = []
|
42
|
+
while current
|
43
|
+
a += current.video_pages
|
44
|
+
current = current.next
|
45
|
+
end
|
46
|
+
a
|
38
47
|
end
|
39
48
|
|
40
49
|
def next_link
|
41
|
-
@next_link ||=
|
50
|
+
@next_link ||= begin
|
51
|
+
e = html.doc.css(NEXT_LINK_SELECTOR.join(' ')).select { |e| e.content == '次へ' }[0]
|
52
|
+
e && e.attr('href')
|
53
|
+
end
|
42
54
|
end
|
43
55
|
|
44
56
|
def next?
|
45
|
-
|
57
|
+
!!next_link
|
46
58
|
end
|
47
59
|
|
48
60
|
def next
|
49
61
|
if next?
|
50
|
-
self.class.new(
|
62
|
+
self.class.new(Html.new(next_link))
|
51
63
|
else
|
52
64
|
nil
|
53
65
|
end
|
data/lib/harunica/scraping/url.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
module Harunica
|
2
|
+
module Scraping
|
3
|
+
class Url < ::String
|
4
|
+
def initialize(s)
|
5
|
+
super s.to_s
|
6
|
+
end
|
7
|
+
|
8
|
+
def base
|
9
|
+
@base ||= ::URI.split(self).instance_exec do
|
10
|
+
base = "#{self[0]}://#{self[2]}"
|
11
|
+
Url.new(self[3] ? "#{base}:#{self[3]}" : base)
|
12
|
+
end
|
13
|
+
end
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
data/lib/harunica/scraping/video_page.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
module Harunica
|
2
|
+
module Scraping
|
3
|
+
class VideoPage
|
4
|
+
AUTHOR_SELECTORS = [
|
5
|
+
'#PAGEBODY',
|
6
|
+
'.wAdjust',
|
7
|
+
'.score-wrap',
|
8
|
+
'.score-item[itemprop="author"]',
|
9
|
+
'strong[itemprop="name"]'
|
10
|
+
]
|
11
|
+
|
12
|
+
attr_reader :html
|
13
|
+
|
14
|
+
def initialize(html)
|
15
|
+
@html = html
|
16
|
+
end
|
17
|
+
|
18
|
+
def author
|
19
|
+
@author ||= html.doc.css(AUTHOR_SELECTORS.join(' '))[0].content
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
end
|
data/lib/harunica/scraping.rb
CHANGED
@@ -2,6 +2,7 @@ module Harunica
|
|
2
2
|
module Scraping
|
3
3
|
autoload :Html, __dir__ + '/scraping/html'
|
4
4
|
autoload :ListPage, __dir__ + '/scraping/list_page'
|
5
|
-
autoload :
|
5
|
+
autoload :Url, __dir__ + '/scraping/url'
|
6
|
+
autoload :VideoPage, __dir__ + '/scraping/video_page'
|
6
7
|
end
|
7
8
|
end
|
data/lib/harunica/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: harunica
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- mosop
|
@@ -53,6 +53,20 @@ dependencies:
|
|
53
53
|
- - ">="
|
54
54
|
- !ruby/object:Gem::Version
|
55
55
|
version: '0'
|
56
|
+
- !ruby/object:Gem::Dependency
|
57
|
+
name: glint
|
58
|
+
requirement: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
type: :development
|
64
|
+
prerelease: false
|
65
|
+
version_requirements: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
56
70
|
- !ruby/object:Gem::Dependency
|
57
71
|
name: nokogiri
|
58
72
|
requirement: !ruby/object:Gem::Requirement
|
@@ -88,7 +102,8 @@ files:
|
|
88
102
|
- lib/harunica/scraping.rb
|
89
103
|
- lib/harunica/scraping/html.rb
|
90
104
|
- lib/harunica/scraping/list_page.rb
|
91
|
-
- lib/harunica/scraping/
|
105
|
+
- lib/harunica/scraping/url.rb
|
106
|
+
- lib/harunica/scraping/video_page.rb
|
92
107
|
- lib/harunica/version.rb
|
93
108
|
homepage: https://github.com/mosop/harunica-gem
|
94
109
|
licenses:
|