javlibrary 0.1.5 → 0.1.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/javlibrary.rb +259 -7
- data/lib/javlibrary/database.rb +4 -0
- data/lib/javlibrary/downloader.rb +13 -0
- data/lib/javlibrary/info_hash.rb +2 -0
- data/lib/javlibrary/init.rb +6 -0
- data/lib/javlibrary/name.rb +6 -0
- data/lib/javlibrary/version.rb +1 -1
- data/lib/javlibrary/video.rb +9 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9f057e97a5ad367420e48abd74ce00766698321
|
4
|
+
data.tar.gz: ff8adfe4c7e0cc48c27df322f0a87185eaf419ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf3bfbf99a201d5110e4228b041a3de500ab0047d4208b22787afbd375fe5ff55eab8331ced816e25dcf7199126d885d140aff610ec3ecff0cbdc2a4584f357e
|
7
|
+
data.tar.gz: 8a64494d675a6242c1e01b75d8176065d68b4a521b1b971c59b38a28ce4725cb481205ef0882e618388970c04cb52740b648dd1823d7f4b544a730c988eb2d15
|
data/lib/javlibrary.rb
CHANGED
@@ -6,13 +6,265 @@ require 'nokogiri'
|
|
6
6
|
require 'mysql2'
|
7
7
|
require 'pp'
|
8
8
|
|
9
|
-
require_relative 'javlibrary/init'
|
10
|
-
require_relative 'javlibrary/database'
|
11
|
-
require_relative 'javlibrary/downloader'
|
12
|
-
require_relative 'javlibrary/info_hash'
|
13
|
-
require_relative 'javlibrary/name'
|
14
|
-
require_relative 'javlibrary/video'
|
15
|
-
|
16
9
|
# Scraper that copies actor, genre and video metadata from the jav11b.com
# site into a local MySQL database.
#
# Methods defined as `def Javlibrary.x` are callable only on the module;
# the ones listed in the `module_function` calls at the bottom are callable
# on the module and are also available as private instance methods to
# includers.
module Javlibrary
  # Maximum number of attempts for a single HTTP fetch before the last
  # error is re-raised to the caller.
  MAX_FETCH_ATTEMPTS = 5

  # Number of pending videos handed to each download thread.
  VIDEOS_PER_THREAD = 5000

  # Opens a new MySQL connection.
  #
  # Each setting can be overridden through an environment variable; the
  # fallbacks are the values this method has always used.
  # NOTE(review): the fallback password is still committed to source; it
  # should be supplied through JAVLIBRARY_DB_PASSWORD instead.
  #
  # @return [Mysql2::Client]
  def client
    Mysql2::Client.new(:host => ENV.fetch("JAVLIBRARY_DB_HOST", "127.0.0.1"),
                       :username => ENV.fetch("JAVLIBRARY_DB_USER", "root"),
                       :password => ENV.fetch("JAVLIBRARY_DB_PASSWORD", "XuHefeng"),
                       :database => ENV.fetch("JAVLIBRARY_DB_NAME", "javlibrary_new"))
  end

  # Runs the block, re-running it when it raises a StandardError, up to
  # `limit` attempts in total. The last error is re-raised once the limit
  # is reached, so a permanently failing fetch can no longer loop forever.
  def Javlibrary.with_retries(limit = MAX_FETCH_ATTEMPTS)
    attempts = 0
    begin
      yield
    rescue StandardError
      attempts += 1
      retry if attempts < limit
      raise
    end
  end
  private_class_method :with_retries

  # Fetches the detail page of one video and extracts its metadata.
  #
  # @param identifer [String] value of the site's `v` query parameter
  # @return [String] fields joined with "$" in the order
  #   title$id$date$director$maker$label$cast$genres$img_url
  # @raise [StandardError] the fetch error, after MAX_FETCH_ATTEMPTS failures
  def Javlibrary.downloader(identifer)
    baseurl = "http://www.jav11b.com/cn/?v=#{identifer}"
    agent = Mechanize.new
    # `sample` always yields an existing alias; a fixed random index could
    # point past the end of the list and set a nil user agent.
    agent.user_agent = Mechanize::AGENT_ALIASES.values.sample
    with_retries { agent.get baseurl }

    doc = Nokogiri::HTML(agent.page.body)

    video_title = doc.search('div[@id="video_title"]/h3/a').children.text

    details = doc.search('//div[@id="video_info"]/div[@class="item"]/table/tr/td[@class="text"]').map do |row|
      row.children.text
    end

    # Every genre name is followed by a single space.
    video_genres = doc.search('//div[@id="video_genres"]/table/tr/td[@class="text"]/span[@class="genre"]/a').map do |row|
      "#{row.children.text} "
    end.join

    # When several jacket images match, the last one wins.
    video_jacket_img = ""
    doc.search('//img[@id="video_jacket_img"]').each do |row|
      video_jacket_img = row['src']
    end

    "#{video_title}$#{details[0]}$#{details[1]}$#{details[2]}$#{details[3]}$#{details[4]}$#{details[-1]}$#{video_genres}$#{video_jacket_img}"
  end

  # Downloads one video's metadata and stores it: one `video` row, one `v2a`
  # row per known cast member, one `v2c` row per known genre, and finally
  # marks the `label` row as downloaded.
  #
  # @param client [Mysql2::Client] open connection (must respond to
  #   `query` and `escape`)
  # @param index [Integer] `label.video_num`, reused as `video.video_id`
  # @param identifer [String] site identifier passed to `downloader`
  # @param actor_hash [Hash] actor name => actor id
  # @param genres_hash [Hash] genre name => category id
  # @return [nil]
  def Javlibrary.video_info_insert(client, index, identifer, actor_hash, genres_hash)
    # Fetch exactly once; every call is a full HTTP round trip.
    title, id, date, director, maker, label, cast_tmp, genres_tmp, img_url = downloader(identifer).split('$')
    cast = cast_tmp.split
    genres = genres_tmp.split

    begin
      # Scraped text is escaped so that apostrophes or other quoting
      # characters cannot break (or alter) the statement.
      values = [title, id, img_url, director, label, date, maker].map do |value|
        "'#{client.escape(value.to_s)}'"
      end
      client.query("INSERT INTO video (video_id,video_name,license,url,director,label,date,maker) " \
                   "VALUES (#{index},#{values.join(',')})")
    rescue StandardError
      # Best effort: when the video row cannot be stored, skip the video
      # and leave its label row unmarked.
      return
    end

    cast.each do |a|
      a_tmp = actor_hash[a]
      next if a_tmp.nil?
      client.query("INSERT INTO v2a (v2a_fk_video,v2a_fk_actor) VALUES(#{index}, #{a_tmp.to_i})")
    end

    genres.each do |g|
      g_tmp = genres_hash[g]
      next if g_tmp.nil?
      client.query("INSERT INTO v2c (v2c_fk_video,v2c_fk_category) VALUES(#{index}, #{g_tmp.to_i})")
    end

    client.query("UPDATE label SET video_download=1 WHERE video_num=#{index}")
    nil
  end

  # Downloads every video whose label row has video_download=0, splitting
  # the work into groups of VIDEOS_PER_THREAD, one thread per group.
  #
  # @return [Array<Thread>] the joined worker threads
  def download_all_video
    db = Javlibrary.client
    pending = begin
      # Materialize the rows before the connection is closed.
      db.query("SELECT video_num, video_label FROM label WHERE video_download=0").to_a
    ensure
      db.close
    end

    actors = Javlibrary.actor_hash
    genres = Javlibrary.genre_hash

    workers = pending.each_slice(VIDEOS_PER_THREAD).map do |group|
      Thread.new do
        # `conn` is local to this block. Reusing the name of a local from
        # the enclosing method would make all threads share (and overwrite)
        # a single connection variable.
        conn = Javlibrary.client
        begin
          group.each do |item|
            begin
              Javlibrary.video_info_insert(conn, item['video_num'], item['video_label'],
                                           actors, genres)
            rescue StandardError
              # One failing video must not stop the rest of the group.
              next
            end
          end
        ensure
          conn.close
        end
      end
    end
    workers.map(&:join)
  end

  # Loads the whole `actor` table.
  #
  # @return [Hash] actor_name (as String) => actor_id
  def actor_hash
    client = Javlibrary.client
    begin
      client.query("SELECT * FROM actor").each_with_object({}) do |item, lookup|
        lookup[item['actor_name'].to_s] = item['actor_id']
      end
    ensure
      client.close
    end
  end

  # Loads the whole `category` table.
  #
  # @return [Hash] category_name (as String) => category_id
  def genre_hash
    client = Javlibrary.client
    begin
      client.query("SELECT * FROM category").each_with_object({}) do |item, lookup|
        lookup[item['category_name'].to_s] = item['category_id']
      end
    ensure
      client.close
    end
  end

  # Scrapes the site's genre list.
  #
  # @return [Array<String>] distinct genre names in page order
  # @raise [StandardError] the fetch error, after MAX_FETCH_ATTEMPTS failures
  def Javlibrary.genres
    agent = Mechanize.new
    with_retries { agent.get "http://www.jav11b.com/cn/genres.php" }

    Nokogiri::HTML(agent.page.body).search('//div[@class="genreitem"]/a').map do |row|
      row.children.text
    end.uniq
  end

  # Inserts every scraped genre into `category`. A failing insert is
  # skipped so the remaining genres are still processed.
  #
  # @return [nil]
  def genres_insert
    client = Javlibrary.client
    begin
      Javlibrary.genres.each do |e|
        begin
          client.query("INSERT INTO category (category_name) VALUES ('#{client.escape(e.to_s)}')")
        rescue StandardError
          next
        end
      end
    ensure
      client.close
    end
    nil
  end

  # Reads the number of the last page from a listing's page selector.
  #
  # @param nokogiri_doc [#search] parsed listing page
  # @return [Integer] value after the final "=" in the "page last" link's
  #   href, or 1 when the page has no such link
  def Javlibrary.author_page_num(nokogiri_doc)
    last_page = 1
    nokogiri_doc.search('//div[@class="page_selector"]/a[@class="page last"]').each do |row|
      last_page = row['href'].split("=")[-1].to_i
    end
    last_page
  end

  # Walks the star list for every prefix A..Z, page by page, and inserts
  # each actor (name, site label, prefix letter) into `actor`. A failing
  # insert is skipped; a failing page fetch propagates to the caller.
  #
  # @return [nil]
  def get_all_actor
    firsturl = "http://www.jav11b.com/cn/star_list.php?prefix="

    client = Javlibrary.client
    begin
      'A'.upto('Z') do |alphabet|
        tempurl = firsturl + alphabet
        response = RestClient.get tempurl

        doc = Nokogiri::HTML(response.body)
        last_page = Javlibrary.author_page_num(doc)

        1.upto(last_page) do |page_num|
          temp_page_url = tempurl + "&page=#{page_num}"
          response_page = RestClient.get temp_page_url
          doc_page = Nokogiri::HTML(response_page.body)
          doc_page.search('//div[@class="starbox"]/div[@class="searchitem"]/a').each do |row|
            # row.text is the actor name; the value after the final "=" in
            # the link's href is stored as the actor's label.
            name = row.text
            label = row['href'].split("=")[-1]
            begin
              client.query("INSERT INTO actor (actor_name, actor_label, type) " \
                           "VALUES ('#{client.escape(name.to_s)}', '#{client.escape(label.to_s)}', '#{alphabet}')")
            rescue StandardError
              next
            end
          end
        end
      end
    ensure
      client.close
    end
    nil
  end

  # Collects the labels of every video listed for one actor and inserts
  # them into `label` with video_download=0. A failing insert is skipped.
  #
  # @param actor_id [String] value of the site's `s` query parameter
  # @return [nil]
  # @raise [StandardError] the fetch error, after MAX_FETCH_ATTEMPTS failures
  def Javlibrary.download_video_label(actor_id)
    firsturl = "http://www.jav11b.com/ja/vl_star.php?s=#{actor_id}"
    baseurl = "http://www.jav11b.com/ja/vl_star.php?&mode=&s=#{actor_id}&page="

    response = with_retries { RestClient.get firsturl }
    last_page = author_page_num(Nokogiri::HTML(response.body))

    labels = []
    1.upto(last_page) do |page|
      page_response = with_retries { RestClient.get(baseurl + page.to_s) }
      Nokogiri::HTML(page_response.body).search('//div[@class="video"]/a').each do |row|
        # The video label is the value after the final "=" in the href.
        labels << row['href'].split("=")[-1]
      end
    end

    client = Javlibrary.client
    begin
      labels.each do |e|
        begin
          client.query("INSERT INTO label (video_label, video_download) VALUES ('#{client.escape(e.to_s)}', '0')")
        rescue StandardError
          next
        end
      end
    ensure
      client.close
    end
    nil
  end

  # Downloads the video labels of every actor stored under one prefix.
  #
  # @param type [String] prefix letter stored in `actor.type`
  # @return [Array<Hash>] the actor rows that were processed
  def Javlibrary.select_actor(type)
    client = Javlibrary.client
    rows = begin
      # Materialize the rows before the connection is closed.
      client.query("SELECT actor_label FROM actor WHERE type='#{client.escape(type.to_s)}'").to_a
    ensure
      client.close
    end

    rows.each do |e|
      download_video_label(e["actor_label"])
    end
  end

  # Runs `select_actor` for every prefix A..Z, one thread per letter, and
  # waits for all of them.
  #
  # @return [Array<Thread>] the joined worker threads
  def download_all_video_label
    workers = ('A'..'Z').map do |alphabet|
      Thread.new { Javlibrary.select_actor(alphabet) }
    end
    workers.map(&:join)
  end

  module_function :client
  module_function :download_all_video
  module_function :actor_hash, :genre_hash
  module_function :genres_insert
  module_function :get_all_actor
  module_function :download_all_video_label
end
|
data/lib/javlibrary/database.rb
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# encoding:utf-8
|
2
2
|
|
3
|
+
require 'mysql2'
|
4
|
+
require 'mechanize'
|
5
|
+
require 'mechanize'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative 'database'
|
9
|
+
require_relative 'info_hash'
|
10
|
+
|
3
11
|
module Javlibrary
|
4
12
|
def Javlibrary.downloader(identifer)
|
5
13
|
baseurl = "http://www.jav11b.com/cn/?v=#{identifer}"
|
@@ -97,6 +105,11 @@ module Javlibrary
|
|
97
105
|
thread_pool.map(&:join)
|
98
106
|
end
|
99
107
|
|
108
|
+
# Ad-hoc smoke check: runs `downloader` for one hard-coded video identifier
# and pretty-prints whatever it returns.
def test
  fetched = downloader('javlia322m')
  pp fetched
end
|
111
|
+
|
112
|
+
module_function :test
|
100
113
|
module_function :download_all_video
|
101
114
|
end
|
102
115
|
|
data/lib/javlibrary/info_hash.rb
CHANGED
data/lib/javlibrary/init.rb
CHANGED
data/lib/javlibrary/name.rb
CHANGED
data/lib/javlibrary/version.rb
CHANGED
data/lib/javlibrary/video.rb
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# encoding:utf-8
|
2
2
|
|
3
|
+
require 'rest-client'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'mysql2'
|
6
|
+
require 'redis'
|
7
|
+
require 'pp'
|
8
|
+
|
9
|
+
require_relative 'database'
|
10
|
+
|
11
|
+
|
3
12
|
module Javlibrary
|
4
13
|
def Javlibrary.download_video_label(actor_id)
|
5
14
|
firsturl = "http://www.jav11b.com/ja/vl_star.php?s=#{actor_id}"
|