javlibrary 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/javlibrary.rb +259 -7
- data/lib/javlibrary/database.rb +4 -0
- data/lib/javlibrary/downloader.rb +13 -0
- data/lib/javlibrary/info_hash.rb +2 -0
- data/lib/javlibrary/init.rb +6 -0
- data/lib/javlibrary/name.rb +6 -0
- data/lib/javlibrary/version.rb +1 -1
- data/lib/javlibrary/video.rb +9 -0
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a9f057e97a5ad367420e48abd74ce00766698321
|
4
|
+
data.tar.gz: ff8adfe4c7e0cc48c27df322f0a87185eaf419ca
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cf3bfbf99a201d5110e4228b041a3de500ab0047d4208b22787afbd375fe5ff55eab8331ced816e25dcf7199126d885d140aff610ec3ecff0cbdc2a4584f357e
|
7
|
+
data.tar.gz: 8a64494d675a6242c1e01b75d8176065d68b4a521b1b971c59b38a28ce4725cb481205ef0882e618388970c04cb52740b648dd1823d7f4b544a730c988eb2d15
|
data/lib/javlibrary.rb
CHANGED
@@ -6,13 +6,265 @@ require 'nokogiri'
|
|
6
6
|
require 'mysql2'
|
7
7
|
require 'pp'
|
8
8
|
|
9
|
-
require_relative 'javlibrary/init'
|
10
|
-
require_relative 'javlibrary/database'
|
11
|
-
require_relative 'javlibrary/downloader'
|
12
|
-
require_relative 'javlibrary/info_hash'
|
13
|
-
require_relative 'javlibrary/name'
|
14
|
-
require_relative 'javlibrary/video'
|
15
|
-
|
16
9
|
# Scraping and loading helpers: fetch actor, genre and video pages over HTTP,
# parse them with Nokogiri and store the extracted rows in MySQL via Mysql2.
module Javlibrary
  # Maximum number of attempts for a page fetch that is retried on failure.
  # The fetches used to retry forever, which hangs on permanent errors.
  MAX_FETCH_ATTEMPTS = 5

  # Open a new MySQL connection.
  #
  # Each setting can be overridden through the environment
  # (JAVLIBRARY_DB_HOST, JAVLIBRARY_DB_USER, JAVLIBRARY_DB_PASSWORD,
  # JAVLIBRARY_DB_NAME); when a variable is absent the previous built-in
  # value is used, so existing setups keep working.
  #
  # NOTE(review): the fallback password is still embedded in source for
  # backward compatibility only; it should be moved out of the code base.
  #
  # @return [Mysql2::Client] a freshly opened connection; the caller closes it
  def client
    Mysql2::Client.new(:host => ENV.fetch('JAVLIBRARY_DB_HOST', '127.0.0.1'),
                       :username => ENV.fetch('JAVLIBRARY_DB_USER', 'root'),
                       :password => ENV.fetch('JAVLIBRARY_DB_PASSWORD', 'XuHefeng'),
                       :database => ENV.fetch('JAVLIBRARY_DB_NAME', 'javlibrary_new'))
  end

  # Run the block, retrying on StandardError up to +attempts+ times in total,
  # then re-raise the last error.
  def Javlibrary.with_retries(attempts = MAX_FETCH_ATTEMPTS)
    tries = 0
    begin
      yield
    rescue StandardError
      tries += 1
      retry if tries < attempts
      raise
    end
  end
  private_class_method :with_retries

  # Fetch one video page and return its fields as an array in the order
  # title, id, date, director, maker, label, cast, genres, img_url.
  # Fields missing from the page come back as nil (or "" for text fields).
  def Javlibrary.video_fields(identifer)
    baseurl = "http://www.jav11b.com/cn/?v=#{identifer}"
    agent = Mechanize.new
    # Pick any known alias; a fixed rand(21) index could miss the table size.
    agent.user_agent = Mechanize::AGENT_ALIASES.values.sample
    with_retries { agent.get baseurl }

    doc = Nokogiri::HTML(agent.page.body)

    video_title = doc.search('div[@id="video_title"]/h3/a').children.text
    details = doc.search('//div[@id="video_info"]/div[@class="item"]/table/tr/td[@class="text"]').map do |row|
      row.children.text
    end
    # Every genre is followed by a single space, including the last one.
    video_genres = doc.search('//div[@id="video_genres"]/table/tr/td[@class="text"]/span[@class="genre"]/a').map do |row|
      "#{row.children.text} "
    end.join
    # When several jacket images match, the last one wins.
    video_jacket_img = doc.search('//img[@id="video_jacket_img"]').map { |row| row['src'] }.last.to_s

    [video_title, details[0], details[1], details[2], details[3], details[4],
     details[-1], video_genres, video_jacket_img]
  end
  private_class_method :video_fields

  # Download the page of one video and return its metadata as a single string.
  #
  # @param identifer [String] value of the site's +v+ query parameter
  # @return [String] title$id$date$director$maker$label$cast$genres$img_url
  # @raise [StandardError] the last fetch error once MAX_FETCH_ATTEMPTS
  #   attempts have failed
  def Javlibrary.downloader(identifer)
    # nil fields join as empty strings, exactly like string interpolation.
    video_fields(identifer).join('$')
  end

  # Download one video and insert it, plus its actor and genre links, into
  # the database; finally mark the label row as downloaded.
  #
  # @param client [Mysql2::Client] open connection (must respond to
  #   +query+ and +escape+)
  # @param index [Integer] primary key used for the video row
  # @param identifer [String] value of the site's +v+ query parameter
  # @param actor_hash [Hash] actor name => actor id
  # @param genres_hash [Hash] genre name => category id
  # @return [nil]
  def Javlibrary.video_info_insert(client, index, identifer, actor_hash, genres_hash)
    # Use the parsed fields directly: a single fetch, and a '$' inside a
    # title can no longer shift the columns.
    title, id, date, director, maker, label, cast_tmp, genres_tmp, img_url = video_fields(identifer)
    cast = cast_tmp.to_s.split
    genres = genres_tmp.to_s.split

    # Scraped text is untrusted: escape it before embedding it in SQL.
    values = [title, id, img_url, director, label, date, maker].map do |value|
      "'#{client.escape(value.to_s)}'"
    end

    begin
      video_id = Integer(index)
      client.query("INSERT INTO video (video_id,video_name,license,url,director,label,date,maker) " \
                   "VALUES (#{video_id},#{values.join(',')})")
    rescue StandardError
      # Best effort, as before: if the video row cannot be inserted
      # (e.g. it already exists) the remaining steps are skipped.
      return
    end

    cast.each do |name|
      actor_id = actor_hash[name]
      next if actor_id.nil?
      client.query("INSERT INTO v2a (v2a_fk_video,v2a_fk_actor) VALUES(#{video_id}, #{actor_id.to_i})")
    end

    genres.each do |name|
      category_id = genres_hash[name]
      next if category_id.nil?
      client.query("INSERT INTO v2c (v2c_fk_video,v2c_fk_category) VALUES(#{video_id}, #{category_id.to_i})")
    end

    client.query("UPDATE label SET video_download=1 WHERE video_num=#{video_id}")
    nil
  end

  # Download every video whose label row has video_download=0, using one
  # thread (with its own connection) per slice of 5000 labels.
  #
  # @return [Array<Thread>] the joined worker threads
  def download_all_video
    db = Javlibrary.client
    begin
      pending = db.query("SELECT video_num, video_label FROM label WHERE video_download=0").to_a
    ensure
      db.close
    end

    actors = Javlibrary.actor_hash
    categories = Javlibrary.genre_hash

    workers = pending.each_slice(5000).map do |group|
      Thread.new do
        # A variable local to this block: assigning to a local of the
        # enclosing method would make all threads share one connection.
        conn = Javlibrary.client
        begin
          group.each do |item|
            begin
              Javlibrary.video_info_insert(conn, item['video_num'], item['video_label'],
                                           actors, categories)
            rescue StandardError
              # One failing video must not stop the rest of the slice.
              next
            end
          end
        ensure
          conn.close
        end
      end
    end
    workers.each(&:join)
  end

  # Load the actor table into memory.
  #
  # @return [Hash] actor_name (as String) => actor_id
  def actor_hash
    db = Javlibrary.client
    begin
      db.query("SELECT * FROM actor").each_with_object({}) do |item, by_name|
        by_name[item['actor_name'].to_s] = item['actor_id']
      end
    ensure
      db.close
    end
  end

  # Load the category table into memory.
  #
  # @return [Hash] category_name (as String) => category_id
  def genre_hash
    db = Javlibrary.client
    begin
      db.query("SELECT * FROM category").each_with_object({}) do |item, by_name|
        by_name[item['category_name'].to_s] = item['category_id']
      end
    ensure
      db.close
    end
  end

  # Fetch the genre index page and return the distinct genre names.
  #
  # @return [Array<String>]
  # @raise [StandardError] the last fetch error once MAX_FETCH_ATTEMPTS
  #   attempts have failed
  def Javlibrary.genres
    agent = Mechanize.new
    with_retries { agent.get "http://www.jav11b.com/cn/genres.php" }

    Nokogiri::HTML(agent.page.body).search('//div[@class="genreitem"]/a').map do |row|
      row.children.text
    end.uniq
  end

  # Insert every genre name returned by Javlibrary.genres into the category
  # table. Rows that fail to insert are skipped.
  #
  # @return [nil]
  def genres_insert
    db = Javlibrary.client
    begin
      Javlibrary.genres.each do |name|
        begin
          db.query("INSERT INTO category (category_name) VALUES ('#{db.escape(name.to_s)}')")
        rescue StandardError
          next
        end
      end
    ensure
      db.close
    end
    nil
  end

  # Read the number of the last page from a listing's page selector.
  #
  # @param nokogiri_doc [#search] parsed listing page
  # @return [Integer] the number after the final "=" of the last "page last"
  #   link's href; 1 when there is no such link or the number is unusable
  def Javlibrary.author_page_num(nokogiri_doc)
    link = nokogiri_doc.search('//div[@class="page_selector"]/a[@class="page last"]').to_a.last
    return 1 unless link

    # Never report fewer than one page, so callers always visit page 1.
    [link['href'].to_s.split("=").last.to_i, 1].max
  end

  # Walk the star lists for prefixes A..Z, page by page, and insert every
  # actor (name, label, prefix letter) into the actor table. Rows that fail
  # to insert are skipped.
  #
  # @return [nil]
  def get_all_actor
    firsturl = "http://www.jav11b.com/cn/star_list.php?prefix="

    db = Javlibrary.client
    begin
      'A'.upto('Z') do |alphabet|
        tempurl = firsturl + alphabet
        last_page = Javlibrary.author_page_num(Nokogiri::HTML(RestClient.get(tempurl).body))

        1.upto(last_page) do |page_num|
          doc_page = Nokogiri::HTML(RestClient.get("#{tempurl}&page=#{page_num}").body)
          doc_page.search('//div[@class="starbox"]/div[@class="searchitem"]/a').each do |row|
            # row.text is the actor name; the value after the last "=" in
            # the link is the actor label.
            name = db.escape(row.text.to_s)
            label = db.escape(row['href'].to_s.split("=").last.to_s)
            begin
              db.query("INSERT INTO actor (actor_name, actor_label, type) " \
                       "VALUES ('#{name}', '#{label}', '#{alphabet}')")
            rescue StandardError
              next
            end
          end
        end
      end
    ensure
      db.close
    end
    nil
  end

  # Collect the video labels listed on every page of one actor and insert
  # them into the label table with video_download=0. Rows that fail to
  # insert are skipped.
  #
  # @param actor_id [String] actor label used in the site's +s+ parameter
  # @return [nil]
  # @raise [StandardError] the last error once the first-page fetch has
  #   failed MAX_FETCH_ATTEMPTS times, or any error from a later page fetch
  def Javlibrary.download_video_label(actor_id)
    firsturl = "http://www.jav11b.com/ja/vl_star.php?s=#{actor_id}"
    baseurl = "http://www.jav11b.com/ja/vl_star.php?&mode=&s=#{actor_id}&page="

    response = with_retries { RestClient.get firsturl }
    last_page = author_page_num(Nokogiri::HTML(response.body))

    labels = (1..last_page).flat_map do |page|
      page_body = RestClient.get(baseurl + page.to_s).body
      # The video label is the value after the last "=" in each link.
      Nokogiri::HTML(page_body).search('//div[@class="video"]/a').map do |row|
        row['href'].to_s.split("=").last
      end
    end.compact

    db = Javlibrary.client
    begin
      labels.each do |video_label|
        begin
          db.query("INSERT INTO label (video_label, video_download) VALUES ('#{db.escape(video_label)}', '0')")
        rescue StandardError
          next
        end
      end
    ensure
      db.close
    end
    nil
  end

  # Download the video labels of every actor stored under one prefix letter.
  #
  # @param type [String] value of the actor table's +type+ column
  # @return [Array<Hash>] the selected actor rows
  def Javlibrary.select_actor(type)
    db = Javlibrary.client
    begin
      rows = db.query("SELECT actor_label FROM actor WHERE type='#{db.escape(type.to_s)}'").to_a
    ensure
      db.close
    end

    rows.each do |row|
      download_video_label(row["actor_label"])
    end
  end

  # Run select_actor for each letter A..Z, one thread per letter, and wait
  # for all of them.
  #
  # @return [Array<Thread>] the joined worker threads
  def download_all_video_label
    workers = ('A'..'Z').map do |alphabet|
      Thread.new { Javlibrary.select_actor(alphabet) }
    end
    workers.each(&:join)
  end

  module_function :client
  module_function :download_all_video
  module_function :actor_hash, :genre_hash
  module_function :genres_insert
  module_function :get_all_actor
  module_function :download_all_video_label
end
|
data/lib/javlibrary/database.rb
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# encoding:utf-8
|
2
2
|
|
3
|
+
require 'mysql2'
|
4
|
+
require 'mechanize'
|
5
|
+
require 'mechanize'
|
6
|
+
require 'pp'
|
7
|
+
|
8
|
+
require_relative 'database'
|
9
|
+
require_relative 'info_hash'
|
10
|
+
|
3
11
|
module Javlibrary
|
4
12
|
def Javlibrary.downloader(identifer)
|
5
13
|
baseurl = "http://www.jav11b.com/cn/?v=#{identifer}"
|
@@ -97,6 +105,11 @@ module Javlibrary
|
|
97
105
|
thread_pool.map(&:join)
|
98
106
|
end
|
99
107
|
|
108
|
+
# Manual smoke check: download the hard-coded sample video 'javlia322m' and
# pretty-print the resulting record string.
def test
  sample = downloader('javlia322m')
  pp sample
end
|
111
|
+
|
112
|
+
module_function :test
|
100
113
|
module_function :download_all_video
|
101
114
|
end
|
102
115
|
|
data/lib/javlibrary/info_hash.rb
CHANGED
data/lib/javlibrary/init.rb
CHANGED
data/lib/javlibrary/name.rb
CHANGED
data/lib/javlibrary/version.rb
CHANGED
data/lib/javlibrary/video.rb
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# encoding:utf-8
|
2
2
|
|
3
|
+
require 'rest-client'
|
4
|
+
require 'nokogiri'
|
5
|
+
require 'mysql2'
|
6
|
+
require 'redis'
|
7
|
+
require 'pp'
|
8
|
+
|
9
|
+
require_relative 'database'
|
10
|
+
|
11
|
+
|
3
12
|
module Javlibrary
|
4
13
|
def Javlibrary.download_video_label(actor_id)
|
5
14
|
firsturl = "http://www.jav11b.com/ja/vl_star.php?s=#{actor_id}"
|