arb-bs 1.1.2 → 1.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/exe/bs_pic_url +51 -0
- data/lib/arb/bs/version.rb +1 -1
- metadata +4 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5b63afe38e81d5cfa92a61fa08eb0e2d0f1d7020
|
4
|
+
data.tar.gz: bd1beab3a6de91d439c340203660bfa021fb6f3c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9928186d35831a79297e25806c8d81bb3697e986abc520055dd9a574ab09b33f7828d6cbb592e9960f1892d15d17b53e510ad76a8c280d1da862e6bba7db93c7
|
7
|
+
data.tar.gz: 916a0fcf65a183207f9a80d3fb9b084d2ae4ab15c8d693e668f12d060505d0f8144e9149a381847186c64ef6a3a0cfcd97d5d444d0774701efce721e594779d6
|
data/exe/bs_pic_url
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'arb/thread'
|
4
|
+
require 'arb/crawler'
|
5
|
+
|
6
|
+
include Arb
|
7
|
+
|
8
|
+
domain='www.budejie.com'
|
9
|
+
map_file='map.txt'
|
10
|
+
|
11
|
+
thread_count=(ARGV[0] || 3).to_i
|
12
|
+
max_page=(ARGV[1] || 50).to_i
|
13
|
+
#Minimum idle time (in seconds) between two complete rounds.
|
14
|
+
min_idle_time=(ARGV[2] || 600).to_i
|
15
|
+
|
16
|
+
|
17
|
+
File.open(map_file,'w+') unless File.exists? map_file
|
18
|
+
|
19
|
+
Thread.parallel(thread_count) do |dispatcher|
|
20
|
+
|
21
|
+
loop do
|
22
|
+
"http://#{domain}/pic/?".enum('?',1..max_page).each_with_index do |url,index|
|
23
|
+
dispatcher.new_task do |lock|
|
24
|
+
res=Crawler.get_by_css(url,"div.j-r-list-c-img a img")
|
25
|
+
unless res
|
26
|
+
puts "Some errors occur when parsing page #{index+1}."
|
27
|
+
next
|
28
|
+
end
|
29
|
+
res.each do |hash|
|
30
|
+
url_file=Crawler.filename_of_url(hash[:"data-original"])
|
31
|
+
|
32
|
+
lock.synchronize do
|
33
|
+
unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
|
34
|
+
puts "#{hash[:'data-original']}\n#{hash[:title]}",''
|
35
|
+
File.open map_file,'a' do |file|
|
36
|
+
file.puts "#{hash[:'data-original']}:#{hash[:title]}"
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
tmp=1+rand(5)
|
43
|
+
puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
|
44
|
+
sleep tmp
|
45
|
+
end
|
46
|
+
end
|
47
|
+
tmp=min_idle_time+rand(5)
|
48
|
+
puts "Complete round finished, next action in #{tmp} seconds later."
|
49
|
+
sleep tmp
|
50
|
+
end
|
51
|
+
end
|
data/lib/arb/bs/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arb-bs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.2
|
4
|
+
version: 1.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- arybin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -71,6 +71,7 @@ email:
|
|
71
71
|
- arybin@163.com
|
72
72
|
executables:
|
73
73
|
- bs_pic
|
74
|
+
- bs_pic_url
|
74
75
|
extensions: []
|
75
76
|
extra_rdoc_files: []
|
76
77
|
files:
|
@@ -82,6 +83,7 @@ files:
|
|
82
83
|
- bin/console
|
83
84
|
- bin/setup
|
84
85
|
- exe/bs_pic
|
86
|
+
- exe/bs_pic_url
|
85
87
|
- lib/arb/bs.rb
|
86
88
|
- lib/arb/bs/version.rb
|
87
89
|
homepage: https://github.com/arybin-cn/arb-bs
|