arb-bs 0.1.6 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/arb-bs.gemspec +1 -0
- data/exe/bs_pic +24 -17
- data/lib/arb/bs/version.rb +1 -1
- metadata +17 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d3fee1fcde122cff7050d84adeee0089b80a7b59
|
4
|
+
data.tar.gz: f54a5e8778a5718c7c88fb9b4f336bbf0ec1f423
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a915ebb6fe64cf5579683cf1b32082cae589758534b423b9cfce73ef9c70c18f715d442f62283655497dfa9b2c404c88bf57269fdac44c9dd8c8ad0957ad434e
|
7
|
+
data.tar.gz: 36f3da886a3eb1cff62adbd87aee7def07da9e37a8b62c8f3005ba6023a4444dc0d92ed3b049e4bd442108ed4f149e09051356e9fce551f11473c011cb24bbac
|
data/arb-bs.gemspec
CHANGED
data/exe/bs_pic
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
require 'arb/thread'
|
3
4
|
require 'arb/crawler'
|
4
5
|
|
5
6
|
include Arb
|
@@ -9,31 +10,37 @@ map_file='map.txt'
|
|
9
10
|
max_page=(ARGV[0] || 50).to_i
|
10
11
|
#Minimun idle time(in seconds) between two complete rounds.
|
11
12
|
min_idle_time=(ARGV[1] || 600).to_i
|
13
|
+
thread_count=(ARGV[2] || 3).to_i
|
12
14
|
|
13
15
|
|
14
16
|
File.open(map_file,'w+') unless File.exists? map_file
|
15
17
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
File.
|
29
|
-
|
18
|
+
Thread.parallel(thread_count) do |dispatcher|
|
19
|
+
|
20
|
+
loop do
|
21
|
+
"http://#{domain}/pic/?".enum('?',1..max_page).each_with_index do |url,index|
|
22
|
+
dispatcher.new_task do
|
23
|
+
res=Crawler.get_by_css(url,"div.j-r-list-c-img a img")
|
24
|
+
unless res
|
25
|
+
puts "Some errors occur when parsing page #{index+1}."
|
26
|
+
next
|
27
|
+
end
|
28
|
+
res.each do |hash|
|
29
|
+
url_file=Crawler.filename_of_url(hash[:"data-original"])
|
30
|
+
unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
|
31
|
+
if Crawler.download(hash[:"data-original"],url_file)
|
32
|
+
puts "#{hash[:'data-original']}\n#{hash[:title]}",''
|
33
|
+
File.open map_file,'a' do |file|
|
34
|
+
file.puts "#{url_file}:#{Crawler.filter_str(hash[:title])}"
|
35
|
+
end
|
36
|
+
end
|
30
37
|
end
|
31
38
|
end
|
39
|
+
tmp=1+rand(5)
|
40
|
+
puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
|
41
|
+
sleep tmp
|
32
42
|
end
|
33
43
|
end
|
34
|
-
tmp=1+rand(5)
|
35
|
-
puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
|
36
|
-
sleep tmp
|
37
44
|
end
|
38
45
|
tmp=min_idle_time+rand(5)
|
39
46
|
puts "Complete round finished, next action in #{tmp} seconds later."
|
data/lib/arb/bs/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arb-bs
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- arybin
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-04-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -52,6 +52,20 @@ dependencies:
|
|
52
52
|
- - ">="
|
53
53
|
- !ruby/object:Gem::Version
|
54
54
|
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: arb-thread
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
55
69
|
description: A demo of Web Crawler using arb-crawler
|
56
70
|
email:
|
57
71
|
- arybin@163.com
|
@@ -89,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
103
|
version: '0'
|
90
104
|
requirements: []
|
91
105
|
rubyforge_project:
|
92
|
-
rubygems_version: 2.
|
106
|
+
rubygems_version: 2.6.14
|
93
107
|
signing_key:
|
94
108
|
specification_version: 4
|
95
109
|
summary: A demo of Web Crawler using arb-crawler
|