arb-bs 0.1.6 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: cc17e9bd17a4771edc4696a6e8a052da7edd1484
4
- data.tar.gz: 8038b6face47fd2f01172dbb0577d0e95c551dbf
3
+ metadata.gz: d3fee1fcde122cff7050d84adeee0089b80a7b59
4
+ data.tar.gz: f54a5e8778a5718c7c88fb9b4f336bbf0ec1f423
5
5
  SHA512:
6
- metadata.gz: 57b740f6b723bef1202f1180e6209bcc5093f682539b1d053638c5d11e4180f115142d3e49681adb0707597a75abcc3c137117ee98dedced0498eba19793e1bd
7
- data.tar.gz: 20550beb04745265ad363d5f75c66a25adcfec0b2de4c1625302187fe8b8e0fe466a1a890a1ad535b840d9cdcf048cdd0e5031ba65e418ebf1350a2e85964ae9
6
+ metadata.gz: a915ebb6fe64cf5579683cf1b32082cae589758534b423b9cfce73ef9c70c18f715d442f62283655497dfa9b2c404c88bf57269fdac44c9dd8c8ad0957ad434e
7
+ data.tar.gz: 36f3da886a3eb1cff62adbd87aee7def07da9e37a8b62c8f3005ba6023a4444dc0d92ed3b049e4bd442108ed4f149e09051356e9fce551f11473c011cb24bbac
data/arb-bs.gemspec CHANGED
@@ -23,5 +23,6 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency "bundler", "~> 1.14"
24
24
  spec.add_development_dependency "rake", "~> 10.0"
25
25
  spec.add_dependency "arb-crawler"
26
+ spec.add_dependency "arb-thread"
26
27
 
27
28
  end
data/exe/bs_pic CHANGED
@@ -1,5 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
+ require 'arb/thread'
3
4
  require 'arb/crawler'
4
5
 
5
6
  include Arb
@@ -9,31 +10,37 @@ map_file='map.txt'
9
10
  max_page=(ARGV[0] || 50).to_i
10
11
  #Minimum idle time (in seconds) between two complete rounds.
11
12
  min_idle_time=(ARGV[1] || 600).to_i
13
+ thread_count=(ARGV[2] || 3).to_i
12
14
 
13
15
 
14
16
  File.open(map_file,'w+') unless File.exists? map_file
15
17
 
16
- loop do
17
- "http://#{domain}/pic/?".enum('?',1..max_page).each_with_index do |url,index|
18
- res=Crawler.get_by_css(url,"div.j-r-list-c-img a img")
19
- unless res
20
- puts "Some errors occur when parsing page #{index+1}."
21
- next
22
- end
23
- res.each do |hash|
24
- url_file=Crawler.filename_of_url(hash[:"data-original"])
25
- unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
26
- if Crawler.download(hash[:"data-original"],url_file)
27
- puts "#{hash[:'data-original']}\n#{hash[:title]}",''
28
- File.open map_file,'a' do |file|
29
- file.puts "#{url_file}:#{Crawler.filter_str(hash[:title])}"
18
+ Thread.parallel(thread_count) do |dispatcher|
19
+
20
+ loop do
21
+ "http://#{domain}/pic/?".enum('?',1..max_page).each_with_index do |url,index|
22
+ dispatcher.new_task do
23
+ res=Crawler.get_by_css(url,"div.j-r-list-c-img a img")
24
+ unless res
25
+ puts "Some errors occur when parsing page #{index+1}."
26
+ next
27
+ end
28
+ res.each do |hash|
29
+ url_file=Crawler.filename_of_url(hash[:"data-original"])
30
+ unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
31
+ if Crawler.download(hash[:"data-original"],url_file)
32
+ puts "#{hash[:'data-original']}\n#{hash[:title]}",''
33
+ File.open map_file,'a' do |file|
34
+ file.puts "#{url_file}:#{Crawler.filter_str(hash[:title])}"
35
+ end
36
+ end
30
37
  end
31
38
  end
39
+ tmp=1+rand(5)
40
+ puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
41
+ sleep tmp
32
42
  end
33
43
  end
34
- tmp=1+rand(5)
35
- puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
36
- sleep tmp
37
44
  end
38
45
  tmp=min_idle_time+rand(5)
39
46
  puts "Complete round finished, next action in #{tmp} seconds later."
@@ -1,5 +1,5 @@
1
1
  module Arb
2
2
  module Bs
3
- VERSION = "0.1.6"
3
+ VERSION = "1.0.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arb-bs
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.6
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - arybin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-02-18 00:00:00.000000000 Z
11
+ date: 2018-04-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -52,6 +52,20 @@ dependencies:
52
52
  - - ">="
53
53
  - !ruby/object:Gem::Version
54
54
  version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: arb-thread
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
55
69
  description: A demo of Web Crawler using arb-crawler
56
70
  email:
57
71
  - arybin@163.com
@@ -89,7 +103,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
103
  version: '0'
90
104
  requirements: []
91
105
  rubyforge_project:
92
- rubygems_version: 2.4.8
106
+ rubygems_version: 2.6.14
93
107
  signing_key:
94
108
  specification_version: 4
95
109
  summary: A demo of Web Crawler using arb-crawler