arb-bs 1.1.2 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/exe/bs_pic_url +51 -0
  3. data/lib/arb/bs/version.rb +1 -1
  4. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a44373bde98e9a832f25ab705840704aec79fa8
4
- data.tar.gz: d9523504a380b8b7fa7b3c7f1bcacf9107b03204
3
+ metadata.gz: 5b63afe38e81d5cfa92a61fa08eb0e2d0f1d7020
4
+ data.tar.gz: bd1beab3a6de91d439c340203660bfa021fb6f3c
5
5
  SHA512:
6
- metadata.gz: 37d22665bb8bbaee7eb840a76a4d74262a6ff56c26f99df631e74d9eb147927467008008c73c0a17af1c1fac62d379a0d6642eca1b1d5931b78ac74572dee864
7
- data.tar.gz: f7a77ce4aa1fd1999b72b0ff63c1a22a7887d5315d9fb0283637dfe288c1afa277bba77f10b16df42b79f7416b5241f67cda1f82ca99a037c89813303a64de2e
6
+ metadata.gz: 9928186d35831a79297e25806c8d81bb3697e986abc520055dd9a574ab09b33f7828d6cbb592e9960f1892d15d17b53e510ad76a8c280d1da862e6bba7db93c7
7
+ data.tar.gz: 916a0fcf65a183207f9a80d3fb9b084d2ae4ab15c8d693e668f12d060505d0f8144e9149a381847186c64ef6a3a0cfcd97d5d444d0774701efce721e594779d6
data/exe/bs_pic_url ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'arb/thread'
4
+ require 'arb/crawler'
5
+
6
+ include Arb
7
+
8
+ domain='www.budejie.com'
9
+ map_file='map.txt'
10
+
11
+ thread_count=(ARGV[0] || 3).to_i
12
+ max_page=(ARGV[1] || 50).to_i
13
+ #Minimum idle time (in seconds) between two complete rounds.
14
+ min_idle_time=(ARGV[2] || 600).to_i
15
+
16
+
17
+ File.open(map_file,'w+') unless File.exists? map_file
18
+
19
+ Thread.parallel(thread_count) do |dispatcher|
20
+
21
+ loop do
22
+ "http://#{domain}/pic/?".enum('?',1..max_page).each_with_index do |url,index|
23
+ dispatcher.new_task do |lock|
24
+ res=Crawler.get_by_css(url,"div.j-r-list-c-img a img")
25
+ unless res
26
+ puts "Some errors occur when parsing page #{index+1}."
27
+ next
28
+ end
29
+ res.each do |hash|
30
+ url_file=Crawler.filename_of_url(hash[:"data-original"])
31
+
32
+ lock.synchronize do
33
+ unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
34
+ puts "#{hash[:'data-original']}\n#{hash[:title]}",''
35
+ File.open map_file,'a' do |file|
36
+ file.puts "#{hash[:'data-original']}:#{hash[:title]}"
37
+ end
38
+ end
39
+ end
40
+
41
+ end
42
+ tmp=1+rand(5)
43
+ puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
44
+ sleep tmp
45
+ end
46
+ end
47
+ tmp=min_idle_time+rand(5)
48
+ puts "Complete round finished, next action in #{tmp} seconds later."
49
+ sleep tmp
50
+ end
51
+ end
@@ -1,5 +1,5 @@
1
1
  module Arb
2
2
  module Bs
3
- VERSION = "1.1.2"
3
+ VERSION = "1.1.3"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arb-bs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - arybin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-04-11 00:00:00.000000000 Z
11
+ date: 2018-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -71,6 +71,7 @@ email:
71
71
  - arybin@163.com
72
72
  executables:
73
73
  - bs_pic
74
+ - bs_pic_url
74
75
  extensions: []
75
76
  extra_rdoc_files: []
76
77
  files:
@@ -82,6 +83,7 @@ files:
82
83
  - bin/console
83
84
  - bin/setup
84
85
  - exe/bs_pic
86
+ - exe/bs_pic_url
85
87
  - lib/arb/bs.rb
86
88
  - lib/arb/bs/version.rb
87
89
  homepage: https://github.com/arybin-cn/arb-bs