arb-bs 1.1.2 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/exe/bs_pic_url +51 -0
  3. data/lib/arb/bs/version.rb +1 -1
  4. metadata +4 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 5a44373bde98e9a832f25ab705840704aec79fa8
4
- data.tar.gz: d9523504a380b8b7fa7b3c7f1bcacf9107b03204
3
+ metadata.gz: 5b63afe38e81d5cfa92a61fa08eb0e2d0f1d7020
4
+ data.tar.gz: bd1beab3a6de91d439c340203660bfa021fb6f3c
5
5
  SHA512:
6
- metadata.gz: 37d22665bb8bbaee7eb840a76a4d74262a6ff56c26f99df631e74d9eb147927467008008c73c0a17af1c1fac62d379a0d6642eca1b1d5931b78ac74572dee864
7
- data.tar.gz: f7a77ce4aa1fd1999b72b0ff63c1a22a7887d5315d9fb0283637dfe288c1afa277bba77f10b16df42b79f7416b5241f67cda1f82ca99a037c89813303a64de2e
6
+ metadata.gz: 9928186d35831a79297e25806c8d81bb3697e986abc520055dd9a574ab09b33f7828d6cbb592e9960f1892d15d17b53e510ad76a8c280d1da862e6bba7db93c7
7
+ data.tar.gz: 916a0fcf65a183207f9a80d3fb9b084d2ae4ab15c8d693e668f12d060505d0f8144e9149a381847186c64ef6a3a0cfcd97d5d444d0774701efce721e594779d6
data/exe/bs_pic_url ADDED
@@ -0,0 +1,51 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'arb/thread'
4
+ require 'arb/crawler'
5
+
6
+ include Arb
7
+
8
+ domain='www.budejie.com'
9
+ map_file='map.txt'
10
+
11
+ thread_count=(ARGV[0] || 3).to_i
12
+ max_page=(ARGV[1] || 50).to_i
13
+ #Minimun idle time(in seconds) between two complete rounds.
14
+ min_idle_time=(ARGV[2] || 600).to_i
15
+
16
+
17
+ File.open(map_file,'w+') unless File.exists? map_file
18
+
19
+ Thread.parallel(thread_count) do |dispatcher|
20
+
21
+ loop do
22
+ "http://#{domain}/pic/?".enum('?',1..max_page).each_with_index do |url,index|
23
+ dispatcher.new_task do |lock|
24
+ res=Crawler.get_by_css(url,"div.j-r-list-c-img a img")
25
+ unless res
26
+ puts "Some errors occur when parsing page #{index+1}."
27
+ next
28
+ end
29
+ res.each do |hash|
30
+ url_file=Crawler.filename_of_url(hash[:"data-original"])
31
+
32
+ lock.synchronize do
33
+ unless File.readlines(map_file).find{|line| line.to_s.include? url_file}
34
+ puts "#{hash[:'data-original']}\n#{hash[:title]}",''
35
+ File.open map_file,'a' do |file|
36
+ file.puts "#{hash[:'data-original']}:#{hash[:title]}"
37
+ end
38
+ end
39
+ end
40
+
41
+ end
42
+ tmp=1+rand(5)
43
+ puts "Page round finished for page #{index+1}, next action in #{tmp} seconds later."
44
+ sleep tmp
45
+ end
46
+ end
47
+ tmp=min_idle_time+rand(5)
48
+ puts "Complete round finished, next action in #{tmp} seconds later."
49
+ sleep tmp
50
+ end
51
+ end
data/lib/arb/bs/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  module Arb
2
2
  module Bs
3
- VERSION = "1.1.2"
3
+ VERSION = "1.1.3"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arb-bs
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.1.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - arybin
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-04-11 00:00:00.000000000 Z
11
+ date: 2018-04-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -71,6 +71,7 @@ email:
71
71
  - arybin@163.com
72
72
  executables:
73
73
  - bs_pic
74
+ - bs_pic_url
74
75
  extensions: []
75
76
  extra_rdoc_files: []
76
77
  files:
@@ -82,6 +83,7 @@ files:
82
83
  - bin/console
83
84
  - bin/setup
84
85
  - exe/bs_pic
86
+ - exe/bs_pic_url
85
87
  - lib/arb/bs.rb
86
88
  - lib/arb/bs/version.rb
87
89
  homepage: https://github.com/arybin-cn/arb-bs