bobik 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +3 -0
- data/Gemfile.lock +20 -0
- data/README.md +6 -0
- data/lib/bobik.rb +2 -0
- data/lib/bobik/client.rb +73 -0
- data/lib/bobik/error.rb +3 -0
- metadata +66 -0
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
bobik (0.0.1)
|
5
|
+
httparty
|
6
|
+
|
7
|
+
GEM
|
8
|
+
remote: https://rubygems.org/
|
9
|
+
specs:
|
10
|
+
httparty (0.8.3)
|
11
|
+
multi_json (~> 1.0)
|
12
|
+
multi_xml
|
13
|
+
multi_json (1.3.6)
|
14
|
+
multi_xml (0.5.1)
|
15
|
+
|
16
|
+
PLATFORMS
|
17
|
+
ruby
|
18
|
+
|
19
|
+
DEPENDENCIES
|
20
|
+
bobik!
|
data/README.md
ADDED
data/lib/bobik.rb
ADDED
data/lib/bobik/client.rb
ADDED
@@ -0,0 +1,73 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'httparty'
|
3
|
+
|
4
|
+
module Bobik
|
5
|
+
class Client
|
6
|
+
include HTTParty
|
7
|
+
base_uri 'https://usebobik.com/api/v1'
|
8
|
+
|
9
|
+
# Builds a client from an options hash.
#
# Recognised keys in +opts+:
#   :auth_token - required; an Error is raised when it is missing or falsy
#   :timeout_ms - optional; falls back to 30000 when missing or falsy
#   :logger     - optional; falls back to Rails.logger when that is defined
def initialize(opts)
  token = opts[:auth_token]
  raise Error, "'auth_token' was not provided" unless token

  @auth_token = token
  @timeout_ms = opts[:timeout_ms] || 30000
  # The fallback is evaluated lazily, so Rails.logger is only touched when
  # no explicit logger was supplied.
  @log = opts[:logger] || (Rails.logger if defined?(Rails.logger))
end
|
14
|
+
|
15
|
+
|
16
|
+
# Submits a scraping job and waits for it on a background thread.
#
# request          - job description; it is deep-copied before the auth token
#                    is added, so the caller's object is never modified
# block_until_done - when truthy, this call does not return until the
#                    background wait has finished
# block            - forwarded to wait_until_finished
#
# Returns true.
# Raises Error when the job-creation response contains 'errors'.
def scrape(request, block_until_done, &block)
  # Marshal round-trip gives a deep copy of the request.
  request = Marshal.load(Marshal.dump(request))
  request[:auth_token] = @auth_token

  job_response = self.class.post('/jobs.json', :body => request)
  raise Error, job_response['errors'].join("\n") if job_response['errors']
  job_id = job_response['job']

  waiter = Thread.new do
    # Fail loudly if polling raises, but scope that to this thread only:
    # setting the process-wide Thread.abort_on_exception would change how
    # every other thread in the host application behaves.
    Thread.current.abort_on_exception = true
    wait_until_finished(job_id, &block)
  end
  waiter.join if block_until_done
  true
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
# Blocks until the job reports completion or the timeout is reached.
# When done, passes (results, errors) from the final job response to the
# block, if one was given; without a block the outcome is simply discarded.
#
# job_id - job identifier handed to get_job_data
#
# Raises Timeout::Error when the job is not finished within @timeout_ms
# milliseconds. Nothing is rescued here, so errors from the HTTP calls
# (the original notes Errno::ECONNRESET and Errno::ECONNREFUSED) propagate.
def wait_until_finished(job_id, &block)
  log("Waiting for job #{job_id} to finish")
  results = nil
  errors = nil
  Timeout.timeout(@timeout_ms.to_f / 1000) do
    loop do
      # Poll without results first; they are only fetched once the job is done.
      job_response = get_job_data(job_id, false)
      progress = job_response['progress']
      log("Job #{job_id} progress: #{progress * 100}%")
      if progress == 1
        job_response = get_job_data(job_id, true)
        results = job_response['results']
        errors = job_response['errors']
        break
      end
      # Wait for as long as the server estimates is left before polling again.
      sleep(job_response['estimated_time_left_ms'].to_f / 1000)
    end
  end
  # The block is optional, so only call it when the caller supplied one.
  block.call(results, errors) if block
end
|
57
|
+
|
58
|
+
|
59
|
+
# Requests the current state of a job from '/jobs.json'.
#
# job_id       - sent as the 'job' parameter
# with_results - when falsy, the request carries no_results: true
#
# Returns whatever self.class.get returns.
# NOTE(review): the parameters travel in :body although this is a GET;
# confirm the API expects that and not a query string.
def get_job_data(job_id, with_results)
  params = {
    auth_token: @auth_token,
    no_results: !with_results,
    job: job_id
  }
  self.class.get('/jobs.json', :body => params)
end
|
66
|
+
|
67
|
+
|
68
|
+
# Sends msg to the configured logger at debug level; does nothing (and
# returns nil) when no logger is set.
def log(msg)
  @log.debug(msg) if @log
end
|
72
|
+
end
|
73
|
+
end
|
data/lib/bobik/error.rb
ADDED
metadata
ADDED
@@ -0,0 +1,66 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: bobik
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Eugene Mirkin
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2012-07-05 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: httparty
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Bobik is a web service for scraping the web in real-time
|
31
|
+
email: support@usebobik.com
|
32
|
+
executables: []
|
33
|
+
extensions: []
|
34
|
+
extra_rdoc_files: []
|
35
|
+
files:
|
36
|
+
- lib/bobik/client.rb
|
37
|
+
- lib/bobik/error.rb
|
38
|
+
- lib/bobik.rb
|
39
|
+
- Gemfile
|
40
|
+
- Gemfile.lock
|
41
|
+
- README.md
|
42
|
+
homepage: https://github.com/emirkin/bobik_ruby_gem
|
43
|
+
licenses: []
|
44
|
+
post_install_message: Bobik says, "Happy scraping!"
|
45
|
+
rdoc_options: []
|
46
|
+
require_paths:
|
47
|
+
- lib
|
48
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
55
|
+
none: false
|
56
|
+
requirements:
|
57
|
+
- - ! '>='
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: '0'
|
60
|
+
requirements: []
|
61
|
+
rubyforge_project:
|
62
|
+
rubygems_version: 1.8.24
|
63
|
+
signing_key:
|
64
|
+
specification_version: 3
|
65
|
+
summary: A web service for scraping the web in real-time
|
66
|
+
test_files: []
|