bobik 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
@@ -0,0 +1,20 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bobik (0.0.1)
5
+ httparty
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ httparty (0.8.3)
11
+ multi_json (~> 1.0)
12
+ multi_xml
13
+ multi_json (1.3.6)
14
+ multi_xml (0.5.1)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ bobik!
@@ -0,0 +1,6 @@
1
+ bobik_ruby_gem
2
+ ==============
3
+
4
+ Bobik SDK for Ruby
5
+
6
+ TODO: provide documentation on how to install and use the SDK
@@ -0,0 +1,2 @@
1
+ require_relative "bobik/error"
2
+ require_relative "bobik/client"
@@ -0,0 +1,73 @@
1
+ require 'json'
2
+ require 'httparty'
3
+
4
require 'timeout'

module Bobik
  # Client for the Bobik scraping web service.
  #
  # A job is submitted with a POST to '/jobs.json' and then polled with GET
  # requests to the same path until the service reports it as complete.
  class Client
    include HTTParty
    base_uri 'https://usebobik.com/api/v1'

    # Timeout applied to a job when the caller does not pass :timeout_ms.
    DEFAULT_TIMEOUT_MS = 30_000

    # Lower bound (in seconds) between two status polls, so that a zero or
    # missing 'estimated_time_left_ms' cannot turn the wait into a busy loop.
    MIN_POLL_INTERVAL_S = 0.1

    # @param opts [Hash] client options
    # @option opts [String] :auth_token API token (required)
    # @option opts [Numeric] :timeout_ms how long to wait for a job, in
    #   milliseconds (defaults to DEFAULT_TIMEOUT_MS)
    # @option opts [#debug] :logger logger for progress messages; falls back
    #   to Rails.logger when that is defined
    # @raise [Bobik::Error] when :auth_token is missing
    def initialize(opts)
      @auth_token = opts[:auth_token] || raise(Error, "'auth_token' was not provided")
      @timeout_ms = opts[:timeout_ms] || DEFAULT_TIMEOUT_MS
      @log = opts[:logger] || (defined?(Rails.logger) && Rails.logger)
    end

    # Submits a scraping job and waits for it on a background thread.
    #
    # @param request [Hash] job description; it is deep-copied, so the
    #   caller's object is never modified
    # @param block_until_done [Boolean] when true, does not return until the
    #   job has finished (or failed)
    # @yield [results, errors] called once the job is finished (optional)
    # @return [true]
    # @raise [Bobik::Error] when the service rejects the job
    def scrape(request, block_until_done, &block)
      # Deep copy so that adding the token does not leak into the caller's hash.
      request = Marshal.load(Marshal.dump(request))
      request[:auth_token] = @auth_token

      job_response = self.class.post('/jobs.json', :body => request)
      errors = job_response['errors']
      raise Error, Array(errors).join("\n") if errors && !Array(errors).empty?
      job_id = job_response['job']

      worker = Thread.new do
        # Scope the setting to this thread only; the process-wide
        # Thread.abort_on_exception flag is left as the application set it.
        Thread.current.abort_on_exception = true
        wait_until_finished(job_id, &block)
      end
      worker.join if block_until_done
      true
    end

    private

    # Blocks until the job is finished or timeout is reached.
    # When done, yields results and errors to the optional block.
    # Exceptions thrown: Timeout::Error, Errno::ECONNRESET, Errno::ECONNREFUSED,
    # and Bobik::Error when a status response carries no 'progress' value.
    def wait_until_finished(job_id, &block)
      log("Waiting for job #{job_id} to finish")
      results = nil
      errors = nil
      Timeout.timeout(@timeout_ms.to_f / 1000) do
        loop do
          job_response = get_job_data(job_id, false)
          progress = job_response['progress']
          if progress.nil?
            # Fail explicitly instead of crashing on nil arithmetic below.
            details = Array(job_response['errors']).join("\n")
            message = "No progress reported for job #{job_id}"
            message = "#{message}: #{details}" unless details.empty?
            raise Error, message
          end
          log("Job #{job_id} progress: #{progress * 100}%")
          if progress >= 1
            # The polling requests skip the payload; fetch it once at the end.
            job_response = get_job_data(job_id, true)
            results = job_response['results']
            errors = job_response['errors']
            break
          end
          estimated_wait_s = job_response['estimated_time_left_ms'].to_f / 1000
          sleep([estimated_wait_s, MIN_POLL_INTERVAL_S].max)
        end
      end
      block.call(results, errors) if block
    end

    # Fetches the current state of a job.
    #
    # @param job_id the job identifier returned when the job was created
    # @param with_results [Boolean] false sends no_results=true with the request
    # @return the HTTParty response for GET '/jobs.json'
    #
    # NOTE(review): the parameters are sent as the body of a GET request;
    # confirm with the API whether they should be passed as :query instead.
    def get_job_data(job_id, with_results)
      self.class.get('/jobs.json', :body => {
        auth_token: @auth_token,
        no_results: !with_results,
        job: job_id
      })
    end

    # Writes a debug message to the configured logger, if there is one.
    def log(msg)
      return unless @log
      @log.debug(msg)
    end
  end
end
@@ -0,0 +1,3 @@
1
module Bobik
  # Error type of the Bobik SDK. Being a StandardError subclass, it is caught
  # by a bare `rescue`.
  class Error < StandardError; end
end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bobik
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Eugene Mirkin
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: httparty
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Bobik is a web service for scraping the web in real-time
31
+ email: support@usebobik.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/bobik/client.rb
37
+ - lib/bobik/error.rb
38
+ - lib/bobik.rb
39
+ - Gemfile
40
+ - Gemfile.lock
41
+ - README.md
42
+ homepage: https://github.com/emirkin/bobik_ruby_gem
43
+ licenses: []
44
+ post_install_message: Bobik says, "Happy scraping!"
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 1.8.24
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: A web service for scraping the web in real-time
66
+ test_files: []