bobik 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
@@ -0,0 +1,20 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ bobik (0.0.1)
5
+ httparty
6
+
7
+ GEM
8
+ remote: https://rubygems.org/
9
+ specs:
10
+ httparty (0.8.3)
11
+ multi_json (~> 1.0)
12
+ multi_xml
13
+ multi_json (1.3.6)
14
+ multi_xml (0.5.1)
15
+
16
+ PLATFORMS
17
+ ruby
18
+
19
+ DEPENDENCIES
20
+ bobik!
@@ -0,0 +1,6 @@
1
+ bobik_ruby_gem
2
+ ==============
3
+
4
+ Bobik SDK for Ruby
5
+
6
+ TODO: provide documentation on how to install and use the SDK
@@ -0,0 +1,2 @@
1
+ require_relative "bobik/error"
2
+ require_relative "bobik/client"
@@ -0,0 +1,73 @@
1
+ require 'json'
2
+ require 'httparty'
3
+
4
module Bobik
  # HTTP client for the Bobik scraping API.
  #
  # Submits a scraping job to +/jobs.json+ under the configured base URI and
  # then polls the same endpoint until the job reports a progress of 1.
  class Client
    include HTTParty
    base_uri 'https://usebobik.com/api/v1'

    # Shortest pause between two progress polls, in seconds. Guards against a
    # missing, zero or negative 'estimated_time_left_ms' turning the polling
    # loop into a busy loop (or making Kernel#sleep raise on a negative value).
    MIN_POLL_INTERVAL_S = 0.1

    # @param opts [Hash] client options
    # @option opts [String] :auth_token token sent with every request (required)
    # @option opts [Numeric] :timeout_ms maximum time to wait for a job to
    #   finish, in milliseconds (default: 30000)
    # @option opts [#debug] :logger receiver of debug messages; when omitted,
    #   Rails.logger is used if it is defined, otherwise logging is disabled
    # @raise [Bobik::Error] if :auth_token is missing
    def initialize(opts)
      @auth_token = opts[:auth_token] || raise(Error, "'auth_token' was not provided")
      @timeout_ms = opts[:timeout_ms] || 30000
      @log = opts[:logger] || (defined?(Rails.logger) && Rails.logger)
    end

    # Submits a scraping job and waits for it on a background thread.
    #
    # @param request [Hash] job description; it is deep-copied, so the caller's
    #   object is not modified when the auth token is added
    # @param block_until_done [Boolean] when truthy, join the polling thread
    #   before returning
    # @yield [results, errors] optional; called once the job has finished
    # @return [true]
    # @raise [Bobik::Error] if the job submission response contains 'errors'
    def scrape(request, block_until_done, &block)
      request = Marshal.load(Marshal.dump(request))
      request[:auth_token] = @auth_token

      job_response = self.class.post('/jobs.json', :body => request)
      submit_errors = job_response['errors']
      # Array() keeps this working whether 'errors' is a list or a single value.
      raise Error, Array(submit_errors).join("\n") if submit_errors
      job_id = job_response['job']

      poller = Thread.new do
        # Surface failures of this thread in the main thread without flipping
        # the process-wide Thread.abort_on_exception flag for everyone else.
        Thread.current.abort_on_exception = true
        wait_until_finished(job_id, &block)
      end
      poller.join if block_until_done
      true
    end

    private

    # Blocks until the job is finished or the timeout is reached.
    # When done, yields results and errors to the optional block.
    # Exceptions thrown: Timeout::Error, Errno::ECONNRESET, Errno::ECONNREFUSED
    def wait_until_finished(job_id, &block)
      log("Waiting for job #{job_id} to finish")
      results = nil
      errors = nil
      Timeout.timeout(@timeout_ms.to_f / 1000) do
        loop do
          job_response = get_job_data(job_id, false)
          progress = job_response['progress']
          log("Job #{job_id} progress: #{progress * 100}%")
          if progress == 1
            # Only fetch the (potentially large) results once the job is done.
            job_response = get_job_data(job_id, true)
            results = job_response['results']
            errors = job_response['errors']
            break
          end
          delay_s = job_response['estimated_time_left_ms'].to_f / 1000
          sleep([delay_s, MIN_POLL_INTERVAL_S].max)
        end
      end
      # The block is optional; without this guard a block-less call would
      # raise NoMethodError on nil inside the polling thread.
      block.call(results, errors) if block
    end

    # Fetches the current state of a job.
    #
    # @param job_id the value returned under 'job' when the job was submitted
    # @param with_results [Boolean] when false, 'no_results' is sent as true
    # @return the HTTParty response for the request
    def get_job_data(job_id, with_results)
      # NOTE(review): parameters are sent as the body of a GET request;
      # presumably the server accepts that - confirm before switching to :query.
      self.class.get('/jobs.json', :body => {
        auth_token: @auth_token,
        no_results: !with_results,
        job: job_id
      })
    end

    # Writes a debug message to the configured logger, if there is one.
    def log(msg)
      @log.debug(msg) if @log
    end
  end
end
@@ -0,0 +1,3 @@
1
module Bobik
  # Error type of the Bobik SDK. Bobik::Client raises it when no auth token
  # is supplied and when a job submission response contains errors.
  class Error < StandardError
  end
end
metadata ADDED
@@ -0,0 +1,66 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bobik
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Eugene Mirkin
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-07-05 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: httparty
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Bobik is a web service for scraping the web in real-time
31
+ email: support@usebobik.com
32
+ executables: []
33
+ extensions: []
34
+ extra_rdoc_files: []
35
+ files:
36
+ - lib/bobik/client.rb
37
+ - lib/bobik/error.rb
38
+ - lib/bobik.rb
39
+ - Gemfile
40
+ - Gemfile.lock
41
+ - README.md
42
+ homepage: https://github.com/emirkin/bobik_ruby_gem
43
+ licenses: []
44
+ post_install_message: Bobik says, "Happy scraping!"
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ required_rubygems_version: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: '0'
60
+ requirements: []
61
+ rubyforge_project:
62
+ rubygems_version: 1.8.24
63
+ signing_key:
64
+ specification_version: 3
65
+ summary: A web service for scraping the web in real-time
66
+ test_files: []