bobik 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +42 -4
- data/lib/bobik/client.rb +12 -3
- metadata +1 -1
data/README.md
CHANGED
@@ -1,6 +1,44 @@
|
|
1
|
-
|
2
|
-
==============
|
1
|
+
## Web Scraping in Ruby using Bobik
|
3
2
|
|
4
|
-
Bobik SDK for Ruby
|
3
|
+
This is a community-supported Bobik SDK for web scraping in Ruby.
|
5
4
|
|
6
|
-
|
5
|
+
### Installing
|
6
|
+
|
7
|
+
+ Either install directly and system-wide:
|
8
|
+
1. Run `gem install bobik` from command line
|
9
|
+
2. Add `require 'bobik'` to your Ruby code
|
10
|
+
|
11
|
+
+ Or, add to bundler:
|
12
|
+
1. Add `gem 'bobik'` to your Gemfile
|
13
|
+
2. Unless you're using Rails (which includes all gems from the Gemfile automatically), add `require 'bobik'` to your Ruby code
|
14
|
+
|
15
|
+
### Using
|
16
|
+
Here's a quick example to get you started.
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
client = Bobik::Client.new(:auth_token => YOUR_AUTH_TOKEN, :timeout_ms => 60000)
|
20
|
+
|
21
|
+
sample_data = {
|
22
|
+
urls: ['amazon.com', 'zynga.com', 'http://finance.yahoo.com/'],
|
23
|
+
queries: ["//th", "//img/@src", "return document.title", "return $('script').length"]
|
24
|
+
}
|
25
|
+
|
26
|
+
client.scrape(sample_data, true) do |results, errors|
|
27
|
+
puts "Errors: #{errors}"
|
28
|
+
results.each do |url, queries|
|
29
|
+
puts "Printing results for #{url}"
|
30
|
+
queries.each do |query, result|
|
31
|
+
puts " Result of query #{query}: #{result}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
```
|
36
|
+
|
37
|
+
Full API reference is available at http://usebobik.com/sdk/
|
38
|
+
|
39
|
+
### Contributing
|
40
|
+
|
41
|
+
Write to support@usebobik.com to become a collaborator.
|
42
|
+
|
43
|
+
### Bugs?
|
44
|
+
Submit them here on GitHub: https://github.com/emirkin/bobik_ruby_gem/issues
|
data/lib/bobik/client.rb
CHANGED
@@ -2,17 +2,26 @@ require 'json'
|
|
2
2
|
require 'httparty'
|
3
3
|
|
4
4
|
module Bobik
|
5
|
+
# Author:: Eugene Mirkin
|
6
|
+
# This is the main class for interacting with the Bobik platform.
|
5
7
|
class Client
|
6
8
|
include HTTParty
|
7
9
|
base_uri 'https://usebobik.com/api/v1'
|
8
10
|
|
11
|
+
# Notable parameters:
|
12
|
+
# * :auth_token - [required] authentication token
|
13
|
+
# * :timeout_ms - [optional] when to stop waiting for the job to finish
|
14
|
+
# * :logger - [optional] any logger that conforms to the Log4r interface
|
9
15
|
def initialize(opts)
|
10
16
|
@auth_token = opts[:auth_token] || raise(Error.new("'auth_token' was not provided"))
|
11
|
-
@timeout_ms = opts[:timeout_ms] ||
|
17
|
+
@timeout_ms = opts[:timeout_ms] || 60000
|
12
18
|
@log = opts[:logger] || (defined?(Rails.logger) && Rails.logger)
|
13
19
|
end
|
14
20
|
|
15
|
-
|
21
|
+
# Submit a scraping request.
|
22
|
+
# The callback block will be invoked when results arrive.
|
23
|
+
# If asynchronous mode is used, the method returns right away.
|
24
|
+
# Otherwise, it blocks until results arrive.
|
16
25
|
def scrape(request, block_until_done, &block)
|
17
26
|
request = Marshal.load(Marshal.dump(request))
|
18
27
|
request[:auth_token] = @auth_token
|
@@ -55,7 +64,7 @@ module Bobik
|
|
55
64
|
block.call(results, errors)
|
56
65
|
end
|
57
66
|
|
58
|
-
|
67
|
+
# A single call to get a given job's status with or without results
|
59
68
|
def get_job_data(job_id, with_results)
|
60
69
|
job_response = self.class.get('/jobs.json', :body => {
|
61
70
|
auth_token: @auth_token,
|