bobik 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +42 -4
- data/lib/bobik/client.rb +12 -3
- metadata +1 -1
data/README.md
CHANGED
@@ -1,6 +1,44 @@
|
|
1
|
-
|
2
|
-
==============
|
1
|
+
## Web Scraping in Ruby using Bobik
|
3
2
|
|
4
|
-
Bobik SDK for Ruby
|
3
|
+
This is a community-supported Bobik SDK for web scraping in Ruby.
|
5
4
|
|
6
|
-
|
5
|
+
### Installing
|
6
|
+
|
7
|
+
+ Either install directly and system-wide:
|
8
|
+
1. Run `gem install bobik` from the command line
|
9
|
+
2. Add `require 'bobik'` to your Ruby code
|
10
|
+
|
11
|
+
+ Or, add to bundler:
|
12
|
+
1. Add `gem 'bobik'` to your Gemfile
|
13
|
+
2. Unless you're using Rails (which includes all gems from Gemfile automatically), add `require 'bobik'` to your Ruby code
|
14
|
+
|
15
|
+
### Using
|
16
|
+
Here's a quick example to get you started.
|
17
|
+
|
18
|
+
```ruby
|
19
|
+
client = Bobik::Client.new(:auth_token => YOUR_AUTH_TOKEN, :timeout_ms => 60000)
|
20
|
+
|
21
|
+
sample_data = {
|
22
|
+
urls: ['amazon.com', 'zynga.com', 'http://finance.yahoo.com/'],
|
23
|
+
queries: ["//th", "//img/@src", "return document.title", "return $('script').length"]
|
24
|
+
}
|
25
|
+
|
26
|
+
client.scrape(sample_data, true) do |results, errors|
|
27
|
+
puts "Errors: #{errors}"
|
28
|
+
results.each do |url, queries|
|
29
|
+
puts "Printing results for #{url}"
|
30
|
+
queries.each do |query, result|
|
31
|
+
puts " Result of query #{query}: #{result}"
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
35
|
+
```
|
36
|
+
|
37
|
+
Full API reference is available at http://usebobik.com/sdk/
|
38
|
+
|
39
|
+
### Contributing
|
40
|
+
|
41
|
+
Write to support@usebobik.com to become a collaborator.
|
42
|
+
|
43
|
+
### Bugs?
|
44
|
+
Submit them here on GitHub: https://github.com/emirkin/bobik_ruby_gem/issues
|
data/lib/bobik/client.rb
CHANGED
@@ -2,17 +2,26 @@ require 'json'
|
|
2
2
|
require 'httparty'
|
3
3
|
|
4
4
|
module Bobik
|
5
|
+
# Author:: Eugene Mirkin
|
6
|
+
# This is the main class for interacting with the Bobik platform.
|
5
7
|
class Client
|
6
8
|
include HTTParty
|
7
9
|
base_uri 'https://usebobik.com/api/v1'
|
8
10
|
|
11
|
+
# Notable parameters:
|
12
|
+
# * :auth_token - [required] authentication token
|
13
|
+
# * :timeout_ms - [optional] how long to wait for the job to finish, in milliseconds (default: 60000)
|
14
|
+
# * :logger - [optional] any logger that conforms to the Log4r interface
|
9
15
|
def initialize(opts)
|
10
16
|
@auth_token = opts[:auth_token] || raise(Error.new("'auth_token' was not provided"))
|
11
|
-
@timeout_ms = opts[:timeout_ms] ||
|
17
|
+
@timeout_ms = opts[:timeout_ms] || 60000
|
12
18
|
@log = opts[:logger] || (defined?(Rails.logger) && Rails.logger)
|
13
19
|
end
|
14
20
|
|
15
|
-
|
21
|
+
# Submit a scraping request.
|
22
|
+
# The callback block will be invoked when results arrive.
|
23
|
+
# If asynchronous mode is used, the method returns right away.
|
24
|
+
# Otherwise, it blocks until results arrive.
|
16
25
|
def scrape(request, block_until_done, &block)
|
17
26
|
request = Marshal.load(Marshal.dump(request))
|
18
27
|
request[:auth_token] = @auth_token
|
@@ -55,7 +64,7 @@ module Bobik
|
|
55
64
|
block.call(results, errors)
|
56
65
|
end
|
57
66
|
|
58
|
-
|
67
|
+
# A single call to get a given job's status with or without results
|
59
68
|
def get_job_data(job_id, with_results)
|
60
69
|
job_response = self.class.get('/jobs.json', :body => {
|
61
70
|
auth_token: @auth_token,
|