iterationlabs-online_parselets 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README +33 -0
- data/lib/client.rb +85 -0
- data/lib/instance.rb +68 -0
- data/lib/online_parselets.rb +30 -0
- data/online_parselets.gemspec +25 -0
- data/test/test_client.rb +50 -0
- metadata +69 -0
data/README
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
The online_parselets project is a client gem to talk to http://parselets.com.
|
2
|
+
|
3
|
+
REQUIREMENTS
|
4
|
+
|
5
|
+
This gem supports both local and server-side parselet execution. In general,
|
6
|
+
however, you should have the parsley gem installed for fast, local parsing.
|
7
|
+
Get it here: http://github.com/fizx/parsley-ruby/tree/master
|
8
|
+
|
9
|
+
INSTALL
|
10
|
+
|
11
|
+
From GitHub:
|
12
|
+
Run the following if you haven't already:
|
13
|
+
gem sources -a http://gems.github.com
|
14
|
+
Install the gem:
|
15
|
+
sudo gem install iterationlabs-online_parselets
|
16
|
+
|
17
|
+
From source:
|
18
|
+
sudo rake install
|
19
|
+
|
20
|
+
USAGE
|
21
|
+
|
22
|
+
require "rubygems"
|
23
|
+
require "online_parselets"
|
24
|
+
client = OnlineParselets::Client.new(:api_key => "your_api_key")
|
25
|
+
parselets = client.find("yelp")
|
26
|
+
parselet = parselets.first
|
27
|
+
yelp_data = parselet.parse(:url => "http://www.yelp.com")
|
28
|
+
puts yelp_data.inspect
|
29
|
+
# Reload a parselet from the server
|
30
|
+
parselet = client.parselet(parselet.server_id)
|
31
|
+
# Or:
|
32
|
+
parselet = client.parselet('my_parselet', 5) # name, version
|
33
|
+
|
data/lib/client.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
module OnlineParselets
  # Client class to talk to the parselets.com server.
  #
  # NOTE(review): HTTP requests rely on open-uri's patch of Kernel#open, which
  # Ruby 3.0 removed; switch to URI.open once older Rubies are dropped.
  class Client
    attr_accessor :api_key, :environment

    HOST = { :production => "http://parselets.com", :local => "http://localhost:3000" }
    SEARCH_PATH = "/search/?q="
    PARSELET_PATH = "/parselets/"
    RUN_ON_SERVER_PATH = "/parse/"

    # Make a new client instance to talk to the parselets.com server.
    #
    #   OnlineParselets::Client.new( :api_key => "your_api_key" )
    #
    # Options:
    #   :api_key     - required; raises InvalidAPIKey when missing.
    #   :environment - a key of HOST (defaults to :production); raises
    #                  InvalidEnvironmentError for any other value.
    def initialize(options = {})
      self.api_key = options[:api_key]
      raise InvalidAPIKey, "You must provide an API key" unless api_key

      self.environment = options[:environment] || :production
      raise InvalidEnvironmentError, "Environment #{environment} is not defined" unless HOST[environment]
    end

    # Search for parselets on the server. Returns an array of matching OnlineParselets::Instance objects.
    #
    #   matches = client.find("yelp")
    def find(search_string)
      results = OnlineParselets.catch_invalid_key do
        url = HOST[environment] + SEARCH_PATH + CGI::escape(search_string) +
              "&format=json&api_key=" + CGI::escape(api_key)
        JSON.parse(read_url(url))
      end
      results.map do |parselet|
        Instance.new parselet['parselet'].merge( :client => self )
      end
    end

    # Retrieve a parselet by ID, or by name and version. Returns nil or an Instance object.
    #
    #   instance = client.parselet('my_parselet', 5)
    #   instance = client.parselet(17)
    #
    # Raises Error when a version is combined with a numeric id, or when a
    # name is given without a positive integer version. A 404 response from
    # the server yields nil; other HTTP errors propagate.
    def parselet(parselet_id_or_name, parselet_version = nil)
      result = OnlineParselets.catch_invalid_key do
        if parselet_id_or_name.to_s =~ /\A\d+\Z/
          unless parselet_version.nil?
            raise Error, "You cannot enter a version when looking up a specific parselet by id"
          end
        else
          unless parselet_version.to_s =~ /\A\d+\Z/
            raise Error, "Version must be a number"
          end
          unless parselet_version.to_i > 0
            raise Error, "Version is 1 based, please use 1 as the first version"
          end
        end

        version = parselet_version ? "/#{parselet_version}" : ''
        begin
          url = HOST[environment] + PARSELET_PATH + CGI::escape(parselet_id_or_name.to_s) + version +
                "?format=json" + "&api_key=" + CGI::escape(api_key)
          JSON.parse(read_url(url))
        rescue OpenURI::HTTPError => e
          raise unless not_found?(e)
          nil
        end
      end
      result && result['parselet'] && Instance.new(result['parselet'].merge( :client => self ))
    end

    protected

    # Run a parselet by ID on the server.
    #
    #   json = client.run_parselet_on_server(parselet_id = 17, :url => "http://www.example.com")
    #
    # The target is taken from options[:url] (or options[:file]) and must be
    # an http:// address, otherwise InvalidServerSideParse is raised. Returns
    # the parsed JSON response, or nil when the server answers 404.
    def run_parselet_on_server(server_id, options)
      OnlineParselets.catch_invalid_key do
        begin
          url = options[:url] || options[:file]
          # \A (not ^) so a later line of a multi-line string cannot satisfy the check.
          unless url =~ %r{\Ahttp://}i
            raise InvalidServerSideParse, "You must provide an http:// address to run this parselet on the server."
          end
          fetch_url = "#{HOST[environment]}#{RUN_ON_SERVER_PATH}#{CGI::escape(server_id.to_s)}" +
                      "?format=json&api_key=#{CGI::escape(api_key)}&url=#{CGI::escape(url)}"
          JSON.parse(read_url(fetch_url))
        rescue OpenURI::HTTPError => e
          raise unless not_found?(e)
          nil
        end
      end
    end

    private

    # Fetch a URL and return its body; the block form closes the handle.
    def read_url(url)
      open(url) { |io| io.read }
    end

    # True when the HTTP error carries status 404, whatever reason phrase the server sent.
    def not_found?(http_error)
      !!(http_error.message =~ /\A404\b/)
    end
  end
end
|
data/lib/instance.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
module OnlineParselets
  SERVER_SIDE_EXECUTION_WARNING = "WARNING: Please install the Parsley ruby gem, parselet execution via the server is slower, executed on cached web pages, and possibly rate-limited."

  # An OnlineParselets::Instance is a Parselet object with metadata and the ability to run locally or on the parselets.com server.
  class Instance
    attr_reader :name, :created_on_server_at, :code, :updated_on_server_at, :checked_on_server_at,
                :server_id, :pattern, :example_url, :user_id, :version_on_server, :description, :client

    # Make a new instance of Instance, an object that combines a Parselet with metadata from the parselets.com server.
    #
    # This is mostly an internal method. Keys may be strings or symbols.
    #
    #   instance = OnlineParselets::Instance.new(options)
    def initialize(options = {})
      options = options.inject({}) { |m, (k, v)| m[k.to_s] = v; m } # Turn symbols into strings.
      @name                 = options['name']
      @created_on_server_at = options['created_at']
      @code                 = options['code']
      @updated_on_server_at = options['updated_at']
      @checked_on_server_at = options['checked_at']
      @server_id            = options['id']
      @pattern              = options['pattern']
      @example_url          = options['example_url']
      @user_id              = options['user_id']
      @version_on_server    = options['version']
      @description          = options['description']
      @client               = options['client']
    end

    # Run the parselet.
    #
    #   instance.parse :url => "http://www.example.com"
    #
    #   instance.parse :string => "<html><head>...</html>"
    #
    # Parsing happens locally when Parsley is loaded and :parse_on_server is
    # not truthy (an explicit false now means "local"); otherwise the request
    # is delegated to parse_on_server.
    def parse(options)
      if !options[:parse_on_server] && defined?(Parsley)
        string = nil
        if options[:url]
          # Block form so the open-uri handle is closed once the page is read.
          string = open(options[:url]) { |io| io.read }
        end
        parselet.parse({ :string => string }.merge(options))
      else
        parse_on_server(options)
      end
    end

    # Force an online parselet instance to run on the parselets.com server.
    # Prints SERVER_SIDE_EXECUTION_WARNING before delegating to the client.
    #
    #   instance.parse_on_server :url => "http://www.example.com"
    def parse_on_server(options)
      puts SERVER_SIDE_EXECUTION_WARNING
      client.send(:run_parselet_on_server, server_id, options)
    end

    # Access the wrapped Parselet object (memoized). Returns nil when the instance has no code.
    def parselet
      @parselet ||= Parsley.new(code) if code
    end
  end
end
|
62
|
+
|
63
|
+
# Try to load the local Parsley bindings. When they are unavailable, say so
# and print the server-side execution warning.
begin
  require 'parsley'
rescue LoadError
  puts "Local Parsley bindings not found.", OnlineParselets::SERVER_SIDE_EXECUTION_WARNING
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'json'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'cgi'
|
5
|
+
|
6
|
+
require File.dirname(__FILE__) + '/client'
|
7
|
+
require File.dirname(__FILE__) + '/instance'
|
8
|
+
|
9
|
+
module OnlineParselets
  # Basic error. Inherits from StandardError so a plain `rescue` catches it.
  class Error < StandardError; end
  # Indicates that the provided API key is invalid or not existent.
  class InvalidAPIKey < Error; end
  # Indicates that you have requested a non-existent environment. At the moment the available environments are :local and :production.
  class InvalidEnvironmentError < Error; end
  # Indicates that the server-side parse failed.
  class InvalidServerSideParse < Error; end

  # Run the given block and return its value, translating an HTTP 401 raised
  # by open-uri into InvalidAPIKey. Any other OpenURI::HTTPError is re-raised
  # unchanged.
  def self.catch_invalid_key
    yield
  rescue OpenURI::HTTPError => e
    # Match on the status code only; the reason phrase differs between servers.
    raise InvalidAPIKey, "That API key is invalid" if e.message =~ /\A401\b/
    raise
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = "online_parselets"
|
3
|
+
s.version = "0.1.0"
|
4
|
+
s.date = "2009-02-27"
|
5
|
+
s.summary = "Client library for parselets.com"
|
6
|
+
s.email = "parselets@gmail.com"
|
7
|
+
s.homepage = "http://github.com/iterationlabs/online_parselets"
|
8
|
+
s.description = "Client library for parselets.com"
|
9
|
+
s.has_rdoc = true
|
10
|
+
s.require_paths = ["lib"]
|
11
|
+
s.extensions = nil
|
12
|
+
s.bindir = 'bin'
|
13
|
+
s.authors = ["Andrew Cantino", "Kyle Maxwell"]
|
14
|
+
s.files = %w[
|
15
|
+
README
|
16
|
+
online_parselets.gemspec
|
17
|
+
test/test_client.rb
|
18
|
+
lib/client.rb
|
19
|
+
lib/instance.rb
|
20
|
+
lib/online_parselets.rb
|
21
|
+
]
|
22
|
+
s.rdoc_options = ["--main", "README"]
|
23
|
+
s.extra_rdoc_files = ["README"]
|
24
|
+
s.add_dependency("json", ["> 0.0.0"])
|
25
|
+
end
|
data/test/test_client.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../lib/online_parselets'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'open-uri'
|
4
|
+
include OnlineParselets
|
5
|
+
|
6
|
+
# Integration tests for OnlineParselets::Client. Most of these talk to the
# live parselets.com service and need a real API key in place of YOUR_API_KEY.
class TestOnlineParseletsClient < Test::Unit::TestCase
  def setup
    @client = OnlineParselets::Client.new(:api_key => 'YOUR_API_KEY')
  end

  # Constructing a client requires an API key.
  def test_new_client
    assert_raises(InvalidAPIKey) { OnlineParselets::Client.new(:environment => :local) }
    assert_nothing_raised { OnlineParselets::Client.new(:api_key => 'elkfjdf') }
  end

  def test_client_gets_parselets
    result = @client.find("yelp")
    assert result.length > 0
    assert_kind_of(Parsley, result.first.parselet)
  end

  # This is brittle and depends on what parselets we have on the server
  def test_running_a_parselet
    assert @client.find("yelp-home kyle").first.parse(:url => "http://www.yelp.com")['categories'].length > 0
  end

  # This is brittle and depends on what parselets we have on the server
  def test_getting_a_specific_parselet
    assert_nil @client.parselet(12)
    assert_kind_of(Instance, @client.parselet(50))
    yelp = @client.parselet('yelp-home', 4)
    assert_kind_of(Instance, yelp)
    assert_equal 50, yelp.server_id
  end

  # Argument validation for Client#parselet: ids take no version, names need a version >= 1.
  def test_errors_on_getting_specific_parselet
    assert_raises(Error) { @client.parselet(nil) }
    assert_raises(Error) { @client.parselet(1, 1) }
    assert_nothing_raised { @client.parselet(1) }
    assert_raises(Error) { @client.parselet("yelp") }
    assert_raises(Error) { @client.parselet("hello", 0) }
    assert_nothing_raised { @client.parselet("hello", 1) }
  end

  # This is brittle and depends on what parselets we have on the server
  def test_server_side_parsing
    assert @client.parselet(50).parse(:url => "http://www.yelp.com", :parse_on_server => true)['categories'].length > 0
    assert @client.parselet('yelp-home', 4).parse(:url => "http://www.yelp.com", :parse_on_server => true)['categories'].length > 0
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iterationlabs-online_parselets
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Cantino
|
8
|
+
- Kyle Maxwell
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-02-27 00:00:00 -08:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: json
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">"
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.0.0
|
25
|
+
version:
|
26
|
+
description: Client library for parselets.com
|
27
|
+
email: parselets@gmail.com
|
28
|
+
executables: []
|
29
|
+
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- README
|
34
|
+
files:
|
35
|
+
- README
|
36
|
+
- online_parselets.gemspec
|
37
|
+
- test/test_client.rb
|
38
|
+
- lib/client.rb
|
39
|
+
- lib/instance.rb
|
40
|
+
- lib/online_parselets.rb
|
41
|
+
has_rdoc: true
|
42
|
+
homepage: http://github.com/iterationlabs/online_parselets
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --main
|
46
|
+
- README
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
requirements: []
|
62
|
+
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.2.0
|
65
|
+
signing_key:
|
66
|
+
specification_version: 2
|
67
|
+
summary: Client library for parselets.com
|
68
|
+
test_files: []
|
69
|
+
|