iterationlabs-online_parselets 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +33 -0
- data/lib/client.rb +85 -0
- data/lib/instance.rb +68 -0
- data/lib/online_parselets.rb +30 -0
- data/online_parselets.gemspec +25 -0
- data/test/test_client.rb +50 -0
- metadata +69 -0
data/README
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
The online_parselets project is a client gem to talk to http://parselets.com.
|
2
|
+
|
3
|
+
REQUIREMENTS
|
4
|
+
|
5
|
+
This gem supports both local and server-side parselet execution. In general,
|
6
|
+
however, you should have the parsley gem installed for fast, local parsing.
|
7
|
+
Get it here: http://github.com/fizx/parsley-ruby/tree/master
|
8
|
+
|
9
|
+
INSTALL
|
10
|
+
|
11
|
+
From GitHub:
|
12
|
+
Run the following if you haven't already:
|
13
|
+
gem sources -a http://gems.github.com
|
14
|
+
Install the gem:
|
15
|
+
sudo gem install iterationlabs-online_parselets
|
16
|
+
|
17
|
+
From source:
|
18
|
+
sudo rake install
|
19
|
+
|
20
|
+
USAGE
|
21
|
+
|
22
|
+
require "rubygems"
|
23
|
+
require "online_parselets"
|
24
|
+
client = OnlineParselets::Client.new(:api_key => "your_api_key")
|
25
|
+
parselets = client.find("yelp")
|
26
|
+
parselet = parselets.first
|
27
|
+
yelp_data = parselet.parse(:url => "http://www.yelp.com")
|
28
|
+
puts yelp_data.inspect
|
29
|
+
# Reload a parselet from the server
|
30
|
+
parselet = client.parselet(parselet.server_id)
|
31
|
+
# Or:
|
32
|
+
parselet = client.parselet('my_parselet', 5)
|
33
|
+
|
data/lib/client.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
module OnlineParselets
  # Client class to talk to the parselets.com server.
  #
  # A client holds an API key and an environment name. It can search for
  # parselets, fetch a single parselet, and (on behalf of Instance) ask the
  # server to run a parselet against a URL.
  class Client
    attr_accessor :api_key, :environment

    # Base URL for each known environment, keyed by environment name.
    HOST = { :production => "http://parselets.com", :local => "http://localhost:3000" }
    SEARCH_PATH = "/search/?q="
    PARSELET_PATH = "/parselets/"
    RUN_ON_SERVER_PATH = "/parse/"

    # Make a new client instance to talk to the parselets.com server.
    #
    #   OnlineParselets::Client.new( :api_key => "your_api_key" )
    #
    # Options:
    #   :api_key     - required; InvalidAPIKey is raised when it is missing.
    #   :environment - a key of HOST, defaults to :production;
    #                  InvalidEnvironmentError is raised for any other value.
    def initialize(options = {})
      self.api_key = options[:api_key]
      raise InvalidAPIKey.new("You must provide an API key") unless api_key

      self.environment = options[:environment] || :production
      unless HOST[environment]
        # The exception must be instantiated; calling the constant like a
        # method raises NoMethodError instead of the intended error.
        raise InvalidEnvironmentError.new("Environment #{environment} is not defined")
      end
    end

    # Search for parselets on the server. Returns an array of matching
    # OnlineParselets::Instance objects, each bound to this client.
    #
    #   matches = client.find("yelp")
    def find(search_string)
      results = OnlineParselets.catch_invalid_key do
        search_url = HOST[environment] + SEARCH_PATH + CGI::escape(search_string) +
                     "&format=json&api_key=" + CGI::escape(api_key)
        JSON.parse(open(search_url).read)
      end
      results.map do |hit|
        Instance.new hit['parselet'].merge( :client => self )
      end
    end

    # Retrieve a parselet by ID, or by name and version. Returns nil (when the
    # server answers "404 Not Found") or an Instance object.
    #
    #   instance = client.parselet('my_parselet', 5)
    #   instance = client.parselet(17)
    #
    # Raises Error when a version is given together with a numeric id, or when
    # a name is given without a positive integer version.
    def parselet(parselet_id_or_name, parselet_version = nil)
      result = OnlineParselets.catch_invalid_key do
        # \z (not \Z) so that a trailing newline does not count as "all digits";
        # the version is interpolated into the URL path unescaped below.
        if parselet_id_or_name.to_s =~ /\A\d+\z/
          unless parselet_version.nil?
            raise Error.new("You cannot enter a version when looking up a specific parselet by id")
          end
        else
          unless parselet_version.to_s =~ /\A\d+\z/
            raise Error.new("Version must be a number")
          end
          unless parselet_version.to_i > 0
            raise Error.new("Version is 1 based, please use 1 as the first version")
          end
        end

        version = parselet_version ? "/#{parselet_version}" : ''
        begin
          fetch = open(HOST[environment] + PARSELET_PATH + CGI::escape(parselet_id_or_name.to_s) + version +
                       "?format=json" + "&api_key=" + CGI::escape(api_key))
          JSON.parse(fetch.read)
        rescue OpenURI::HTTPError => e
          # A missing parselet is reported as nil; every other HTTP error propagates.
          raise unless e.message == "404 Not Found"
          nil
        end
      end
      result && result['parselet'] && Instance.new(result['parselet'].merge( :client => self ))
    end

    protected

    # Run a parselet by ID on the server. Instance#parse_on_server reaches this
    # method through +send+.
    #
    #   json = client.send(:run_parselet_on_server, 17, :url => "http://www.example.com")
    #
    # The target address is read from options[:url], falling back to
    # options[:file]. Raises InvalidServerSideParse unless it starts with
    # http://. Returns the parsed JSON response, or nil on "404 Not Found".
    def run_parselet_on_server(server_id, options)
      OnlineParselets.catch_invalid_key do
        begin
          url = options[:url] || options[:file]
          # Anchor with \A: ^ would match at the start of any line, letting a
          # multi-line string with a different leading scheme through.
          unless url =~ /\Ahttp:\/\//i
            raise InvalidServerSideParse.new("You must provide an http:// address to run this parselet on the server.")
          end
          fetch_url = "#{HOST[environment]}#{RUN_ON_SERVER_PATH}#{CGI::escape(server_id.to_s)}" +
                      "?format=json&api_key=#{CGI::escape(api_key)}&url=#{CGI::escape(url)}"
          JSON.parse(open(fetch_url).read)
        rescue OpenURI::HTTPError => e
          raise unless e.message == "404 Not Found"
          nil
        end
      end
    end
  end
end
|
data/lib/instance.rb
ADDED
@@ -0,0 +1,68 @@
|
|
1
|
+
require 'open-uri'
|
2
|
+
|
3
|
+
module OnlineParselets
  # Printed whenever a parse is sent to the server, and at load time when the
  # local Parsley bindings cannot be required.
  SERVER_SIDE_EXECUTION_WARNING = "WARNING: Please install the Parsley ruby gem, parselet execution via the server is slower, executed on cached web pages, and possibly rate-limited."

  # An OnlineParselets::Instance is a Parselet object with metadata and the
  # ability to run locally or on the parselets.com server.
  class Instance
    attr_reader :name, :created_on_server_at, :code, :updated_on_server_at, :checked_on_server_at,
                :server_id, :pattern, :example_url, :user_id, :version_on_server, :description, :client

    # Make a new instance of Instance, an object that combines a Parselet with
    # metadata from the parselets.com server.
    #
    # This is mostly an internal method. Keys may be strings or symbols; the
    # server's 'id', 'version', 'created_at', 'updated_at' and 'checked_at'
    # fields are exposed as server_id, version_on_server, created_on_server_at,
    # updated_on_server_at and checked_on_server_at.
    #
    #   instance = OnlineParselets::Instance.new(options)
    def initialize(options = {})
      options = options.inject({}) { |m, (k, v)| m[k.to_s] = v; m } # Turn symbols into strings.
      @name                 = options['name']
      @created_on_server_at = options['created_at']
      @code                 = options['code']
      @updated_on_server_at = options['updated_at']
      @checked_on_server_at = options['checked_at']
      @server_id            = options['id']
      @pattern              = options['pattern']
      @example_url          = options['example_url']
      @user_id              = options['user_id']
      @version_on_server    = options['version']
      @description          = options['description']
      @client               = options['client']
    end

    # Run the parselet.
    #
    #   instance.parse :url => "http://www.example.com"
    #
    #   instance.parse :string => "<html><head>...</html>"
    #
    # Runs locally when Parsley is loaded and :parse_on_server is not set to a
    # true value; otherwise the parse is delegated to parse_on_server.
    def parse(options)
      # Test truthiness rather than nil?: an explicit :parse_on_server => false
      # must keep the parse local, not force it onto the server.
      if !options[:parse_on_server] && defined?(Parsley)
        # NOTE(review): Kernel#open on a caller-supplied string can also open
        # local paths and "|command" pipes - confirm :url is always trusted.
        string = open(options[:url]).read if options[:url]
        parselet.parse({ :string => string }.merge(options))
      else
        parse_on_server(options)
      end
    end

    # Force an online parselet instance to run on the parselets.com server.
    # Prints SERVER_SIDE_EXECUTION_WARNING before sending the request.
    #
    #   instance.parse_on_server :url => "http://www.example.com"
    def parse_on_server(options)
      puts SERVER_SIDE_EXECUTION_WARNING
      # run_parselet_on_server is protected on Client, hence send.
      client.send(:run_parselet_on_server, server_id, options)
    end

    # Access the wrapped Parselet object. Built from +code+ on first use and
    # memoized; nil when this instance has no code.
    def parselet
      @parselet ||= Parsley.new(code) if code
    end
  end
end
|
62
|
+
|
63
|
+
# Try to load the local Parsley bindings. When they are not installed, tell
# the user and repeat the server-side execution warning.
begin
  require 'parsley'
rescue LoadError
  puts "Local Parsley bindings not found.", OnlineParselets::SERVER_SIDE_EXECUTION_WARNING
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'json'
|
3
|
+
require 'open-uri'
|
4
|
+
require 'cgi'
|
5
|
+
|
6
|
+
require File.dirname(__FILE__) + '/client'
|
7
|
+
require File.dirname(__FILE__) + '/instance'
|
8
|
+
|
9
|
+
module OnlineParselets
  # Basic error. Derives from StandardError (not Exception) so that a plain
  # `rescue` in calling code can catch library failures.
  class Error < StandardError; end
  # Indicates that the provided API key is invalid or not existent.
  class InvalidAPIKey < Error; end
  # Indicates that you have requested a non-existent environment. At the moment the available environments are :local and :production.
  class InvalidEnvironmentError < Error; end
  # Indicates that the server-side parse failed.
  class InvalidServerSideParse < Error; end

  # Runs the given block and returns its value. An OpenURI::HTTPError whose
  # message is "401 Authorization Required" is translated into InvalidAPIKey;
  # any other OpenURI::HTTPError is re-raised unchanged.
  def self.catch_invalid_key
    yield
  rescue OpenURI::HTTPError => e
    raise InvalidAPIKey.new("That API key is invalid") if e.message == "401 Authorization Required"
    raise
  end
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
# Gem specification for online_parselets, the parselets.com client library.
Gem::Specification.new do |spec|
  # Identity
  spec.name        = "online_parselets"
  spec.version     = "0.1.0"
  spec.date        = "2009-02-27"
  spec.authors     = ["Andrew Cantino", "Kyle Maxwell"]
  spec.email       = "parselets@gmail.com"
  spec.homepage    = "http://github.com/iterationlabs/online_parselets"
  spec.summary     = "Client library for parselets.com"
  spec.description = "Client library for parselets.com"

  # Layout
  spec.require_paths = ["lib"]
  spec.bindir        = 'bin'
  spec.extensions    = nil
  spec.files         = [
    "README",
    "online_parselets.gemspec",
    "test/test_client.rb",
    "lib/client.rb",
    "lib/instance.rb",
    "lib/online_parselets.rb"
  ]

  # Documentation
  spec.has_rdoc         = true
  spec.rdoc_options     = ["--main", "README"]
  spec.extra_rdoc_files = ["README"]

  # Runtime dependencies
  spec.add_dependency("json", ["> 0.0.0"])
end
|
data/test/test_client.rb
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/../lib/online_parselets'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'open-uri'
|
4
|
+
include OnlineParselets
|
5
|
+
|
6
|
+
# Tests for OnlineParselets::Client. The calls to find, parselet and parse go
# through the client under test, so several cases depend on which parselets
# the server currently holds.
class TestOnlineParseletsClient < Test::Unit::TestCase
  YELP_URL = "http://www.yelp.com"

  def setup
    @client = OnlineParselets::Client.new(:api_key => 'YOUR_API_KEY')
  end

  # An API key is mandatory; any non-empty key is accepted at construction time.
  def test_new_client
    assert_raises(InvalidAPIKey) do
      OnlineParselets::Client.new(:environment => :local)
    end
    assert_nothing_raised do
      OnlineParselets::Client.new(:api_key => 'elkfjdf')
    end
  end

  def test_client_gets_parselets
    matches = @client.find("yelp")
    assert matches.length > 0
    assert_kind_of(Parsley, matches.first.parselet)
  end

  # This is brittle and depends on what parselets we have on the server
  def test_running_a_parselet
    instance = @client.find("yelp-home kyle").first
    parsed = instance.parse(:url => YELP_URL)
    assert parsed['categories'].length > 0
  end

  # This is brittle and depends on what parselets we have on the server
  def test_getting_a_specific_parselet
    assert_nil @client.parselet(12)
    assert_kind_of(Instance, @client.parselet(50))

    yelp = @client.parselet('yelp-home', 4)
    assert_kind_of(Instance, yelp)
    assert_equal 50, yelp.server_id
  end

  # Argument validation: ids take no version, names need a version >= 1.
  def test_errors_on_getting_specific_parselet
    assert_raises(Error) { @client.parselet(nil) }
    assert_raises(Error) { @client.parselet(1, 1) }
    assert_nothing_raised { @client.parselet(1) }
    assert_raises(Error) { @client.parselet("yelp") }
    assert_raises(Error) { @client.parselet("hello", 0) }
    assert_nothing_raised { @client.parselet("hello", 1) }
  end

  # This is brittle and depends on what parselets we have on the server
  def test_server_side_parsing
    server_options = { :url => YELP_URL, :parse_on_server => true }

    by_id = @client.parselet(50).parse(server_options)
    assert by_id['categories'].length > 0

    by_name = @client.parselet('yelp-home', 4).parse(server_options)
    assert by_name['categories'].length > 0
  end
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: iterationlabs-online_parselets
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrew Cantino
|
8
|
+
- Kyle Maxwell
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2009-02-27 00:00:00 -08:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: json
|
18
|
+
type: :runtime
|
19
|
+
version_requirement:
|
20
|
+
version_requirements: !ruby/object:Gem::Requirement
|
21
|
+
requirements:
|
22
|
+
- - ">"
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 0.0.0
|
25
|
+
version:
|
26
|
+
description: Client library for parselets.com
|
27
|
+
email: parselets@gmail.com
|
28
|
+
executables: []
|
29
|
+
|
30
|
+
extensions: []
|
31
|
+
|
32
|
+
extra_rdoc_files:
|
33
|
+
- README
|
34
|
+
files:
|
35
|
+
- README
|
36
|
+
- online_parselets.gemspec
|
37
|
+
- test/test_client.rb
|
38
|
+
- lib/client.rb
|
39
|
+
- lib/instance.rb
|
40
|
+
- lib/online_parselets.rb
|
41
|
+
has_rdoc: true
|
42
|
+
homepage: http://github.com/iterationlabs/online_parselets
|
43
|
+
post_install_message:
|
44
|
+
rdoc_options:
|
45
|
+
- --main
|
46
|
+
- README
|
47
|
+
require_paths:
|
48
|
+
- lib
|
49
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
50
|
+
requirements:
|
51
|
+
- - ">="
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: "0"
|
54
|
+
version:
|
55
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
56
|
+
requirements:
|
57
|
+
- - ">="
|
58
|
+
- !ruby/object:Gem::Version
|
59
|
+
version: "0"
|
60
|
+
version:
|
61
|
+
requirements: []
|
62
|
+
|
63
|
+
rubyforge_project:
|
64
|
+
rubygems_version: 1.2.0
|
65
|
+
signing_key:
|
66
|
+
specification_version: 2
|
67
|
+
summary: Client library for parselets.com
|
68
|
+
test_files: []
|
69
|
+
|