iterationlabs-online_parselets 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,33 @@
1
+ The online_parselets project is a client gem to talk to http://parselets.com.
2
+
3
+ REQUIREMENTS
4
+
5
+ This gem supports both local and server-side parselet execution. In general,
6
+ however, you should have the parsley gem installed for fast, local parsing.
7
+ Get it here: http://github.com/fizx/parsley-ruby/tree/master
8
+
9
+ INSTALL
10
+
11
+ From GitHub:
12
+ Run the following if you haven't already:
13
+ gem sources -a http://gems.github.com
14
+ Install the gem:
15
+ sudo gem install iterationlabs-online_parselets
16
+
17
+ From source:
18
+ sudo rake install
19
+
20
+ USAGE
21
+
22
+ require "rubygems"
23
+ require "online_parselets"
24
+ client = OnlineParselets::Client.new(:api_key => "your_api_key")
25
+ parselets = client.find("yelp")
26
+ parselet = parselets.first
27
+ yelp_data = parselet.parse(:url => "http://www.yelp.com")
28
+ puts yelp_data.inspect
29
+ # Reload a parselet from the server
30
+ parselet = client.parselet(parselet.server_id)
31
+ # Or:
32
+ parselet = client.parselet('my_parselet', 5) # name, then version
33
+
data/lib/client.rb ADDED
@@ -0,0 +1,85 @@
1
module OnlineParselets
  # Client class to talk to the parselets.com server.
  #
  # Every request is an HTTP GET that carries the API key as a query
  # parameter and asks the server for a JSON response.
  class Client
    attr_accessor :api_key, :environment

    # Base URL for each supported environment.
    HOST = { :production => "http://parselets.com", :local => "http://localhost:3000" }
    SEARCH_PATH = "/search/?q="
    PARSELET_PATH = "/parselets/"
    RUN_ON_SERVER_PATH = "/parse/"

    # Make a new client instance to talk to the parselets.com server.
    #
    #   OnlineParselets::Client.new( :api_key => "your_api_key" )
    #
    # Options:
    # [:api_key]     required; InvalidAPIKey is raised when it is missing.
    # [:environment] a key of HOST, defaults to :production;
    #                InvalidEnvironmentError is raised for unknown values.
    def initialize(options = {})
      self.api_key = options[:api_key]
      raise InvalidAPIKey, "You must provide an API key" unless api_key

      self.environment = options[:environment] || :production
      unless HOST[environment]
        raise InvalidEnvironmentError, "Environment #{environment} is not defined"
      end
    end

    # Search for parselets on the server. Returns an array of matching
    # OnlineParselets::Instance objects.
    #
    #   matches = client.find("yelp")
    def find(search_string)
      url = HOST[environment] + SEARCH_PATH + CGI.escape(search_string) +
            "&format=json&api_key=" + CGI.escape(api_key)
      results = OnlineParselets.catch_invalid_key { fetch_json(url) }
      results.map { |entry| build_instance(entry['parselet']) }
    end

    # Retrieve a parselet by ID, or by name and version. Returns nil (when
    # the server answers 404 or the response has no 'parselet' entry) or an
    # Instance object.
    #
    #   instance = client.parselet('my_parselet', 5)
    #   instance = client.parselet(17)
    #
    # Raises Error when a version is combined with a numeric id, or when a
    # name is given without a positive numeric version.
    def parselet(parselet_id_or_name, parselet_version = nil)
      validate_lookup(parselet_id_or_name, parselet_version)

      version = parselet_version ? "/#{parselet_version}" : ''
      url = HOST[environment] + PARSELET_PATH + CGI.escape(parselet_id_or_name.to_s) + version +
            "?format=json&api_key=" + CGI.escape(api_key)
      result = OnlineParselets.catch_invalid_key { fetch_json_unless_missing(url) }
      result && result['parselet'] && build_instance(result['parselet'])
    end

    protected

    # Run a parselet by ID on the server. Returns the parsed JSON response,
    # or nil when the server answers 404.
    #
    #   json = client.run_parselet_on_server(parselet_id = 17, :url => "http://www.example.com")
    #
    # The page address is read from options[:url], falling back to
    # options[:file]; InvalidServerSideParse is raised unless it starts with
    # http://.
    def run_parselet_on_server(server_id, options)
      url = (options[:url] || options[:file]).to_s
      # \A (not ^) so that only the very start of the string may match.
      unless url =~ /\Ahttp:\/\//i
        raise InvalidServerSideParse, "You must provide an http:// address to run this parselet on the server."
      end

      fetch_url = "#{HOST[environment]}#{RUN_ON_SERVER_PATH}#{CGI.escape(server_id.to_s)}" +
                  "?format=json&api_key=#{CGI.escape(api_key)}&url=#{CGI.escape(url)}"
      OnlineParselets.catch_invalid_key { fetch_json_unless_missing(fetch_url) }
    end

    private

    # Checks the id/name + version combination accepted by #parselet.
    def validate_lookup(id_or_name, version)
      # \z (not \Z) so that a trailing newline does not count as numeric.
      if id_or_name.to_s =~ /\A\d+\z/
        return if version.nil?
        raise Error, "You cannot enter a version when looking up a specific parselet by id"
      end

      raise Error, "Version must be a number" unless version.to_s =~ /\A\d+\z/
      raise Error, "Version is 1 based, please use 1 as the first version" unless version.to_i > 0
    end

    # GETs +url+ and parses the body as JSON. URI#read (from open-uri) is
    # used instead of Kernel#open, which stopped handling URLs in Ruby 3.0,
    # and it closes the underlying handle once the body has been read.
    def fetch_json(url)
      JSON.parse(URI.parse(url).read)
    end

    # Like #fetch_json, but maps an HTTP 404 to nil. Other HTTP errors
    # propagate so that catch_invalid_key can inspect them.
    def fetch_json_unless_missing(url)
      fetch_json(url)
    rescue OpenURI::HTTPError => e
      # Match on the status code only; the reason phrase varies by server.
      raise unless e.message =~ /\A404\b/
      nil
    end

    # Wraps a parselet attribute hash from the server in an Instance bound
    # to this client.
    def build_instance(attributes)
      Instance.new(attributes.merge(:client => self))
    end
  end
end
data/lib/instance.rb ADDED
@@ -0,0 +1,68 @@
1
+ require 'open-uri'
2
+
3
module OnlineParselets
  SERVER_SIDE_EXECUTION_WARNING = "WARNING: Please install the Parsley ruby gem, parselet execution via the server is slower, executed on cached web pages, and possibly rate-limited."

  # An OnlineParselets::Instance is a Parselet object with metadata and the
  # ability to run locally or on the parselets.com server.
  class Instance
    attr_reader :name, :created_on_server_at, :code, :updated_on_server_at, :checked_on_server_at,
                :server_id, :pattern, :example_url, :user_id, :version_on_server, :description, :client

    # Make a new instance of Instance, an object that combines a Parselet
    # with metadata from the parselets.com server.
    #
    # This is mostly an internal method. Keys may be strings or symbols;
    # 'created_at', 'updated_at', 'checked_at', 'id' and 'version' are
    # exposed as created_on_server_at, updated_on_server_at,
    # checked_on_server_at, server_id and version_on_server.
    #
    #   instance = OnlineParselets::Instance.new(options)
    def initialize(options = {})
      attributes = {}
      options.each { |key, value| attributes[key.to_s] = value } # Turn symbols into strings.

      @name                 = attributes['name']
      @created_on_server_at = attributes['created_at']
      @code                 = attributes['code']
      @updated_on_server_at = attributes['updated_at']
      @checked_on_server_at = attributes['checked_at']
      @server_id            = attributes['id']
      @pattern              = attributes['pattern']
      @example_url          = attributes['example_url']
      @user_id              = attributes['user_id']
      @version_on_server    = attributes['version']
      @description          = attributes['description']
      @client               = attributes['client']
    end

    # Run the parselet.
    #
    #   instance.parse :url => "http://www.example.com"
    #
    #   instance.parse :string => "<html><head>...</html>"
    #
    # Runs locally when the Parsley constant is defined, unless
    # :parse_on_server is truthy; otherwise delegates to #parse_on_server.
    # For a local run, the content at :url (when given) is fetched and
    # passed as :string, although an explicit :string in +options+ wins.
    def parse(options)
      # A truthy flag (not merely a non-nil one) forces the server, so that
      # :parse_on_server => false means "run locally".
      return parse_on_server(options) if options[:parse_on_server] || !defined?(Parsley)

      string = read_source(options[:url]) if options[:url]
      parselet.parse({ :string => string }.merge(options))
    end

    # Force an online parselet instance to run on the parselets.com server.
    #
    #   instance.parse_on_server :url => "http://www.example.com"
    def parse_on_server(options)
      puts SERVER_SIDE_EXECUTION_WARNING
      # run_parselet_on_server is protected on the client, hence #send.
      client.send(:run_parselet_on_server, server_id, options)
    end

    # Access the wrapped Parselet object. It is built from +code+ on first
    # use and is nil when there is no code.
    def parselet
      @parselet ||= Parsley.new(code) if code
    end

    private

    # Reads the document at +location+. Strings with a scheme:// prefix are
    # fetched through open-uri; anything else is read as a local file.
    # Kernel#open is avoided on purpose: it spawns a subprocess for input
    # starting with "|" and no longer accepts URLs on Ruby 3.0+.
    def read_source(location)
      location = location.to_s
      if location =~ %r{\A[a-z][a-z0-9+\-.]*://}i
        URI.parse(location).read
      else
        File.open(location) { |file| file.read }
      end
    end
  end
end
62
+
63
# Parsley is optional. When its bindings cannot be loaded, tell the user and
# print the server-side execution warning; Instance#parse then falls back to
# running parselets on the server.
begin
  require 'parsley'
rescue LoadError
  puts "Local Parsley bindings not found.", OnlineParselets::SERVER_SIDE_EXECUTION_WARNING
end
@@ -0,0 +1,30 @@
1
+ require 'rubygems'
2
+ require 'json'
3
+ require 'open-uri'
4
+ require 'cgi'
5
+
6
+ require File.dirname(__FILE__) + '/client'
7
+ require File.dirname(__FILE__) + '/instance'
8
+
9
module OnlineParselets
  # Basic error. Derives from StandardError (not Exception) so that a bare
  # +rescue+ in calling code catches errors raised by this library.
  class Error < StandardError; end
  # Indicates that the provided API key is invalid or not existent.
  class InvalidAPIKey < Error; end
  # Indicates that you have requested a non-existent environment. At the moment the available environments are :local and :production.
  class InvalidEnvironmentError < Error; end
  # Indicates that the server-side parse failed.
  class InvalidServerSideParse < Error; end

  # Runs the given block and returns its value. An OpenURI::HTTPError with
  # status 401 raised inside the block is translated into InvalidAPIKey;
  # any other HTTP error is re-raised unchanged.
  def self.catch_invalid_key
    yield
  rescue OpenURI::HTTPError => e
    # Match on the status code only: the reason phrase differs between
    # servers ("Authorization Required", "Unauthorized", ...).
    raise InvalidAPIKey, "That API key is invalid" if e.message =~ /\A401\b/
    raise
  end
end
@@ -0,0 +1,25 @@
1
# Gem specification for the online_parselets client library.
Gem::Specification.new do |spec|
  spec.name        = "online_parselets"
  spec.version     = "0.1.0"
  spec.date        = "2009-02-27"
  spec.summary     = "Client library for parselets.com"
  spec.description = "Client library for parselets.com"
  spec.email       = "parselets@gmail.com"
  spec.homepage    = "http://github.com/iterationlabs/online_parselets"
  spec.authors     = ["Andrew Cantino", "Kyle Maxwell"]

  # has_rdoc= is deprecated in RubyGems; only call it where it still exists
  # so the gemspec keeps loading if the setter is removed.
  spec.has_rdoc = true if spec.respond_to?(:has_rdoc=)

  spec.require_paths = ["lib"]
  spec.extensions    = [] # no native extensions
  spec.bindir        = 'bin'
  spec.files = %w[
    README
    online_parselets.gemspec
    test/test_client.rb
    lib/client.rb
    lib/instance.rb
    lib/online_parselets.rb
  ]

  spec.rdoc_options     = ["--main", "README"]
  spec.extra_rdoc_files = ["README"]

  spec.add_dependency("json", ["> 0.0.0"])
end
@@ -0,0 +1,50 @@
1
+ require File.dirname(__FILE__) + '/../lib/online_parselets'
2
+ require 'test/unit'
3
+ require 'open-uri'
4
+ include OnlineParselets
5
+
6
# Tests for OnlineParselets::Client. Apart from test_new_client, these call
# the client against its default (production) host with the placeholder key
# below, and several depend on which parselets currently exist on the server.
class TestOnlineParseletsClient < Test::Unit::TestCase
  YELP_URL = "http://www.yelp.com"

  def setup
    @client = OnlineParselets::Client.new(:api_key => 'YOUR_API_KEY')
  end

  # Constructing a client requires an API key.
  def test_new_client
    assert_raises(OnlineParselets::InvalidAPIKey) { OnlineParselets::Client.new(:environment => :local) }
    assert_nothing_raised { OnlineParselets::Client.new(:api_key => 'elkfjdf') }
  end

  def test_client_gets_parselets
    result = @client.find("yelp")
    assert result.length > 0
    assert_kind_of(Parsley, result.first.parselet)
  end

  # This is brittle and depends on what parselets we have on the server
  def test_running_a_parselet
    parsed = @client.find("yelp-home kyle").first.parse(:url => YELP_URL)
    assert parsed['categories'].length > 0
  end

  # This is brittle and depends on what parselets we have on the server
  def test_getting_a_specific_parselet
    # assert_nil rather than assert_equal(nil, ...), which newer test
    # frameworks deprecate.
    assert_nil @client.parselet(12)
    assert_kind_of(OnlineParselets::Instance, @client.parselet(50))
    yelp = @client.parselet('yelp-home', 4)
    assert_kind_of(OnlineParselets::Instance, yelp)
    assert_equal 50, yelp.server_id
  end

  # Argument validation: ids take no version, names need a version >= 1.
  def test_errors_on_getting_specific_parselet
    assert_raises(OnlineParselets::Error) { @client.parselet(nil) }
    assert_raises(OnlineParselets::Error) { @client.parselet(1, 1) }
    assert_nothing_raised { @client.parselet(1) }
    assert_raises(OnlineParselets::Error) { @client.parselet("yelp") }
    assert_raises(OnlineParselets::Error) { @client.parselet("hello", 0) }
    assert_nothing_raised { @client.parselet("hello", 1) }
  end

  # This is brittle and depends on what parselets we have on the server
  def test_server_side_parsing
    by_id = @client.parselet(50).parse(:url => YELP_URL, :parse_on_server => true)
    assert by_id['categories'].length > 0

    by_name = @client.parselet('yelp-home', 4).parse(:url => YELP_URL, :parse_on_server => true)
    assert by_name['categories'].length > 0
  end
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iterationlabs-online_parselets
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Cantino
8
+ - Kyle Maxwell
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-02-27 00:00:00 -08:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: json
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ description: Client library for parselets.com
27
+ email: parselets@gmail.com
28
+ executables: []
29
+
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - README
34
+ files:
35
+ - README
36
+ - online_parselets.gemspec
37
+ - test/test_client.rb
38
+ - lib/client.rb
39
+ - lib/instance.rb
40
+ - lib/online_parselets.rb
41
+ has_rdoc: true
42
+ homepage: http://github.com/iterationlabs/online_parselets
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --main
46
+ - README
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.2.0
65
+ signing_key:
66
+ specification_version: 2
67
+ summary: Client library for parselets.com
68
+ test_files: []
69
+