iterationlabs-online_parselets 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README ADDED
@@ -0,0 +1,33 @@
1
+ The online_parselets project is a client gem to talk to http://parselets.com.
2
+
3
+ REQUIREMENTS
4
+
5
+ This gem supports both local and server-side parselet execution. In general,
6
+ however, you should have the parsley gem installed for fast, local parsing.
7
+ Get it here: http://github.com/fizx/parsley-ruby/tree/master
8
+
9
+ INSTALL
10
+
11
+ From GitHub:
12
+ Run the following if you haven't already:
13
+ gem sources -a http://gems.github.com
14
+ Install the gem:
15
+ sudo gem install iterationlabs-online_parselets
16
+
17
+ From source:
18
+ sudo rake install
19
+
20
+ USAGE
21
+
22
+ require "rubygems"
23
+ require "online_parselets"
24
+ client = OnlineParselets::Client.new(:api_key => "your_api_key")
25
+ parselets = client.find("yelp")
26
+ parselet = parselets.first
27
+ yelp_data = parselet.parse(:url => "http://www.yelp.com")
28
+ puts yelp_data.inspect
29
+ # Reload a parselet from the server
30
+ parselet = client.parselet(parselet.server_id)
31
+ # Or look it up by name and (1-based) version:
32
+ parselet = client.parselet('my_parselet', 5)
33
+
data/lib/client.rb ADDED
@@ -0,0 +1,85 @@
1
module OnlineParselets
  # Client class to talk to the parselets.com server.
  #
  # The methods here rely on JSON, CGI and open-uri being loaded, and on the
  # OnlineParselets error classes and OnlineParselets.catch_invalid_key being
  # defined by the time they are called.
  class Client
    attr_accessor :api_key, :environment

    # Base URL for each supported environment.
    HOST = { :production => "http://parselets.com", :local => "http://localhost:3000" }
    SEARCH_PATH = "/search/?q="
    PARSELET_PATH = "/parselets/"
    RUN_ON_SERVER_PATH = "/parse/"

    # Make a new client instance to talk to the parselets.com server.
    #
    #   OnlineParselets::Client.new( :api_key => "your_api_key" )
    #
    # Options:
    #   :api_key     - required; InvalidAPIKey is raised when it is nil or false.
    #   :environment - a key of HOST, defaults to :production;
    #                  InvalidEnvironmentError is raised for unknown keys.
    def initialize(options = {})
      self.api_key = options[:api_key]
      raise InvalidAPIKey, "You must provide an API key" unless api_key

      self.environment = options[:environment] || :production
      raise InvalidEnvironmentError, "Environment #{environment} is not defined" unless HOST[environment]
    end

    # Search for parselets on the server. Returns an array of matching
    # OnlineParselets::Instance objects.
    #
    #   matches = client.find("yelp")
    def find(search_string)
      url = HOST[environment] + SEARCH_PATH + CGI.escape(search_string) +
            "&format=json&api_key=" + CGI.escape(api_key)
      results = OnlineParselets.catch_invalid_key { JSON.parse(read_url(url)) }
      results.map { |entry| Instance.new(entry['parselet'].merge(:client => self)) }
    end

    # Retrieve a parselet by ID, or by name and version. Returns nil (when the
    # server answers 404 or the response has no 'parselet' entry) or an
    # Instance object.
    #
    #   instance = client.parselet('my_parselet', 5)
    #   instance = client.parselet(17)
    #
    # Raises Error when a version is combined with a numeric id, or when a
    # name is given without a positive numeric version.
    def parselet(parselet_id_or_name, parselet_version = nil)
      validate_lookup(parselet_id_or_name, parselet_version)

      version = parselet_version ? "/#{parselet_version}" : ''
      url = HOST[environment] + PARSELET_PATH + CGI.escape(parselet_id_or_name.to_s) + version +
            "?format=json" + "&api_key=" + CGI.escape(api_key)
      result = OnlineParselets.catch_invalid_key { fetch_json_or_nil(url) }
      result && result['parselet'] && Instance.new(result['parselet'].merge(:client => self))
    end

    protected

    # Run a parselet by ID on the server. Returns the parsed JSON response,
    # or nil when the server answers 404.
    #
    #   json = client.run_parselet_on_server(parselet_id = 17, :url => "http://www.example.com")
    #
    # Raises InvalidServerSideParse unless :url (or :file) is an http:// address.
    def run_parselet_on_server(server_id, options)
      url = (options[:url] || options[:file]).to_s
      # \A (not ^) so that only the very start of the string may match.
      unless url =~ /\Ahttp:\/\//i
        raise InvalidServerSideParse, "You must provide an http:// address to run this parselet on the server."
      end

      fetch_url = "#{HOST[environment]}#{RUN_ON_SERVER_PATH}#{CGI.escape(server_id.to_s)}" +
                  "?format=json&api_key=#{CGI.escape(api_key)}&url=#{CGI.escape(url)}"
      OnlineParselets.catch_invalid_key { fetch_json_or_nil(fetch_url) }
    end

    private

    # Check the argument combination accepted by #parselet.
    def validate_lookup(id_or_name, version)
      if id_or_name.to_s =~ /\A\d+\z/
        unless version.nil?
          raise Error, "You cannot enter a version when looking up a specific parselet by id"
        end
      else
        raise Error, "Version must be a number" unless version.to_s =~ /\A\d+\z/
        raise Error, "Version is 1 based, please use 1 as the first version" unless version.to_i > 0
      end
    end

    # GET +url+ and parse the body as JSON; a 404 response yields nil, any
    # other HTTP error is re-raised.
    def fetch_json_or_nil(url)
      JSON.parse(read_url(url))
    rescue OpenURI::HTTPError => e
      # Match on the status code only; the reason phrase varies between servers.
      raise unless e.message =~ /\A404\b/
      nil
    end

    # Read the body at +url+. Prefers URI.open where open-uri provides it
    # (Kernel#open stopped handling URLs in Ruby 3.0) and falls back to
    # Kernel#open on older Rubies. The block form closes the handle.
    def read_url(url)
      reader = lambda { |io| io.read }
      URI.respond_to?(:open) ? URI.open(url, &reader) : open(url, &reader)
    end
  end
end
data/lib/instance.rb ADDED
@@ -0,0 +1,68 @@
1
+ require 'open-uri'
2
+
3
module OnlineParselets
  SERVER_SIDE_EXECUTION_WARNING = "WARNING: Please install the Parsley ruby gem, parselet execution via the server is slower, executed on cached web pages, and possibly rate-limited."

  # An OnlineParselets::Instance is a Parselet object with metadata and the
  # ability to run locally or on the parselets.com server.
  class Instance
    attr_reader :name, :created_on_server_at, :code, :updated_on_server_at, :checked_on_server_at,
                :server_id, :pattern, :example_url, :user_id, :version_on_server, :description, :client

    # Make a new instance of Instance, an object that combines a Parselet with
    # metadata from the parselets.com server.
    #
    # This is mostly an internal method. Keys may be strings or symbols.
    #
    #   instance = OnlineParselets::Instance.new(options)
    def initialize(options = {})
      # Normalise keys so that symbol and string keys are treated alike.
      options = options.inject({}) { |memo, (key, value)| memo[key.to_s] = value; memo }
      @name                 = options['name']
      @created_on_server_at = options['created_at']
      @code                 = options['code']
      @updated_on_server_at = options['updated_at']
      @checked_on_server_at = options['checked_at']
      @server_id            = options['id']
      @pattern              = options['pattern']
      @example_url          = options['example_url']
      @user_id              = options['user_id']
      @version_on_server    = options['version']
      @description          = options['description']
      @client               = options['client']
    end

    # Run the parselet.
    #
    #   instance.parse :url => "http://www.example.com"
    #
    #   instance.parse :string => "<html><head>...</html>"
    #
    # Runs locally when Parsley is loaded, unless :parse_on_server is truthy;
    # otherwise the work is delegated to #parse_on_server. An explicit
    # :parse_on_server => false is honoured as "run locally".
    def parse(options)
      return parse_on_server(options) if options[:parse_on_server] || !defined?(Parsley)

      # :parse_on_server is a routing flag for this method, not a Parsley option.
      local_options = options.reject { |key, _value| key == :parse_on_server }
      string = read_url(local_options[:url]) if local_options[:url]
      parselet.parse({ :string => string }.merge(local_options))
    end

    # Force an online parselet instance to run on the parselets.com server.
    # Prints SERVER_SIDE_EXECUTION_WARNING first.
    #
    #   instance.parse_on_server :url => "http://www.example.com"
    def parse_on_server(options)
      puts SERVER_SIDE_EXECUTION_WARNING
      client.send(:run_parselet_on_server, server_id, options)
    end

    # Access the wrapped Parselet object (built once from +code+); nil when
    # the instance has no code.
    def parselet
      return unless code
      @parselet ||= Parsley.new(code)
    end

    private

    # Read the document at +location+. Prefers URI.open where open-uri
    # provides it (Kernel#open stopped handling URLs in Ruby 3.0) and falls
    # back to Kernel#open on older Rubies. The block form closes the handle.
    #
    # NOTE(review): both entry points hand non-URL strings to Kernel#open, so
    # callers should not pass untrusted input as :url.
    def read_url(location)
      reader = lambda { |io| io.read }
      URI.respond_to?(:open) ? URI.open(location, &reader) : open(location, &reader)
    end
  end
end
62
+
63
# Parsley is an optional dependency: when its bindings cannot be loaded, say
# so on standard output and print the server-side execution warning.
begin
  require 'parsley'
rescue LoadError
  $stdout.puts "Local Parsley bindings not found."
  $stdout.puts OnlineParselets::SERVER_SIDE_EXECUTION_WARNING
end
@@ -0,0 +1,30 @@
1
+ require 'rubygems'
2
+ require 'json'
3
+ require 'open-uri'
4
+ require 'cgi'
5
+
6
+ require File.dirname(__FILE__) + '/client'
7
+ require File.dirname(__FILE__) + '/instance'
8
+
9
module OnlineParselets
  # Basic error. Derives from StandardError so that a bare `rescue` in calling
  # code catches it.
  class Error < StandardError; end
  # Indicates that the provided API key is invalid or not existent.
  class InvalidAPIKey < Error; end
  # Indicates that you have requested a non-existent environment. At the moment the available environments are :local and :production.
  class InvalidEnvironmentError < Error; end
  # Indicates that the server-side parse failed.
  class InvalidServerSideParse < Error; end

  # Run the given block and return its value, translating an HTTP 401
  # response raised by open-uri into InvalidAPIKey. Any other
  # OpenURI::HTTPError is re-raised unchanged.
  def self.catch_invalid_key
    yield
  rescue OpenURI::HTTPError => e
    # Match on the status code only; the reason phrase varies between servers.
    raise InvalidAPIKey, "That API key is invalid" if e.message =~ /\A401\b/
    raise
  end
end
@@ -0,0 +1,25 @@
1
# Gem specification for online_parselets, the client library for parselets.com.
Gem::Specification.new do |s|
  s.name = "online_parselets"
  s.version = "0.1.0"
  s.date = "2009-02-27"
  s.summary = "Client library for parselets.com"
  s.email = "parselets@gmail.com"
  s.homepage = "http://github.com/iterationlabs/online_parselets"
  s.description = "Client library for parselets.com"
  # has_rdoc= is deprecated and ignored by current RubyGems; only call it
  # where the writer still exists so the spec keeps loading if it is removed.
  s.has_rdoc = true if s.respond_to?(:has_rdoc=)
  s.require_paths = ["lib"]
  # The gem ships no native extensions.
  s.extensions = []
  s.bindir = 'bin'
  s.authors = ["Andrew Cantino", "Kyle Maxwell"]
  s.files = %w[
    README
    online_parselets.gemspec
    test/test_client.rb
    lib/client.rb
    lib/instance.rb
    lib/online_parselets.rb
  ]
  s.rdoc_options = ["--main", "README"]
  s.extra_rdoc_files = ["README"]
  s.add_dependency("json", ["> 0.0.0"])
end
@@ -0,0 +1,50 @@
1
+ require File.dirname(__FILE__) + '/../lib/online_parselets'
2
+ require 'test/unit'
3
+ require 'open-uri'
4
+ include OnlineParselets
5
+
6
# Tests for OnlineParselets::Client.
#
# NOTE(review): nothing here stubs the HTTP layer, so these tests presumably
# need network access, a real API key and specific parselets on the server.
class TestOnlineParseletsClient < Test::Unit::TestCase
  def setup
    @client = OnlineParselets::Client.new(:api_key => 'YOUR_API_KEY')
  end

  def test_new_client
    assert_raises(InvalidAPIKey) { OnlineParselets::Client.new(:environment => :local) }
    assert_nothing_raised { OnlineParselets::Client.new(:api_key => 'elkfjdf') }
  end

  def test_client_gets_parselets
    result = @client.find("yelp")
    assert result.length > 0
    assert_kind_of(Parsley, result.first.parselet)
  end

  # This is brittle and depends on what parselets we have on the server
  def test_running_a_parselet
    assert @client.find("yelp-home kyle").first.parse(:url => "http://www.yelp.com")['categories'].length > 0
  end

  # This is brittle and depends on what parselets we have on the server
  def test_getting_a_specific_parselet
    assert_nil @client.parselet(12)
    assert_kind_of(Instance, @client.parselet(50))
    yelp = @client.parselet('yelp-home', 4)
    assert_kind_of(Instance, yelp)
    assert_equal 50, yelp.server_id
  end

  # Argument validation: an id takes no version, a name needs a version >= 1.
  def test_errors_on_getting_specific_parselet
    assert_raises(Error) { @client.parselet(nil) }
    assert_raises(Error) { @client.parselet(1, 1) }
    assert_nothing_raised { @client.parselet(1) }
    assert_raises(Error) { @client.parselet("yelp") }
    assert_raises(Error) { @client.parselet("hello", 0) }
    assert_nothing_raised { @client.parselet("hello", 1) }
  end

  # This is brittle and depends on what parselets we have on the server
  def test_server_side_parsing
    assert @client.parselet(50).parse(:url => "http://www.yelp.com", :parse_on_server => true)['categories'].length > 0
    assert @client.parselet('yelp-home', 4).parse(:url => "http://www.yelp.com", :parse_on_server => true)['categories'].length > 0
  end
end
metadata ADDED
@@ -0,0 +1,69 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: iterationlabs-online_parselets
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Andrew Cantino
8
+ - Kyle Maxwell
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2009-02-27 00:00:00 -08:00
14
+ default_executable:
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: json
18
+ type: :runtime
19
+ version_requirement:
20
+ version_requirements: !ruby/object:Gem::Requirement
21
+ requirements:
22
+ - - ">"
23
+ - !ruby/object:Gem::Version
24
+ version: 0.0.0
25
+ version:
26
+ description: Client library for parselets.com
27
+ email: parselets@gmail.com
28
+ executables: []
29
+
30
+ extensions: []
31
+
32
+ extra_rdoc_files:
33
+ - README
34
+ files:
35
+ - README
36
+ - online_parselets.gemspec
37
+ - test/test_client.rb
38
+ - lib/client.rb
39
+ - lib/instance.rb
40
+ - lib/online_parselets.rb
41
+ has_rdoc: true
42
+ homepage: http://github.com/iterationlabs/online_parselets
43
+ post_install_message:
44
+ rdoc_options:
45
+ - --main
46
+ - README
47
+ require_paths:
48
+ - lib
49
+ required_ruby_version: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - ">="
52
+ - !ruby/object:Gem::Version
53
+ version: "0"
54
+ version:
55
+ required_rubygems_version: !ruby/object:Gem::Requirement
56
+ requirements:
57
+ - - ">="
58
+ - !ruby/object:Gem::Version
59
+ version: "0"
60
+ version:
61
+ requirements: []
62
+
63
+ rubyforge_project:
64
+ rubygems_version: 1.2.0
65
+ signing_key:
66
+ specification_version: 2
67
+ summary: Client library for parselets.com
68
+ test_files: []
69
+