jphastings-SlyPI 0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/slypi.rb +146 -0
- metadata +62 -0
data/slypi.rb
ADDED
@@ -0,0 +1,146 @@
|
|
1
|
+
# = SlyPI
|
2
|
+
# This gem accepts a SlyPI (a glorified YAML settings file) and will give you an object with the SlyPI specified methods.
|
3
|
+
#
|
4
|
+
# The aim of these is to create simple SlyPI files to collect information from websites that don't have web-apis. Essentially it's formalized screen-scraping, not entirely responsible, but it's been useful to me before!
|
5
|
+
#
|
6
|
+
# If a website changes its structure (breaking any screen scraping procedures) only the SlyPI settings file needs to be updated, and any dependent applications will go back to working as normal.
|
7
|
+
#
|
8
|
+
# == Example
|
9
|
+
# require 'slypi'
|
10
|
+
# s = SlyPI.new("tv.com.slypi") # From here: http://github.com/jphastings/SlyPI-examples
|
11
|
+
# p s.slypi_methods # ["SearchShows","Episodes","EpisodeDetails","ShowDetails"]
|
12
|
+
# p s.SearchShows(:q => "Fawlty Towers")
|
13
|
+
# # {"Shows"=>[{"showid"=>"3453", "Title"=>"Fawlty Towers", "Description"=>"The genesis of ... and so on
|
14
|
+
# p s.ShowDetails(:showid => 3453)
|
15
|
+
# # {"Genres"=>"Comedy", "Title"=>"Fawlty Towers", "Description"=> ... and so on
|
16
|
+
# At the moment there's no obvious way to find out what parameters are required by each slypi_method, but the slypi settings files are human readable, everything is the same as it is in there, so you should be able to work out what to call.
|
17
|
+
#
|
18
|
+
# == Warning
|
19
|
+
# Screen scraping in this fashion is generally speaking not allowed by websites. Use this at your own risk, and be nice - don't try and make hundreds of calls in one go.
|
20
|
+
#
|
21
|
+
# == Getting in Touch
|
22
|
+
# github:: http://github.com/jphastings
|
23
|
+
# twitter:: @jphastings
|
24
|
+
# facebook:: http://facebook.com/profile.php?id=36800401
|
25
|
+
# email:: slypi@projects.kedakai.co.uk
|
26
|
+
|
27
|
+
require 'yaml'
|
28
|
+
require 'rubygems'
|
29
|
+
require 'mechanize'
|
30
|
+
|
31
|
+
# The SlyPI class. It has only two permanent methods, the others are dynamically generated by the SlyPI settings file specified at initialization.
|
32
|
+
class SlyPI
|
33
|
+
attr_reader :service, :version, :author, :site, :description, :slypi_methods
|
34
|
+
|
35
|
+
# Give it a file (in the slypi format) and your class will be generated for you!
|
36
|
+
#
|
37
|
+
# In the future I may get this to accept string settings files too (at the moment, it must be a file)
|
38
|
+
def initialize(slypi_file)
|
39
|
+
settings = nil
|
40
|
+
raise "File not found" if not File.exists?(slypi_file)
|
41
|
+
open(slypi_file) do |f|
|
42
|
+
settings = YAML.load(f)
|
43
|
+
end
|
44
|
+
|
45
|
+
@agent = WWW::Mechanize.new
|
46
|
+
|
47
|
+
@service = settings['About']['Name']
|
48
|
+
@service.freeze
|
49
|
+
@version = settings['About']['Version']
|
50
|
+
@version.freeze
|
51
|
+
@author = settings['About']['Author']
|
52
|
+
@author.freeze
|
53
|
+
@site = settings['About']['Site']
|
54
|
+
@site.freeze
|
55
|
+
@description = settings['About']['Description']
|
56
|
+
@description.freeze
|
57
|
+
@slypi_methods = []
|
58
|
+
|
59
|
+
settings['Functions'].each do |function_name,details|
|
60
|
+
if function_name =~ /^[0-9a-zA-Z]+$/
|
61
|
+
@slypi_methods.push(function_name)
|
62
|
+
eval("def #{function_name}( options ); raise \"Please specify a hash of the function terms\" if not options.is_a?(Hash);details = #{details.inspect};params = {}; return send(\"run_function\",details,options); end")
|
63
|
+
else
|
64
|
+
$stderr.puts "The method '#{function_name}' is not a valid SlyPI method name. Please check your api file!"
|
65
|
+
end
|
66
|
+
end
|
67
|
+
@slypi_methods.freeze
|
68
|
+
end
|
69
|
+
|
70
|
+
# Just a standard inspect method at the moment. In the future it will allow you to inspect the dynamically generated methods to find out what they require and other such information
|
71
|
+
def inspect(conditions = nil)
|
72
|
+
if conditions.nil?
|
73
|
+
"SlyPI #{@service} class"
|
74
|
+
else
|
75
|
+
if @methods.include?(conditions[:method])
|
76
|
+
# TODO: give info about what paramters are required about the given message
|
77
|
+
puts "details about this method here"
|
78
|
+
return nil
|
79
|
+
else
|
80
|
+
raise "I don't know what part of this SlyPI you want to inspect"
|
81
|
+
end
|
82
|
+
end
|
83
|
+
end
|
84
|
+
|
85
|
+
private
|
86
|
+
def run_function(spec,parameters)
|
87
|
+
# Check to make sure the parameters meet the requirements. Raise an error if they don't (they're required)
|
88
|
+
if not spec['requires'].nil?
|
89
|
+
spec['requires'].each do |spec_name,details|
|
90
|
+
raise "Required parameter '#{spec_name}' does not need the requiremens ('#{parameters[spec_name.to_sym]}' should fit regexp: #{details['format']})" if parameters[spec_name.to_sym].to_s.match(Regexp.new(details['format'],Regexp::MULTILINE)).nil?
|
91
|
+
end
|
92
|
+
end
|
93
|
+
# Remove any optional parameters that don't meet requirements, append defaults to those that aren't specified
|
94
|
+
if not spec['optional'].nil?
|
95
|
+
spec['optional'].each do |spec_name,details|
|
96
|
+
parameters[spec_name.to_sym] = nil if not parameters[spec_name.to_sym].nil? and parameters[spec_name.to_sym].to_s.match(Regexp.new(details['format'],Regexp::MULTILINE)).nil?
|
97
|
+
parameters[spec_name.to_sym] = details['default'] if parameters[spec_name.to_sym].nil?
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
url = subst(spec['request']['url'],parameters)
|
102
|
+
begin
|
103
|
+
page = @agent.get(url)
|
104
|
+
rescue
|
105
|
+
$stderr.puts "We've experienced an error attempting to get the information from '#{url}'. More information follows."
|
106
|
+
raise
|
107
|
+
end
|
108
|
+
output = traverse(page.parser,spec['returns'],parameters)
|
109
|
+
output['_sourceUrl'] = url
|
110
|
+
return output
|
111
|
+
end
|
112
|
+
|
113
|
+
def traverse(root,items,params)
|
114
|
+
output = {}
|
115
|
+
items.each do |item|
|
116
|
+
if item[1].include? "_base"
|
117
|
+
cont = item[1].reject{|key,val| key == "_base"}
|
118
|
+
output[item[0]] = root.xpath(item[1]["_base"]).collect{ |hits| traverse(hits,cont,params) }
|
119
|
+
else
|
120
|
+
begin
|
121
|
+
el = root.xpath(subst(item[1]['xpath'],params))
|
122
|
+
if item[1]['regex'].nil?
|
123
|
+
output[item[0]] = (el.is_a?(String) or el.length < 2) ? el.inner_text.strip : el.collect{|e| e.inner_text.strip}
|
124
|
+
else
|
125
|
+
output[item[0]] = (el.is_a?(String) or el.length < 2) ?
|
126
|
+
el.inner_text.strip.match(Regexp.new(subst(item[1]['regex'],params),Regexp::MULTILINE))[1] :
|
127
|
+
el.collect{ |e|
|
128
|
+
e.inner_text.strip.match(Regexp.new(subst(item[1]['regex'],params),Regexp::MULTILINE))[1]
|
129
|
+
}
|
130
|
+
end
|
131
|
+
rescue NoMethodError
|
132
|
+
end
|
133
|
+
end
|
134
|
+
end
|
135
|
+
return output
|
136
|
+
end
|
137
|
+
|
138
|
+
def subst(input,params)
|
139
|
+
return input if input.match(/%\{[a-zA-Z0-9]+\}/).nil?
|
140
|
+
string = input
|
141
|
+
params.each do |param|
|
142
|
+
string = string.gsub(/%\{#{param[0]}\}/,URI::encode(param[1].to_s))
|
143
|
+
end
|
144
|
+
return string
|
145
|
+
end
|
146
|
+
end
|
metadata
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jphastings-SlyPI
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: "0.6"
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- JP Hastings-Spital
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-05-16 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: mechanize
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
description: Use SlyPIs (web-apis for sites that don't have them) in your ruby code with this simple gem
|
26
|
+
email: slypi@projects.kedakai.co.uk
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- slypi.rb
|
35
|
+
has_rdoc: true
|
36
|
+
homepage: http://projects.kedakai.co.uk/slypi/
|
37
|
+
post_install_message:
|
38
|
+
rdoc_options: []
|
39
|
+
|
40
|
+
require_paths:
|
41
|
+
- .
|
42
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
43
|
+
requirements:
|
44
|
+
- - ">="
|
45
|
+
- !ruby/object:Gem::Version
|
46
|
+
version: "0"
|
47
|
+
version:
|
48
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
49
|
+
requirements:
|
50
|
+
- - ">="
|
51
|
+
- !ruby/object:Gem::Version
|
52
|
+
version: "0"
|
53
|
+
version:
|
54
|
+
requirements: []
|
55
|
+
|
56
|
+
rubyforge_project:
|
57
|
+
rubygems_version: 1.2.0
|
58
|
+
signing_key:
|
59
|
+
specification_version: 2
|
60
|
+
summary: Use SlyPIs (web-apis for sites that don't have them) in your ruby code with this simple gem
|
61
|
+
test_files: []
|
62
|
+
|