jphastings-SlyPI 0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. data/slypi.rb +146 -0
  2. metadata +62 -0
@@ -0,0 +1,146 @@
1
+ # = SlyPI
2
+ # This gem accepts a SlyPI (a glorified YAML settings file) and will give you an object with the SlyPI specified methods.
3
+ #
4
+ # The aim of these is to create simple SlyPI files to collect information from websites that don't have web-apis. Essentially it's formalized screen-scraping, not entirely responsible, but it's been useful to me before!
5
+ #
6
+ # If a website changes its structure (breaking any screen scraping procedures) only the SlyPI settings file needs to be updated, and any dependent applications will go back to working as normal.
7
+ #
8
+ # == Example
9
+ # require 'slypi'
10
+ # s = SlyPI.new("tv.com.slypi") # From here: http://github.com/jphastings/SlyPI-examples
11
+ # p s.slypi_methods # ["SearchShows","Episodes","EpisodeDetails","ShowDetails"]
12
+ # p s.SearchShows(:q => "Fawlty Towers")
13
+ # # {"Shows"=>[{"showid"=>"3453", "Title"=>"Fawlty Towers", "Description"=>"The genesis of ... and so on
14
+ # p s.ShowDetails(:showid => 3453)
15
+ # # {"Genres"=>"Comedy", "Title"=>"Fawlty Towers", "Description"=> ... and so on
16
+ # At the moment there's no obvious way to find out what parameters are required by each slypi_method, but the slypi settings files are human readable, everything is the same as it is in there, so you should be able to work out what to call.
17
+ #
18
+ # == Warning
19
+ # Screen scraping in this fashion is generally speaking not allowed by websites. Use this at your own risk, and be nice - don't try and make hundreds of calls in one go.
20
+ #
21
+ # == Getting in Touch
22
+ # github:: http://github.com/jphastings
23
+ # twitter:: @jphastings
24
+ # facebook:: http://facebook.com/profile.php?id=36800401
25
+ # email:: slypi@projects.kedakai.co.uk
26
+
27
+ require 'yaml'
28
+ require 'rubygems'
29
+ require 'mechanize'
30
+
31
# URI escaping is done with the standard library directly (the original relied
# on mechanize having loaded it).
require 'uri'

# The SlyPI class. It has only two permanent public methods (+new+ and
# +inspect+); the others are generated per instance from the 'Functions'
# section of the SlyPI settings file given at initialization.
class SlyPI
  attr_reader :service, :version, :author, :site, :description, :slypi_methods

  # Names allowed to become SlyPI methods. Anchored with \A and \z so that a
  # multi-line name cannot pass the check (^ and $ only anchor single lines).
  METHOD_NAME = /\A[0-9a-zA-Z]+\z/

  # Same escaping rules as the removed URI.encode (RFC 2396 "unsafe" characters).
  URI_ESCAPER = URI::RFC2396_Parser.new

  # Give it a file (in the slypi format) and the API methods will be generated
  # for you on the new object.
  #
  # slypi_file:: path to a YAML SlyPI settings file (strings are not accepted).
  #
  # Raises RuntimeError when the file is missing or does not contain a mapping.
  # Invalid function names are reported on $stderr and skipped.
  def initialize(slypi_file)
    raise "File not found" unless File.exist?(slypi_file)

    # NOTE(review): YAML.load is kept for compatibility with existing settings
    # files; only load SlyPI files from sources you trust.
    settings = File.open(slypi_file) { |file| YAML.load(file) }
    raise "Invalid SlyPI file" unless settings.is_a?(Hash)

    about = settings['About'] || {}
    @service     = about['Name'].freeze
    @version     = about['Version'].freeze
    @author      = about['Author'].freeze
    @site        = about['Site'].freeze
    @description = about['Description'].freeze
    @slypi_methods = []

    (settings['Functions'] || {}).each do |function_name, details|
      if valid_method_name?(function_name)
        @slypi_methods.push(function_name)
        # A closure instead of eval: nothing from the settings file is ever
        # interpreted as Ruby source, and the method exists on this object only
        # (eval'd `def` put it on the class, leaking it into every instance).
        define_singleton_method(function_name) do |options|
          raise "Please specify a hash of the function terms" unless options.is_a?(Hash)
          run_function(details, options)
        end
      else
        $stderr.puts "The method '#{function_name}' is not a valid SlyPI method name. Please check your api file!"
      end
    end
    @slypi_methods.freeze
  end

  # With no argument, returns a short description of the object.
  #
  # conditions:: optional hash; <tt>:method</tt> names one of +slypi_methods+
  #              (String or Symbol).
  #
  # Raises RuntimeError when the named method is not part of this SlyPI.
  def inspect(conditions = nil)
    return "SlyPI #{@service} class" if conditions.nil?

    # The original consulted @methods, which is never assigned.
    unless @slypi_methods.include?(conditions[:method].to_s)
      raise "I don't know what part of this SlyPI you want to inspect"
    end

    # TODO: give info about what parameters are required by the given method
    puts "details about this method here"
    nil
  end

  private

  # True when +name+ may safely become a generated method: alphanumeric only and
  # not shadowing one of this class's own private helpers (e.g. traverse, subst).
  def valid_method_name?(name)
    return false unless name.is_a?(String) && name =~ METHOD_NAME

    !self.class.private_instance_methods(false).map { |own| own.to_s }.include?(name)
  end

  # The HTTP agent, built on first use. Newer mechanize releases expose
  # ::Mechanize; old ones only had WWW::Mechanize.
  def http_agent
    @agent ||= (defined?(::Mechanize) ? ::Mechanize : ::WWW::Mechanize).new
  end

  # Validates +parameters+ against +spec+, fetches the request URL and scrapes
  # the page according to spec['returns'].
  #
  # Returns the scraped hash plus '_sourceUrl'. Raises RuntimeError when a
  # required parameter does not fit its format; fetch errors are re-raised
  # after a note on $stderr.
  def run_function(spec, parameters)
    # Work on a copy so defaults are never written into the caller's hash.
    parameters = normalize_keys(parameters)

    # Required parameters must fit their format.
    (spec['requires'] || {}).each do |spec_name, details|
      value = parameters[spec_name.to_sym]
      next if matches_format?(value, details)

      raise "Required parameter '#{spec_name}' does not meet the requirements ('#{value}' should fit regexp: #{details['format']})"
    end

    # Optional parameters that don't fit are dropped; missing ones get defaults.
    (spec['optional'] || {}).each do |spec_name, details|
      key = spec_name.to_sym
      parameters[key] = nil unless parameters[key].nil? || matches_format?(parameters[key], details)
      parameters[key] = details['default'] if parameters[key].nil?
    end

    url = subst(spec['request']['url'], parameters)
    begin
      page = http_agent.get(url)
    rescue
      $stderr.puts "We've experienced an error attempting to get the information from '#{url}'. More information follows."
      raise
    end

    output = traverse(page.parser, spec['returns'] || {}, parameters)
    output['_sourceUrl'] = url
    output
  end

  # Copies +parameters+, turning keys into symbols so that string keys work too.
  def normalize_keys(parameters)
    parameters.each_with_object({}) do |(key, value), copy|
      copy[key.respond_to?(:to_sym) ? key.to_sym : key] = value
    end
  end

  # True when value.to_s matches the (multiline) regexp in details['format'].
  def matches_format?(value, details)
    !Regexp.new(details['format'], Regexp::MULTILINE).match(value.to_s).nil?
  end

  # Walks the 'returns' specification, collecting text from +root+.
  #
  # An item with a "_base" xpath yields an array of nested hashes, one per hit.
  # Any other item yields the stripped text of its 'xpath' (an array when there
  # are several hits), optionally reduced to capture group 1 of its 'regex'.
  # Items without an xpath, or whose regex does not match, are left out.
  def traverse(root, items, params)
    output = {}
    items.each do |name, rule|
      if rule.include?("_base")
        nested = rule.reject { |key, _| key == "_base" }
        output[name] = root.xpath(rule["_base"]).map { |hit| traverse(hit, nested, params) }
        next
      end

      next if rule['xpath'].nil?

      text = extract_text(root.xpath(subst(rule['xpath'], params)))
      if rule['regex'].nil?
        output[name] = text
        next
      end

      pattern = Regexp.new(subst(rule['regex'], params), Regexp::MULTILINE)
      matches = Array(text).map { |piece| pattern.match(piece) }
      # Explicit replacement for the old blanket `rescue NoMethodError`: a
      # failed match drops the item instead of raising.
      next if matches.any? { |match| match.nil? }

      captures = matches.map { |match| match[1] }
      output[name] = text.is_a?(Array) ? captures : captures.first
    end
    output
  end

  # Stripped text of an xpath result: a String for zero/one node, an Array of
  # Strings for several. Scalar results (e.g. from string() or count()) have
  # no inner_text, so they are converted with to_s.
  def extract_text(found)
    return found.to_s.strip unless found.respond_to?(:inner_text)
    return found.inner_text.strip unless found.respond_to?(:length) && found.length >= 2

    found.map { |node| node.inner_text.strip }
  end

  # Replaces every %{name} placeholder in +input+ with the URI-escaped value of
  # the matching parameter. Input without alphanumeric placeholders is returned
  # untouched.
  def subst(input, params)
    return input if input.match(/%\{[a-zA-Z0-9]+\}/).nil?

    params.inject(input) do |string, (key, value)|
      # Literal pattern and block form: neither key nor value is treated as
      # regexp / backreference syntax.
      string.gsub("%{#{key}}") { URI_ESCAPER.escape(value.to_s) }
    end
  end
end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jphastings-SlyPI
3
+ version: !ruby/object:Gem::Version
4
+ version: "0.6"
5
+ platform: ruby
6
+ authors:
7
+ - JP Hastings-Spital
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-05-16 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: mechanize
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: "0"
24
+ version:
25
+ description: Use SlyPIs (web-apis for sites that don't have them) in your ruby code with this simple gem
26
+ email: slypi@projects.kedakai.co.uk
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - slypi.rb
35
+ has_rdoc: true
36
+ homepage: http://projects.kedakai.co.uk/slypi/
37
+ post_install_message:
38
+ rdoc_options: []
39
+
40
+ require_paths:
41
+ - .
42
+ required_ruby_version: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: "0"
47
+ version:
48
+ required_rubygems_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: "0"
53
+ version:
54
+ requirements: []
55
+
56
+ rubyforge_project:
57
+ rubygems_version: 1.2.0
58
+ signing_key:
59
+ specification_version: 2
60
+ summary: Use SlyPIs (web-apis for sites that don't have them) in your ruby code with this simple gem
61
+ test_files: []
62
+