apist 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +7 -0
- data/Gemfile +10 -0
- data/LICENSE +22 -0
- data/README.md +114 -0
- data/Rakefile +2 -0
- data/apist.gemspec +25 -0
- data/examples/auth_and_query.rb +46 -0
- data/examples/basic.rb +39 -0
- data/examples/error404.rb +18 -0
- data/lib/apist.rb +139 -0
- data/lib/apist/error/http.rb +12 -0
- data/lib/apist/error/method.rb +5 -0
- data/lib/apist/filter.rb +237 -0
- data/lib/apist/method.rb +88 -0
- data/lib/apist/request.rb +12 -0
- data/lib/apist/resultcallback.rb +45 -0
- data/lib/apist/selector.rb +65 -0
- data/script/release +42 -0
- data/spec/apist_spec.rb +39 -0
- data/spec/test_api.rb +46 -0
- metadata +124 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8df71c3751ea4cb1eff329f5531da796297f1d96
|
4
|
+
data.tar.gz: bfe02bcbcc663ec7e2189e834598a6fc266b456a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 13d9d5ba292f4baafad3551848836c23427a96f2313af37b8eb89b5b3e4789cbd839564a72d5d02e26395454c9b08feadaab2d0df57ecaec1cf3e11e24e32cb3
|
7
|
+
data.tar.gz: c54c9eefc258f3a3ba3b45d81316ffa8d1ed7ccbe737357ff006fc01a9f3c0c08cdd04a1db9bc750260188c8e26082550df128252f47a676d76e6bb07cd9131e
|
data/.gitignore
ADDED
data/CHANGELOG
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Sleeping Owl
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
## SleepingOwl Apist
|
2
|
+
|
3
|
+
SleepingOwl Apist is a small library that lets you access any website in an API-like style, based on HTML parsing.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
This package allows you to write methods like this:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'apist'
|
11
|
+
|
12
|
+
class WikiApi < Apist
|
13
|
+
base_url 'http://en.wikipedia.org'
|
14
|
+
|
15
|
+
def index
|
16
|
+
get '/wiki/Main_Page',
|
17
|
+
welcome_message: filter('#mp-topbanner div:first').text[0...-1],
|
18
|
+
portals: filter('a[title^="Portal:"]').each(
|
19
|
+
link: current.attr('href').call(lambda { |href| self.class.base_url + href }),
|
20
|
+
label: current.text
|
21
|
+
),
|
22
|
+
languages: filter('#p-lang li a[title]').each(
|
23
|
+
label: current.text,
|
24
|
+
lang: current.attr('title'),
|
25
|
+
link: current.attr('href').call(lambda { |href| 'http:' + href })
|
26
|
+
),
|
27
|
+
sister_projects: filter('#mp-sister b a').each.text,
|
28
|
+
featured_article: filter('#mp-tfa').html
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
```
|
33
|
+
|
34
|
+
use it:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
api = WikiApi.new
|
38
|
+
data = api.index
|
39
|
+
```
|
40
|
+
|
41
|
+
and get the following result (*json format used only for visualization, actual result type is `Hash`*):
|
42
|
+
|
43
|
+
```json
|
44
|
+
{
|
45
|
+
"welcome_message": "Welcome to Wikipedia",
|
46
|
+
"portals": [
|
47
|
+
{
|
48
|
+
"link": "http:\/\/en.wikipedia.org\/wiki\/Portal:Arts",
|
49
|
+
"label": "Arts"
|
50
|
+
},
|
51
|
+
{
|
52
|
+
"link": "http:\/\/en.wikipedia.org\/wiki\/Portal:Biography",
|
53
|
+
"label": "Biography"
|
54
|
+
},
|
55
|
+
...
|
56
|
+
],
|
57
|
+
"languages": [
|
58
|
+
{
|
59
|
+
"label": "Simple English",
|
60
|
+
"lang": "Simple English",
|
61
|
+
"link": "http:\/\/simple.wikipedia.org\/wiki\/"
|
62
|
+
},
|
63
|
+
{
|
64
|
+
"label": "العربية",
|
65
|
+
"lang": "Arabic",
|
66
|
+
"link": "http:\/\/ar.wikipedia.org\/wiki\/"
|
67
|
+
},
|
68
|
+
{
|
69
|
+
"label": "Bahasa Indonesia",
|
70
|
+
"lang": "Indonesian",
|
71
|
+
"link": "http:\/\/id.wikipedia.org\/wiki\/"
|
72
|
+
},
|
73
|
+
...
|
74
|
+
],
|
75
|
+
"sister_projects": [
|
76
|
+
"Commons",
|
77
|
+
"MediaWiki",
|
78
|
+
...
|
79
|
+
],
|
80
|
+
"featured_article": "<div style=\"float: left; margin: 0.5em 0.9em 0.4em 0em;\">...<\/div>"
|
81
|
+
}
|
82
|
+
```
|
83
|
+
|
84
|
+
## Installation
|
85
|
+
|
86
|
+
Add this line to your application's Gemfile:
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
gem 'apist'
|
90
|
+
```
|
91
|
+
|
92
|
+
And then execute:
|
93
|
+
|
94
|
+
$ bundle
|
95
|
+
|
96
|
+
Or install it yourself as:
|
97
|
+
|
98
|
+
$ gem install apist
|
99
|
+
|
100
|
+
## Documentation
|
101
|
+
|
102
|
+
Documentation can be found at [sleeping owl apist](http://sleeping-owl-apist.gopagoda.com/en/ruby/documentation).
|
103
|
+
|
104
|
+
## Examples
|
105
|
+
|
106
|
+
View [examples](http://sleeping-owl-apist.gopagoda.com/en/ruby#examples).
|
107
|
+
|
108
|
+
## Support Library
|
109
|
+
|
110
|
+
You can donate in BTC: 13k36pym383rEmsBSLyWfT3TxCQMN2Lekd
|
111
|
+
|
112
|
+
## Copyright and License
|
113
|
+
|
114
|
+
Apist was written by Sleeping Owl and is released under the MIT License. See the LICENSE file for details.
|
data/Rakefile
ADDED
data/apist.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
# Gem specification for apist.
#
# The library's lib directory is put on the load path so that Apist::VERSION
# can be read from the library itself.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'apist'

Gem::Specification.new do |spec|
  spec.name          = "apist"
  spec.version       = Apist::VERSION
  spec.authors       = ["Sleeping Owl"]
  spec.email         = ["owl.sleeping@yahoo.com"]
  spec.summary       = %q{Package to provide api-like access to foreign sites based on html parsing}
  spec.description   = %q{Package to provide api-like access to foreign sites based on html parsing}
  spec.homepage      = "http://sleeping-owl-apist.gopagoda.com"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"

  # Runtime dependencies. lib/apist/method.rb requires 'nokogiri' directly, so
  # it has to be declared here or a fresh install fails on first require.
  spec.add_dependency 'httparty'
  spec.add_dependency 'nokogiri'
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
# Example: authenticate against the demo admin panel and list its contacts.
lib_dir = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.push(lib_dir)
require File.join(lib_dir, 'apist')
require 'pp'
require 'json'

class AdminApi < Apist
  base_url 'http://sleeping-owl-admin-demo.gopagoda.com'
  # base_url 'http://sleeping-owl-admin.my'

  # Demo credentials are assigned before the parent initializer runs.
  def initialize
    @username = 'admin'
    @password = 'SleepingOwl'
    super
  end

  # Reads the hidden "_token" input from the login form.
  def get_login_token
    token_field = filter('input[name="_token"]')
    get('/admin/login', token_field.attr('value'))
  end

  # Posts the credentials together with a freshly fetched token and returns
  # the html of the ".page-header" element of the response.
  def login
    credentials = {
      _token: get_login_token,
      username: @username,
      password: @password
    }
    post('/admin/login', filter('.page-header').html, { body: credentials })
  end

  # Logs in, then extracts one entry per table row of the contacts page.
  def contacts
    login
    row_blueprint = {
      photo: filter('td:first-child img').attr('src'),
      name: filter('td').eq(1).text,
      birthday: filter('.column-date').attr('data-order'),
      country: filter('td').eq(3).text,
      companies: filter('td:nth-child(5) li').each.text
    }
    get('/admin/contacts', { entries: filter('.table tbody tr').each(row_blueprint) })
  end

end

client = AdminApi.new
contacts = client.contacts
puts JSON.pretty_generate(contacts)
|
data/examples/basic.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# Example: scrape the Wikipedia main page and the current events portal.
lib_dir = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.push(lib_dir)
require File.join(lib_dir, 'apist')
require 'pp'
require 'json'

class WikiApi < Apist
  base_url 'http://en.wikipedia.org'

  # Blueprint for the main page: banner text, portals, languages,
  # sister projects and the featured article html.
  def index
    portal_blueprint = {
      link: current.attr('href').call(->(href) { self.class.base_url + href }),
      label: current.text
    }
    language_blueprint = {
      label: current.text,
      lang: current.attr('title'),
      link: current.attr('href').call(->(href) { 'http:' + href })
    }

    get('/wiki/Main_Page', {
      welcome_message: filter('#mp-topbanner div:first').text[0...-1],
      portals: filter('a[title^="Portal:"]').each(portal_blueprint),
      languages: filter('#p-lang li a[title]').each(language_blueprint),
      sister_projects: filter('#mp-sister b a').each.text,
      featured_article: filter('#mp-tfa').html
    })
  end

  # One entry (date plus list of event texts) per "vevent" table.
  def current_events
    day_blueprint = {
      date: filter('.bday').text,
      events: filter('dl').each.text
    }
    days = filter('#mw-content-text > table:last td:first table.vevent')
    get('/wiki/Portal:Current_events', days.each(day_blueprint))
  end


end

wiki = WikiApi.new
events = wiki.current_events
puts JSON.pretty_generate(events)
|
@@ -0,0 +1,18 @@
|
|
1
|
+
# Example: request a page that does not exist and print the result
# that the library returns for it.
lib_dir = File.expand_path('../../lib', __FILE__)
$LOAD_PATH.push(lib_dir)
require File.join(lib_dir, 'apist')
require 'pp'
require 'json'

class HabrApi < Apist
  base_url 'http://habrahabr.ru'

  # Requests an unknown page with a blueprint for the menu link texts.
  def get404
    menu_links = filter('#TMpanel .menu a')
    get('/unknown-page', { menu: menu_links.each.text })
  end
end

habr = HabrApi.new
response = habr.get404
puts JSON.pretty_generate(response)
|
data/lib/apist.rb
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'apist/request'
|
2
|
+
require 'apist/method'
|
3
|
+
require 'apist/selector'
|
4
|
+
|
5
|
+
# Base class for clients that expose a website through api-like methods.
#
# A subclass declares its +base_url+ and defines methods that call one of the
# http verbs (#get, #post, ...) with a url and a blueprint. The blueprint is a
# single selector, a Hash of selectors, or a plain value; it is resolved
# against the response by Apist::Method.
class Apist

  VERSION = '1.0.0'

  # Object created from Apist::Request in #initialize.
  attr_reader :requester
  # Apist::Method currently being executed; nil outside of #parse / a request.
  attr_reader :current_method
  # When true (the default) a failing selector chain yields nil instead of
  # raising; consulted by Apist::Selector.
  attr_accessor :suppress_exceptions

  def initialize
    @requester = Apist::Request.new self.class.base_url
    @suppress_exceptions = true
  end

  # Allows setting a base url to be used for each request.
  #
  #   class Foo < Apist
  #     base_url 'http://en.wikipedia.org'
  #   end
  #
  # Called without an argument it returns the configured url. A class that
  # did not configure one falls back to its parent's url, so intermediate
  # base classes can be subclassed further.
  #
  # @param [String, nil] url
  # @return [String, nil]
  def self.base_url(url = nil)
    return @base_url = url if url
    return @base_url if @base_url
    superclass.base_url if superclass.respond_to?(:base_url)
  end

  # Create new filter in blueprint
  #
  #   class Foo < Apist
  #     base_url 'http://en.wikipedia.org'
  #     def index
  #       get '/wiki/Main_Page',
  #         welcome_message: filter('#mp-topbanner div:first').text
  #     end
  #   end
  #
  # @param [String] css_selector
  # @return [Apist::Selector]
  def self.filter(css_selector)
    Apist::Selector.new css_selector
  end

  # Instance-level shortcut for Apist.filter.
  #
  # @param [String] css_selector
  # @return [Apist::Selector]
  def filter(css_selector)
    self.class.filter css_selector
  end

  # Create new filter object with current node as filter result
  #
  #   class Foo < Apist
  #     base_url 'http://en.wikipedia.org'
  #     def index
  #       get '/wiki/Main_Page',
  #         portals: filter('a[title^="Portal:"]').each(
  #           link: current.attr('href'),
  #           label: current.text
  #         )
  #     end
  #   end
  #
  # @return [Apist::Selector] selector for '*', which Apist::Selector treats
  #   as "use the root node itself"
  def self.current
    self.filter '*'
  end

  # Instance-level shortcut for Apist.current.
  #
  # @return [Apist::Selector]
  def current
    self.class.current
  end

  # Parse already available content by blueprint, without an http request.
  #
  # @param [String] content markup handed to Apist::Method#set_content
  # @param blueprint selector, Hash of selectors, or plain value
  # @return the parsed blueprint
  def parse(content, blueprint)
    @current_method = Apist::Method.new self, nil, blueprint
    @current_method.set_content content
    @current_method.parse_blueprint blueprint
  ensure
    # Reset even when parsing raises, so no stale method object is exposed.
    @current_method = nil
  end

  # Perform GET http-request
  def get(url, blueprint = nil, options = {})
    request 'get', url, blueprint, options
  end

  # Perform HEAD http-request
  def head(url, blueprint = nil, options = {})
    request 'head', url, blueprint, options
  end

  # Perform POST http-request
  def post(url, blueprint = nil, options = {})
    request 'post', url, blueprint, options
  end

  # Perform PUT http-request
  def put(url, blueprint = nil, options = {})
    request 'put', url, blueprint, options
  end

  # Perform PATCH http-request
  def patch(url, blueprint = nil, options = {})
    request 'patch', url, blueprint, options
  end

  # Perform DELETE http-request
  def delete(url, blueprint = nil, options = {})
    request 'delete', url, blueprint, options
  end

  private

  # Perform http-request with options and parse result by blueprint
  #
  # @param [String] http_method verb name stored on the Apist::Method
  # @param [String] url
  # @param blueprint selector, Hash of selectors, plain value or nil
  # @param [Hash] options request options forwarded to Apist::Method#get
  def request(http_method, url, blueprint = nil, options = {})
    @current_method = Apist::Method.new self, url, blueprint
    @current_method.method = http_method
    @current_method.get options
  ensure
    # Reset even when the request raises.
    @current_method = nil
  end

end
|
data/lib/apist/filter.rb
ADDED
@@ -0,0 +1,237 @@
|
|
1
|
+
class Apist
  # Wraps the value that is currently flowing through a selector's callback
  # chain (a Nokogiri node / node set, or a plain value such as a String) and
  # implements the chainable operations. Each operation returns the raw
  # result; Apist::ResultCallback wraps it in a new Filter for the next step.
  class Filter

    attr_reader :node
    attr_reader :method
    attr_reader :resource

    # @param node value to operate on
    # @param [Apist::Method] method method being executed; its resource is
    #   stored as well
    def initialize(node, method)
      @node = node
      @method = method
      @resource = method.resource
    end

    # @return [String] text content of the node(s)
    def text
      guard_crawler
      @node.text
    end

    # @return [String] inner html of the node(s)
    def html
      guard_crawler
      @node.inner_html
    end

    # @return nodes below the current node matching the css selector
    def filter(selector)
      guard_crawler
      @node.css selector
    end

    # @return result of calling +filter+ on the wrapped node
    def filter_nodes(selector)
      guard_crawler
      @node.filter selector
    end

    # @return nodes below the current node matching the css selector
    def find(selector)
      guard_crawler
      @node.css selector
    end

    # @return child nodes of the current node
    def children
      guard_crawler
      @node.children
    end

    # @return nearest preceding element sibling, or nil
    def prev
      guard_crawler
      prev_all[0]
    end

    # @return [Array] all preceding element siblings, nearest first
    def prev_all
      guard_crawler
      sibling 'previous'
    end

    # @return nearest following element sibling, or nil
    def next
      guard_crawler
      next_all[0]
    end

    # @return [Array] all following element siblings, nearest first
    def next_all
      guard_crawler
      sibling 'next'
    end

    # @return [Boolean] whether the (first) node matches the selector
    def is(selector)
      guard_crawler
      get_node.matches? selector
    end

    # @return nearest ancestor matching the selector, or nil
    def closest(selector)
      guard_crawler
      # Nokogiri lists ancestors starting at the parent and walking towards
      # the root, so the closest match is the first entry, not the last.
      get_node.ancestors(selector).first
    end

    # @return [String] attribute value converted with to_s
    def attr(attribute)
      guard_crawler
      @node.attr(attribute).to_s
    end

    # @return [Boolean] whether the attribute is present
    def hasAttr(attribute)
      guard_crawler
      !@node.attr(attribute).nil?
    end

    # @return node at the given position
    def eq(position)
      guard_crawler
      @node.at position
    end

    # @return first node
    def first
      guard_crawler
      @node.first
    end

    # @return last node
    def last
      guard_crawler
      @node.last
    end

    # @return the wrapped value itself
    def element
      @node
    end

    # Passes the wrapped value to a callable.
    #
    # @param [#call] block
    # @return whatever the callable returns
    def call(block)
      block.call @node
    end

    # @return [String] text with surrounding whitespace removed
    def strip
      guard_text
      @node.strip
    end

    # @return [String] text with leading whitespace removed
    def lstrip
      guard_text
      @node.lstrip
    end

    # @return [String] text with trailing whitespace removed
    def rstrip
      guard_text
      @node.rstrip
    end

    # Forwards all arguments to gsub on the text.
    #
    # @return [String]
    def gsub(*several_variants)
      guard_text
      @node.send :gsub, *several_variants
    end

    # @return [Integer]
    def to_i(base = 10)
      guard_text
      @node.to_i base
    end

    # @return [Float]
    def to_f
      guard_text
      @node.to_f
    end

    # @return [Boolean] false for nil or an empty value, true otherwise
    def exists
      return false if @node.nil?
      # A single node has no empty?; being present, it exists.
      return true unless @node.respond_to?(:empty?)
      !@node.empty?
    end

    # Same as #call; reads better in front of #then / #else.
    def check(block)
      call block
    end

    # @return parsed blueprint when the wrapped value is true, otherwise the
    #   wrapped value unchanged
    def then(blueprint)
      return @node unless @node == true
      @method.parse_blueprint blueprint
    end

    # @return parsed blueprint when the wrapped value is false, otherwise the
    #   wrapped value unchanged
    def else(blueprint)
      return @node unless @node == false
      @method.parse_blueprint blueprint
    end

    # Maps every wrapped node.
    #
    # @param blueprint nil (return the nodes as they are), a Proc called with
    #   (node, index), or a blueprint parsed with each node as root
    # @return [Array]
    def each(blueprint = nil)
      callback =
        if blueprint.nil?
          lambda { |node, _index| node }
        elsif blueprint.is_a? Proc
          blueprint
        else
          # Clone per node so one iteration cannot affect the next one.
          lambda { |node, _index| @method.parse_blueprint blueprint.clone, node }
        end

      @node.each_with_index.map { |node, index| callback.call(node, index) }
    end

    private

    # Whether the wrapped value is a Nokogiri node or node set.
    def is_node
      @node.is_a?(Nokogiri::XML::NodeSet) || @node.is_a?(Nokogiri::XML::Node)
    end

    # First node of a node set, or the node itself.
    def get_node
      return @node[0] if @node.is_a? Nokogiri::XML::NodeSet
      @node
    end

    # Collects element siblings by repeatedly calling +direction+
    # ('previous' or 'next') until it returns nil.
    def sibling(direction)
      nodes = []
      node = get_node
      until (node = node.send(direction)).nil?
        nodes << node if node.node_type == 1 # element nodes only
      end
      nodes
    end

    # Replaces a wrapped node by its text so string operations can run.
    def guard_text
      @node = @node.text if is_node
    end

    # Node operations are only valid on Nokogiri objects. The error class is
    # the one Apist::Selector rescues, so suppress_exceptions applies here.
    def guard_crawler
      return if is_node
      raise Apist::Error::Method, 'Current node isnt instance of Nokogiri Node or NodeSet.'
    end

  end
end
|
data/lib/apist/method.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'apist/selector'
|
3
|
+
require 'apist/error/http'
|
4
|
+
|
5
|
+
class Apist
  # One execution of a resource method: performs the http request (if any),
  # keeps the response and the parsed document, and resolves a blueprint
  # against that document.
  class Method

    attr_reader :resource
    attr_reader :url
    attr_reader :blueprint
    # Http verb name, sent as a message to the requester's class.
    attr_accessor :method
    # Response object of the last request (nil when only #set_content was used).
    attr_reader :content
    # Document built by Nokogiri::HTML from the content.
    attr_reader :crawler

    # @param [Apist] resource
    # @param [String] url
    # @param blueprint selector, Hash of selectors, plain value or nil
    def initialize(resource, url, blueprint)
      @resource = resource
      @url = url
      @blueprint = blueprint
    end

    # Performs the request and parses the stored blueprint.
    #
    # @param [Hash] options request options
    # @return the parsed blueprint, or an error Hash (see #error_response)
    #   when the request fails with an http error or a SocketError
    def get(options)
      make_request options
      parse_blueprint @blueprint
    rescue Apist::Error::Http => e
      error_response e.code, e.reason, e.url
    rescue SocketError => e
      error_response 0, e.message, @url
    end

    # Sends the request through the requester's class and stores the result.
    #
    # @raise [Apist::Error::Http] when the response code is not 200
    def make_request(options = {})
      @content = @resource.requester.class.send method, @url, options
      if @content.code != 200
        code = @content.code
        message = @content.response.message
        url = @content.request.last_uri.to_s
        raise Apist::Error::Http.new(code, message, url)
      end
      store_cookies
      set_content @content.body
    end

    # Adds a Set-Cookie response header, if present, to the requester
    # class's default cookies.
    def store_cookies
      cookie = @content.headers['Set-Cookie']
      return if cookie.nil?
      @resource.requester.class.default_cookies.add_cookies cookie
    end

    # @param [String] content markup to parse into the crawler document
    def set_content(content)
      @crawler = Nokogiri::HTML content
    end

    # Resolves a blueprint against the document (or against +node+).
    #
    # The given blueprint is left untouched: a new Hash with the same keys is
    # returned, so a blueprint object can safely be reused by the caller.
    #
    # @param blueprint nil, Apist::Selector, Hash (possibly nested) or any
    #   plain value
    # @param node optional root node for the selectors
    # @return the stored content when blueprint is nil, otherwise the
    #   resolved value / Hash
    def parse_blueprint(blueprint, node = nil)
      return @content if blueprint.nil?
      return parse_blueprint_value(blueprint, node) unless blueprint.is_a? Hash

      blueprint.each_with_object({}) do |(key, value), parsed|
        parsed[key] =
          if value.is_a? Hash
            parse_blueprint value, node
          else
            parse_blueprint_value value, node
          end
      end
    end

    private

    # Selectors are evaluated; every other value is passed through as is.
    def parse_blueprint_value(value, node)
      return value.get_value(self, node) if value.is_a? Apist::Selector
      value
    end

    # Result returned by #get in place of a parsed blueprint on failure.
    def error_response(code, reason, url)
      {
        url: url,
        error: {
          status: code,
          reason: reason
        }
      }
    end

  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'apist/error/method'
|
2
|
+
require 'apist/filter'
|
3
|
+
|
4
|
+
class Apist
  # One recorded step of a selector chain: a method name plus its arguments,
  # applied later to the value produced by the previous step.
  class ResultCallback

    attr_reader :name
    attr_reader :arguments

    # @param [Symbol, String] name method to call
    # @param [Array] arguments arguments for that call
    def initialize(name, arguments)
      @name = name
      @arguments = arguments
    end

    # Applies the step to +node+. Lookup order: Apist::Filter, then the
    # resource (called with the node as first argument), then the node itself.
    # An Array is processed element by element.
    #
    # @param node current value of the chain
    # @param [Apist::Method] method method being executed
    # @raise [Apist::Error::Method] when nothing responds to the name
    def apply(node, method)
      return apply_to_array node, method if node.is_a? Array

      return node.to_s if @name == :to_s

      filter = Apist::Filter.new node, method
      return filter.send @name, *@arguments if filter.respond_to? @name

      resource = method.resource
      return call_resource_method node, resource if resource.respond_to? @name

      return node.send @name, *@arguments if node.respond_to? @name

      raise Apist::Error::Method, "Method '#{@name}' was not found"
    end

    # @return [Array] result of #apply for every element
    def apply_to_array(array, method)
      array.map { |node| apply(node, method) }
    end

    # Calls the resource's method with the node followed by the recorded
    # arguments. The recorded list itself is not modified, because the same
    # callback is applied again for every node and every request.
    def call_resource_method(node, resource)
      resource.send @name, node, *@arguments
    end

  end
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'apist/resultcallback'
|
2
|
+
require 'apist/error/method'
|
3
|
+
|
4
|
+
class Apist
  # A css selector plus the chain of method calls recorded on it.
  #
  # Any unknown method called on a Selector is not executed but recorded
  # (see #method_missing) and replayed by #get_value once a document is
  # available.
  class Selector

    # Methods every Object already has would be executed immediately instead
    # of being recorded, so the ones that are part of the chain vocabulary
    # are removed. `then` exists on every object since Ruby 2.6 (Kernel#then)
    # and must reach #method_missing to be replayed as Apist::Filter#then.
    undef_method :to_s
    undef_method :then if method_defined?(:then)

    attr_reader :selector
    attr_reader :result_method_chain

    # @param [String] selector css selector; '*' means "the root node itself"
    def initialize(selector)
      @selector = selector
      @result_method_chain = []
    end

    # Records the call and returns self so calls can be chained.
    def method_missing(name, *arguments)
      add_callback name, arguments
    end

    # Evaluates the selector and replays the recorded chain.
    #
    # @param [Apist::Method] method
    # @param root_node node to search in; defaults to the method's crawler
    # @return result of the chain, or nil when a step fails and the resource
    #   suppresses exceptions
    def get_value(method, root_node = nil)
      root_node = method.crawler if root_node.nil?
      result = @selector == '*' ? root_node : root_node.css(@selector)
      apply_result_callback_chain result, method
    end

    private

    # Feeds +node+ through every recorded callback in order.
    #
    # @raise [Apist::Error::Method] with the chain up to the failing call in
    #   the message, unless the resource suppresses exceptions
    def apply_result_callback_chain(node, method)
      # A selector without any call yields the text of the matched nodes.
      add_callback 'text' if @result_method_chain.empty?

      trace_stack = []
      @result_method_chain.each do |result_callback|
        begin
          trace_stack << result_callback
          node = result_callback.apply node, method
        rescue Apist::Error::Method => e
          return nil if method.resource.suppress_exceptions
          raise Apist::Error::Method, create_exception_message(e, trace_stack)
        end
      end
      node
    end

    # Appends a callback to the chain.
    #
    # @return [Apist::Selector] self
    def add_callback(name, arguments = [])
      @result_method_chain << Apist::ResultCallback.new(name, arguments)
      self
    end

    # Builds "<error>: filter(<selector>).call(args)..." for the calls made
    # so far.
    def create_exception_message(e, trace_stack)
      calls = trace_stack.map do |callback|
        call = ".#{callback.name}"
        call += '(' + callback.arguments.join(', ') + ')' unless callback.arguments.empty?
        call
      end
      e.message + ": filter(#{@selector})" + calls.join
    end

  end
end
|
data/script/release
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/bin/sh
#/ Usage: release
#/
#/ Tag the version in the repo and push the gem.
#/

set -e

# Print the "#/" header lines as help. This happens before changing
# directory so that a relative "$0" still points at this script.
case "$1" in
  --help|-h|help)
    grep '^#/' <"$0" | cut -c4-
    exit 0
    ;;
esac

# Work from the repository root (the parent of this script's directory).
cd "$(dirname "$0")/.."

gem_name=apist

# Build a new gem archive.
rm -rf "$gem_name"-*.gem
gem build -q "$gem_name.gemspec"

# Make sure we're on the master branch (exact match, not a prefix match).
[ "$(git rev-parse --abbrev-ref HEAD)" = "master" ] || {
  echo "Only release from the master branch."
  exit 1
}

# Figure out what version we're releasing.
tag=v$(ls "$gem_name"-*.gem | sed "s/^$gem_name-\(.*\)\.gem$/\1/")

echo "Releasing $tag"

# Make sure we haven't released this version before.
git fetch -t origin

# -x and -F compare the whole line literally, so "v1.0.0" neither matches
# "v1.0.01" nor treats the dots as regex wildcards.
if git tag -l | grep -qxF "$tag"; then
  echo "Whoops, there's already a '${tag}' tag."
  exit 1
fi

# Tag it and bag it.
gem push "$gem_name"-*.gem && git tag "$tag" &&
  git push origin master && git push origin "$tag"
|
data/spec/apist_spec.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
require 'test_api'
|
3
|
+
|
4
|
+
# Specs for the TestApi resource defined in test_api.rb.
describe 'apist' do

  # A fresh resource for every example.
  let(:api) { TestApi.new }

  it 'should generate pretty http error response' do
    response = api.get404
    expect(response).to include :url, :error => {:status => 404, :reason => 'Not Found'}
  end

  it 'parses single blueprint value' do
    expect(api.menu_first).to eql('Welcome to Wikipedia')
  end

  it 'parses only filter objects' do
    # A blueprint without selectors must come back with the same content.
    static = {
      title: 'My title',
      sub: {
        first: 1,
        second: 2
      }
    }
    expect(api.static_blueprint(static)).to eql(static)
  end

  it 'parses blueprint' do
    page = api.index
    expect(page).to include :welcome_message => 'Welcome to Wikipedia'
    expect(page).to include :portals
    expect(page[:portals].first).to include :link, :label
  end

end
|
data/spec/test_api.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'apist'
|
2
|
+
|
3
|
+
# Resource used by the specs.
class TestApi < Apist
  base_url 'http://en.wikipedia.org'
  # base_url 'http://habrahabr.my'

  # Requests a page that does not exist, without a blueprint.
  def get404
    get('/unknown-page')
  end

  # Single-selector blueprint: banner text without its last character.
  def menu_first
    banner = filter('#mp-topbanner div:first')
    get('/wiki/Main_Page', banner.text[0...-1])
  end

  # Parses the main page with a blueprint supplied by the caller.
  def static_blueprint(blueprint)
    get('/wiki/Main_Page', blueprint)
  end

  # Full blueprint for the main page.
  def index
    portal_blueprint = {
      link: current.attr('href').call(->(href) { self.class.base_url + href }),
      label: current.text
    }
    language_blueprint = {
      label: current.text,
      lang: current.attr('title'),
      link: current.attr('href').call(->(href) { 'http:' + href })
    }

    get('/wiki/Main_Page', {
      welcome_message: filter('#mp-topbanner div:first').text[0...-1],
      portals: filter('a[title^="Portal:"]').each(portal_blueprint),
      languages: filter('#p-lang li a[title]').each(language_blueprint),
      sister_projects: filter('#mp-sister b a').each.text,
      featured_article: filter('#mp-tfa').html
    })
  end

  # POST with both query and body parameters and no blueprint.
  def query
    request_options = {
      query: {
        first: 1
      },
      body: {
        first: 1,
        second: 2
      }
    }
    post('/test.php', nil, request_options)
  end

end
|
metadata
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: apist
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sleeping Owl
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: httparty
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Package to provide api-like access to foreign sites based on html parsing
|
70
|
+
email:
|
71
|
+
- owl.sleeping@yahoo.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- CHANGELOG
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- apist.gemspec
|
83
|
+
- examples/auth_and_query.rb
|
84
|
+
- examples/basic.rb
|
85
|
+
- examples/error404.rb
|
86
|
+
- lib/apist.rb
|
87
|
+
- lib/apist/error/http.rb
|
88
|
+
- lib/apist/error/method.rb
|
89
|
+
- lib/apist/filter.rb
|
90
|
+
- lib/apist/method.rb
|
91
|
+
- lib/apist/request.rb
|
92
|
+
- lib/apist/resultcallback.rb
|
93
|
+
- lib/apist/selector.rb
|
94
|
+
- script/release
|
95
|
+
- spec/apist_spec.rb
|
96
|
+
- spec/test_api.rb
|
97
|
+
homepage: http://sleeping-owl-apist.gopagoda.com
|
98
|
+
licenses:
|
99
|
+
- MIT
|
100
|
+
metadata: {}
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
requirements: []
|
116
|
+
rubyforge_project:
|
117
|
+
rubygems_version: 2.4.3
|
118
|
+
signing_key:
|
119
|
+
specification_version: 4
|
120
|
+
summary: Package to provide api-like access to foreign sites based on html parsing
|
121
|
+
test_files:
|
122
|
+
- spec/apist_spec.rb
|
123
|
+
- spec/test_api.rb
|
124
|
+
has_rdoc:
|