apist 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +15 -0
- data/CHANGELOG +7 -0
- data/Gemfile +10 -0
- data/LICENSE +22 -0
- data/README.md +114 -0
- data/Rakefile +2 -0
- data/apist.gemspec +25 -0
- data/examples/auth_and_query.rb +46 -0
- data/examples/basic.rb +39 -0
- data/examples/error404.rb +18 -0
- data/lib/apist.rb +139 -0
- data/lib/apist/error/http.rb +12 -0
- data/lib/apist/error/method.rb +5 -0
- data/lib/apist/filter.rb +237 -0
- data/lib/apist/method.rb +88 -0
- data/lib/apist/request.rb +12 -0
- data/lib/apist/resultcallback.rb +45 -0
- data/lib/apist/selector.rb +65 -0
- data/script/release +42 -0
- data/spec/apist_spec.rb +39 -0
- data/spec/test_api.rb +46 -0
- metadata +124 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 8df71c3751ea4cb1eff329f5531da796297f1d96
|
4
|
+
data.tar.gz: bfe02bcbcc663ec7e2189e834598a6fc266b456a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 13d9d5ba292f4baafad3551848836c23427a96f2313af37b8eb89b5b3e4789cbd839564a72d5d02e26395454c9b08feadaab2d0df57ecaec1cf3e11e24e32cb3
|
7
|
+
data.tar.gz: c54c9eefc258f3a3ba3b45d81316ffa8d1ed7ccbe737357ff006fc01a9f3c0c08cdd04a1db9bc750260188c8e26082550df128252f47a676d76e6bb07cd9131e
|
data/.gitignore
ADDED
data/CHANGELOG
ADDED
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2014 Sleeping Owl
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,114 @@
|
|
1
|
+
## SleepingOwl Apist
|
2
|
+
|
3
|
+
SleepingOwl Apist is a small library which allows you to access any site in api-like style, based on html parsing.
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
This package allows you to write methods like this:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
require 'apist'
|
11
|
+
|
12
|
+
class WikiApi < Apist
|
13
|
+
base_url 'http://en.wikipedia.org'
|
14
|
+
|
15
|
+
def index
|
16
|
+
get '/wiki/Main_Page',
|
17
|
+
welcome_message: filter('#mp-topbanner div:first').text[0...-1],
|
18
|
+
portals: filter('a[title^="Portal:"]').each(
|
19
|
+
link: current.attr('href').call(lambda { |href| self.class.base_url + href }),
|
20
|
+
label: current.text
|
21
|
+
),
|
22
|
+
languages: filter('#p-lang li a[title]').each(
|
23
|
+
label: current.text,
|
24
|
+
lang: current.attr('title'),
|
25
|
+
link: current.attr('href').call(lambda { |href| 'http:' + href })
|
26
|
+
),
|
27
|
+
sister_projects: filter('#mp-sister b a').each.text,
|
28
|
+
featured_article: filter('#mp-tfa').html
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
32
|
+
```
|
33
|
+
|
34
|
+
use it:
|
35
|
+
|
36
|
+
```ruby
|
37
|
+
api = WikiApi.new
|
38
|
+
data = api.index
|
39
|
+
```
|
40
|
+
|
41
|
+
and get the following result (*json format used only for visualization, actual result type is `Hash`*):
|
42
|
+
|
43
|
+
```json
|
44
|
+
{
|
45
|
+
"welcome_message": "Welcome to Wikipedia",
|
46
|
+
"portals": [
|
47
|
+
{
|
48
|
+
"link": "http:\/\/en.wikipedia.org\/wiki\/Portal:Arts",
|
49
|
+
"label": "Arts"
|
50
|
+
},
|
51
|
+
{
|
52
|
+
"link": "http:\/\/en.wikipedia.org\/wiki\/Portal:Biography",
|
53
|
+
"label": "Biography"
|
54
|
+
},
|
55
|
+
...
|
56
|
+
],
|
57
|
+
"languages": [
|
58
|
+
{
|
59
|
+
"label": "Simple English",
|
60
|
+
"lang": "Simple English",
|
61
|
+
"link": "http:\/\/simple.wikipedia.org\/wiki\/"
|
62
|
+
},
|
63
|
+
{
|
64
|
+
"label": "العربية",
|
65
|
+
"lang": "Arabic",
|
66
|
+
"link": "http:\/\/ar.wikipedia.org\/wiki\/"
|
67
|
+
},
|
68
|
+
{
|
69
|
+
"label": "Bahasa Indonesia",
|
70
|
+
"lang": "Indonesian",
|
71
|
+
"link": "http:\/\/id.wikipedia.org\/wiki\/"
|
72
|
+
},
|
73
|
+
...
|
74
|
+
],
|
75
|
+
"sister_projects": [
|
76
|
+
"Commons",
|
77
|
+
"MediaWiki",
|
78
|
+
...
|
79
|
+
],
|
80
|
+
"featured_article": "<div style=\"float: left; margin: 0.5em 0.9em 0.4em 0em;\">...<\/div>"
|
81
|
+
}
|
82
|
+
```
|
83
|
+
|
84
|
+
## Installation
|
85
|
+
|
86
|
+
Add this line to your application's Gemfile:
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
gem 'apist'
|
90
|
+
```
|
91
|
+
|
92
|
+
And then execute:
|
93
|
+
|
94
|
+
$ bundle
|
95
|
+
|
96
|
+
Or install it yourself as:
|
97
|
+
|
98
|
+
$ gem install apist
|
99
|
+
|
100
|
+
## Documentation
|
101
|
+
|
102
|
+
Documentation can be found at [sleeping owl apist](http://sleeping-owl-apist.gopagoda.com/en/ruby/documentation).
|
103
|
+
|
104
|
+
## Examples
|
105
|
+
|
106
|
+
View [examples](http://sleeping-owl-apist.gopagoda.com/en/ruby#examples).
|
107
|
+
|
108
|
+
## Support Library
|
109
|
+
|
110
|
+
You can donate in BTC: 13k36pym383rEmsBSLyWfT3TxCQMN2Lekd
|
111
|
+
|
112
|
+
## Copyright and License
|
113
|
+
|
114
|
+
Apist was written by Sleeping Owl and is released under the MIT License. See the LICENSE file for details.
|
data/Rakefile
ADDED
data/apist.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'apist'
|
5
|
+
|
6
|
+
# Gem specification for apist.
Gem::Specification.new do |spec|
  spec.name          = "apist"
  spec.version       = Apist::VERSION
  spec.authors       = ["Sleeping Owl"]
  spec.email         = ["owl.sleeping@yahoo.com"]
  spec.summary       = %q{Package to provide api-like access to foreign sites based on html parsing}
  spec.description   = %q{Package to provide api-like access to foreign sites based on html parsing}
  spec.homepage      = "http://sleeping-owl-apist.gopagoda.com"
  spec.license       = "MIT"

  spec.files         = `git ls-files -z`.split("\x0")
  spec.executables   = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.test_files    = spec.files.grep(%r{^(test|spec|features)/})
  spec.require_paths = ["lib"]

  spec.add_development_dependency "bundler", "~> 1.7"
  spec.add_development_dependency "rake", "~> 10.0"
  spec.add_development_dependency "rspec"

  # Runtime dependencies. nokogiri is required by lib/apist/method.rb, so it
  # has to be declared here or the gem cannot be loaded after installation.
  spec.add_dependency 'httparty'
  spec.add_dependency 'nokogiri'
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$:.push File.expand_path(dir, __FILE__)
|
3
|
+
require File.join(dir, 'apist')
|
4
|
+
require 'pp'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
class AdminApi < Apist
|
8
|
+
base_url 'http://sleeping-owl-admin-demo.gopagoda.com'
|
9
|
+
# base_url 'http://sleeping-owl-admin.my'
|
10
|
+
|
11
|
+
def initialize
|
12
|
+
@username = 'admin'
|
13
|
+
@password = 'SleepingOwl'
|
14
|
+
super
|
15
|
+
end
|
16
|
+
|
17
|
+
def get_login_token
|
18
|
+
get '/admin/login', filter('input[name="_token"]').attr('value')
|
19
|
+
end
|
20
|
+
|
21
|
+
def login
|
22
|
+
post '/admin/login', filter('.page-header').html,
|
23
|
+
body: {
|
24
|
+
_token: get_login_token,
|
25
|
+
username: @username,
|
26
|
+
password: @password
|
27
|
+
}
|
28
|
+
end
|
29
|
+
|
30
|
+
def contacts
|
31
|
+
login
|
32
|
+
get '/admin/contacts',
|
33
|
+
entries: filter('.table tbody tr').each(
|
34
|
+
photo: filter('td:first-child img').attr('src'),
|
35
|
+
name: filter('td').eq(1).text,
|
36
|
+
birthday: filter('.column-date').attr('data-order'),
|
37
|
+
country: filter('td').eq(3).text,
|
38
|
+
companies: filter('td:nth-child(5) li').each.text
|
39
|
+
)
|
40
|
+
end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
api = AdminApi.new
|
45
|
+
data = api.contacts
|
46
|
+
puts JSON.pretty_generate data
|
data/examples/basic.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$:.push File.expand_path(dir, __FILE__)
|
3
|
+
require File.join(dir, 'apist')
|
4
|
+
require 'pp'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
class WikiApi < Apist
|
8
|
+
base_url 'http://en.wikipedia.org'
|
9
|
+
|
10
|
+
def index
|
11
|
+
get '/wiki/Main_Page',
|
12
|
+
welcome_message: filter('#mp-topbanner div:first').text[0...-1],
|
13
|
+
portals: filter('a[title^="Portal:"]').each(
|
14
|
+
link: current.attr('href').call(lambda { |href| self.class.base_url + href }),
|
15
|
+
label: current.text
|
16
|
+
),
|
17
|
+
languages: filter('#p-lang li a[title]').each(
|
18
|
+
label: current.text,
|
19
|
+
lang: current.attr('title'),
|
20
|
+
link: current.attr('href').call(lambda { |href| 'http:' + href })
|
21
|
+
),
|
22
|
+
sister_projects: filter('#mp-sister b a').each.text,
|
23
|
+
featured_article: filter('#mp-tfa').html
|
24
|
+
end
|
25
|
+
|
26
|
+
def current_events
|
27
|
+
get '/wiki/Portal:Current_events',
|
28
|
+
filter('#mw-content-text > table:last td:first table.vevent').each(
|
29
|
+
date: filter('.bday').text,
|
30
|
+
events: filter('dl').each.text
|
31
|
+
)
|
32
|
+
end
|
33
|
+
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
api = WikiApi.new
|
38
|
+
data = api.current_events
|
39
|
+
puts JSON.pretty_generate data
|
@@ -0,0 +1,18 @@
|
|
1
|
+
dir = File.expand_path(File.join(File.dirname(__FILE__), '..', 'lib'))
|
2
|
+
$:.push File.expand_path(dir, __FILE__)
|
3
|
+
require File.join(dir, 'apist')
|
4
|
+
require 'pp'
|
5
|
+
require 'json'
|
6
|
+
|
7
|
+
class HabrApi < Apist
|
8
|
+
base_url 'http://habrahabr.ru'
|
9
|
+
|
10
|
+
def get404
|
11
|
+
get '/unknown-page',
|
12
|
+
menu: filter('#TMpanel .menu a').each.text
|
13
|
+
end
|
14
|
+
end
|
15
|
+
|
16
|
+
api = HabrApi.new
|
17
|
+
data = api.get404
|
18
|
+
puts JSON.pretty_generate data
|
data/lib/apist.rb
ADDED
@@ -0,0 +1,139 @@
|
|
1
|
+
require 'apist/request'
|
2
|
+
require 'apist/method'
|
3
|
+
require 'apist/selector'
|
4
|
+
|
5
|
+
class Apist
|
6
|
+
|
7
|
+
VERSION = '1.0.0'
|
8
|
+
|
9
|
+
attr_reader :requester
|
10
|
+
attr_reader :current_method
|
11
|
+
attr_accessor :suppress_exceptions
|
12
|
+
|
13
|
+
def initialize
|
14
|
+
@requester = Apist::Request.new self.class.base_url
|
15
|
+
@suppress_exceptions = true
|
16
|
+
end
|
17
|
+
|
18
|
+
# Allows setting a base url to be used for each request.
|
19
|
+
#
|
20
|
+
# class Foo < Apist
|
21
|
+
# base_url 'http://en.wikipedia.org'
|
22
|
+
# end
|
23
|
+
def self.base_url(url=nil)
|
24
|
+
return @base_url unless url
|
25
|
+
@base_url = url
|
26
|
+
end
|
27
|
+
|
28
|
+
# Create new filter in blueprint
|
29
|
+
#
|
30
|
+
# class Foo < Apist
|
31
|
+
# base_url 'http://en.wikipedia.org'
|
32
|
+
# def index
|
33
|
+
# get '/wiki/Main_Page',
|
34
|
+
# welcome_message: filter('#mp-topbanner div:first').text,
|
35
|
+
# end
|
36
|
+
# end
|
37
|
+
# @return [Apist::Selector]
|
38
|
+
def self.filter(css_selector)
|
39
|
+
Apist::Selector.new css_selector
|
40
|
+
end
|
41
|
+
|
42
|
+
# Create new filter in blueprint
|
43
|
+
#
|
44
|
+
# class Foo < Apist
|
45
|
+
# base_url 'http://en.wikipedia.org'
|
46
|
+
# def index
|
47
|
+
# get '/wiki/Main_Page',
|
48
|
+
# welcome_message: filter('#mp-topbanner div:first').text,
|
49
|
+
# end
|
50
|
+
# end
|
51
|
+
# @return [Apist::Selector]
|
52
|
+
def filter(css_selector)
|
53
|
+
self.class.filter css_selector
|
54
|
+
end
|
55
|
+
|
56
|
+
# Create new filter object with current node as filter result
|
57
|
+
#
|
58
|
+
# class Foo < Apist
|
59
|
+
# base_url 'http://en.wikipedia.org'
|
60
|
+
# def index
|
61
|
+
# get '/wiki/Main_Page',
|
62
|
+
# portals: filter('a[title^="Portal:"]').each(
|
63
|
+
# link: current.attr('href'),
|
64
|
+
# label: current.text
|
65
|
+
# ),
|
66
|
+
# end
|
67
|
+
# end
|
68
|
+
# @return [Apist::Selector]
|
69
|
+
def self.current
|
70
|
+
self.filter '*'
|
71
|
+
end
|
72
|
+
|
73
|
+
# Create new filter object with current node as filter result
|
74
|
+
#
|
75
|
+
# class Foo < Apist
|
76
|
+
# base_url 'http://en.wikipedia.org'
|
77
|
+
# def index
|
78
|
+
# get '/wiki/Main_Page',
|
79
|
+
# portals: filter('a[title^="Portal:"]').each(
|
80
|
+
# link: current.attr('href'),
|
81
|
+
# label: current.text
|
82
|
+
# ),
|
83
|
+
# end
|
84
|
+
# end
|
85
|
+
# @return [Apist::Selector]
|
86
|
+
def current
|
87
|
+
self.class.current
|
88
|
+
end
|
89
|
+
|
90
|
+
def parse(content, blueprint)
|
91
|
+
@current_method = Apist::Method.new self, nil, blueprint
|
92
|
+
@current_method.set_content content
|
93
|
+
result = @current_method.parse_blueprint blueprint
|
94
|
+
@current_method = nil
|
95
|
+
result
|
96
|
+
end
|
97
|
+
|
98
|
+
# Perform GET http-request
|
99
|
+
def get(url, blueprint = nil, options = {})
|
100
|
+
request 'get', url, blueprint, options
|
101
|
+
end
|
102
|
+
|
103
|
+
# Perform HEAD http-request
|
104
|
+
def head(url, blueprint = nil, options = {})
|
105
|
+
request 'head', url, blueprint, options
|
106
|
+
end
|
107
|
+
|
108
|
+
# Perform POST http-request
|
109
|
+
def post(url, blueprint = nil, options = {})
|
110
|
+
request 'post', url, blueprint, options
|
111
|
+
end
|
112
|
+
|
113
|
+
# Perform PUT http-request
|
114
|
+
def put(url, blueprint = nil, options = {})
|
115
|
+
request 'put', url, blueprint, options
|
116
|
+
end
|
117
|
+
|
118
|
+
# Perform PATCH http-request
|
119
|
+
def patch(url, blueprint = nil, options = {})
|
120
|
+
request 'patch', url, blueprint, options
|
121
|
+
end
|
122
|
+
|
123
|
+
# Perform DELETE http-request
|
124
|
+
def delete(url, blueprint = nil, options = {})
|
125
|
+
request 'delete', url, blueprint, options
|
126
|
+
end
|
127
|
+
|
128
|
+
private
|
129
|
+
|
130
|
+
# Perform http-request with options and parse result by blueprint.
#
# @param http_method [String] name of the HTTP verb ('get', 'post', ...)
# @param url [String] url handed to Apist::Method
# @param blueprint [Object, nil] blueprint handed to Apist::Method
# @param options [Hash] request options forwarded to Apist::Method#get
# @return [Object] whatever Apist::Method#get returns
def request(http_method, url, blueprint = nil, options = {})
  @current_method = Apist::Method.new self, url, blueprint
  @current_method.method = http_method
  @current_method.get options
ensure
  # Clear the in-flight method even when the request raises.
  @current_method = nil
end
|
138
|
+
|
139
|
+
end
|
data/lib/apist/filter.rb
ADDED
@@ -0,0 +1,237 @@
|
|
1
|
+
class Apist
|
2
|
+
class Filter
|
3
|
+
|
4
|
+
attr :node
|
5
|
+
attr :method
|
6
|
+
attr :resource
|
7
|
+
|
8
|
+
def initialize(node, method)
|
9
|
+
@node = node
|
10
|
+
@method = method
|
11
|
+
@resource = method.resource
|
12
|
+
end
|
13
|
+
|
14
|
+
# @return [Apist::Filter]
|
15
|
+
def text
|
16
|
+
guard_crawler
|
17
|
+
@node.text
|
18
|
+
end
|
19
|
+
|
20
|
+
# @return [Apist::Filter]
|
21
|
+
def html
|
22
|
+
guard_crawler
|
23
|
+
@node.inner_html
|
24
|
+
end
|
25
|
+
|
26
|
+
# @return [Apist::Filter]
|
27
|
+
def filter(selector)
|
28
|
+
guard_crawler
|
29
|
+
@node.css selector
|
30
|
+
end
|
31
|
+
|
32
|
+
# @return [Apist::Filter]
|
33
|
+
def filter_nodes(selector)
|
34
|
+
guard_crawler
|
35
|
+
@node.filter selector
|
36
|
+
end
|
37
|
+
|
38
|
+
# @return [Apist::Filter]
|
39
|
+
def find(selector)
|
40
|
+
guard_crawler
|
41
|
+
@node.css selector
|
42
|
+
end
|
43
|
+
|
44
|
+
# @return [Apist::Filter]
|
45
|
+
def children
|
46
|
+
guard_crawler
|
47
|
+
@node.children
|
48
|
+
end
|
49
|
+
|
50
|
+
# @return [Apist::Filter]
|
51
|
+
def prev
|
52
|
+
guard_crawler
|
53
|
+
prev_all[0]
|
54
|
+
end
|
55
|
+
|
56
|
+
# @return [Apist::Filter]
|
57
|
+
def prev_all
|
58
|
+
guard_crawler
|
59
|
+
sibling 'previous'
|
60
|
+
end
|
61
|
+
|
62
|
+
# @return [Apist::Filter]
|
63
|
+
def next
|
64
|
+
guard_crawler
|
65
|
+
next_all[0]
|
66
|
+
end
|
67
|
+
|
68
|
+
# @return [Apist::Filter]
|
69
|
+
def next_all
|
70
|
+
guard_crawler
|
71
|
+
sibling 'next'
|
72
|
+
end
|
73
|
+
|
74
|
+
# @return [Apist::Filter]
|
75
|
+
def is(selector)
|
76
|
+
guard_crawler
|
77
|
+
node = get_node
|
78
|
+
node.matches? selector
|
79
|
+
end
|
80
|
+
|
81
|
+
# Find the nearest ancestor of the current node that matches the selector.
#
# @param selector [String] css selector the ancestor has to match
# @return [Object, nil] nearest matching ancestor, nil when nothing matches
def closest(selector)
  guard_crawler
  # Nokogiri lists ancestors starting from the direct parent, so the
  # closest match is the first element (the last one is the outermost).
  get_node.ancestors(selector).first
end
|
87
|
+
|
88
|
+
# @return [Apist::Filter]
|
89
|
+
def attr(attribute)
|
90
|
+
guard_crawler
|
91
|
+
@node.attr(attribute).to_s
|
92
|
+
end
|
93
|
+
|
94
|
+
# @return [Apist::Filter]
|
95
|
+
def hasAttr(attribute)
|
96
|
+
guard_crawler
|
97
|
+
@node.attr(attribute) != nil
|
98
|
+
end
|
99
|
+
|
100
|
+
# @return [Apist::Filter]
|
101
|
+
def eq(position)
|
102
|
+
guard_crawler
|
103
|
+
@node.at position
|
104
|
+
end
|
105
|
+
|
106
|
+
# @return [Apist::Filter]
|
107
|
+
def first
|
108
|
+
guard_crawler
|
109
|
+
@node.first
|
110
|
+
end
|
111
|
+
|
112
|
+
# @return [Apist::Filter]
|
113
|
+
def last
|
114
|
+
guard_crawler
|
115
|
+
@node.last
|
116
|
+
end
|
117
|
+
|
118
|
+
# @return [Apist::Filter]
|
119
|
+
def element
|
120
|
+
@node
|
121
|
+
end
|
122
|
+
|
123
|
+
# @return [Apist::Filter]
|
124
|
+
def call(block)
|
125
|
+
block.call @node
|
126
|
+
end
|
127
|
+
|
128
|
+
# @return [Apist::Filter]
|
129
|
+
def strip
|
130
|
+
guard_text
|
131
|
+
@node.strip
|
132
|
+
end
|
133
|
+
|
134
|
+
# @return [Apist::Filter]
|
135
|
+
def lstrip
|
136
|
+
guard_text
|
137
|
+
@node.lstrip
|
138
|
+
end
|
139
|
+
|
140
|
+
# @return [Apist::Filter]
|
141
|
+
def rstrip
|
142
|
+
guard_text
|
143
|
+
@node.rstrip
|
144
|
+
end
|
145
|
+
|
146
|
+
# @return [Apist::Filter]
|
147
|
+
def gsub(*several_variants)
|
148
|
+
guard_text
|
149
|
+
@node.send :gsub, *several_variants
|
150
|
+
end
|
151
|
+
|
152
|
+
# @return [Apist::Filter]
|
153
|
+
def to_i(base = 10)
|
154
|
+
guard_text
|
155
|
+
@node.to_i base
|
156
|
+
end
|
157
|
+
|
158
|
+
# @return [Apist::Filter]
|
159
|
+
def to_f
|
160
|
+
guard_text
|
161
|
+
@node.to_f
|
162
|
+
end
|
163
|
+
|
164
|
+
# @return [Apist::Filter]
|
165
|
+
def exists
|
166
|
+
!@node.empty?
|
167
|
+
end
|
168
|
+
|
169
|
+
# @return [Apist::Filter]
|
170
|
+
def check(block)
|
171
|
+
call block
|
172
|
+
end
|
173
|
+
|
174
|
+
# @return [Apist::Filter]
|
175
|
+
def then(blueprint)
|
176
|
+
return @node unless @node === true
|
177
|
+
return @method.parse_blueprint blueprint
|
178
|
+
end
|
179
|
+
|
180
|
+
# @return [Apist::Filter]
|
181
|
+
def else(blueprint)
|
182
|
+
return @node unless @node === false
|
183
|
+
return @method.parse_blueprint blueprint
|
184
|
+
end
|
185
|
+
|
186
|
+
# @return [Apist::Filter]
|
187
|
+
def each(blueprint = nil)
|
188
|
+
callback = blueprint
|
189
|
+
if callback.nil?
|
190
|
+
callback = lambda { |node, i| node }
|
191
|
+
end
|
192
|
+
|
193
|
+
unless callback.is_a? Proc
|
194
|
+
callback = lambda { |node, i|
|
195
|
+
@method.parse_blueprint blueprint.clone, node
|
196
|
+
}
|
197
|
+
end
|
198
|
+
|
199
|
+
result = []
|
200
|
+
@node.each do |node|
|
201
|
+
result << callback.call(node, result.length)
|
202
|
+
end
|
203
|
+
result
|
204
|
+
end
|
205
|
+
|
206
|
+
private
|
207
|
+
|
208
|
+
def is_node
|
209
|
+
@node.is_a? Nokogiri::XML::NodeSet or @node.is_a? Nokogiri::XML::Node
|
210
|
+
end
|
211
|
+
|
212
|
+
def get_node
|
213
|
+
return @node[0] if @node.is_a? Nokogiri::XML::NodeSet
|
214
|
+
@node
|
215
|
+
end
|
216
|
+
|
217
|
+
def sibling(direction)
|
218
|
+
nodes = []
|
219
|
+
node = get_node
|
220
|
+
while (node = node.send(direction)) != nil
|
221
|
+
nodes << node if node.node_type === 1
|
222
|
+
end
|
223
|
+
nodes
|
224
|
+
end
|
225
|
+
|
226
|
+
def guard_text
|
227
|
+
@node = @node.text if is_node
|
228
|
+
end
|
229
|
+
|
230
|
+
# Ensure the current node is a Nokogiri node or node set.
#
# @raise [Apist::Error::Method] when the current node is anything else
def guard_crawler
  return if is_node

  # Apist::Error::Method is the error class the selector chain rescues;
  # any other constant would escape that rescue as a NameError.
  raise Apist::Error::Method, 'Current node isnt instance of Nokogiri Node or NodeSet.'
end
|
235
|
+
|
236
|
+
end
|
237
|
+
end
|
data/lib/apist/method.rb
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'apist/selector'
|
3
|
+
require 'apist/error/http'
|
4
|
+
|
5
|
+
class Apist
|
6
|
+
class Method
|
7
|
+
|
8
|
+
attr_reader :resource
|
9
|
+
attr_reader :url
|
10
|
+
attr_reader :blueprint
|
11
|
+
attr_accessor :method
|
12
|
+
attr_reader :content
|
13
|
+
attr_reader :crawler
|
14
|
+
|
15
|
+
# @param [Apist] resource
|
16
|
+
# @param [String] url
|
17
|
+
def initialize(resource, url, blueprint)
|
18
|
+
@resource = resource
|
19
|
+
@url = url
|
20
|
+
@blueprint = blueprint
|
21
|
+
end
|
22
|
+
|
23
|
+
def get(options)
|
24
|
+
begin
|
25
|
+
make_request options
|
26
|
+
parse_blueprint @blueprint
|
27
|
+
rescue Apist::Error::Http => e
|
28
|
+
error_response e.code, e.reason, e.url
|
29
|
+
rescue SocketError => e
|
30
|
+
error_response 0, e.message, @url
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
def make_request(options = {})
|
35
|
+
@content = @resource.requester.class.send method, @url, options
|
36
|
+
if @content.code != 200
|
37
|
+
code = @content.code
|
38
|
+
message = @content.response.message
|
39
|
+
url = @content.request.last_uri.to_s
|
40
|
+
raise Apist::Error::Http.new(code, message, url)
|
41
|
+
end
|
42
|
+
store_cookies
|
43
|
+
set_content @content.body
|
44
|
+
end
|
45
|
+
|
46
|
+
def store_cookies
|
47
|
+
cookie = @content.headers['Set-Cookie']
|
48
|
+
unless cookie.nil?
|
49
|
+
@resource.requester.class.default_cookies.add_cookies cookie
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
def set_content(content)
|
54
|
+
@crawler = Nokogiri::HTML content
|
55
|
+
end
|
56
|
+
|
57
|
+
def parse_blueprint(blueprint, node = nil)
|
58
|
+
return @content if blueprint.nil?
|
59
|
+
return parse_blueprint_value(blueprint, node) unless blueprint.is_a? Hash
|
60
|
+
blueprint.each do |key, value|
|
61
|
+
if value.is_a? Hash
|
62
|
+
blueprint[key] = parse_blueprint value.clone, node
|
63
|
+
else
|
64
|
+
blueprint[key] = parse_blueprint_value value, node
|
65
|
+
end
|
66
|
+
end
|
67
|
+
blueprint
|
68
|
+
end
|
69
|
+
|
70
|
+
private
|
71
|
+
|
72
|
+
def parse_blueprint_value(value, node)
|
73
|
+
return value.get_value(self, node) if value.is_a? Apist::Selector
|
74
|
+
return value
|
75
|
+
end
|
76
|
+
|
77
|
+
def error_response(code, reason, url)
|
78
|
+
{
|
79
|
+
url: url,
|
80
|
+
error: {
|
81
|
+
status: code,
|
82
|
+
reason: reason
|
83
|
+
}
|
84
|
+
}
|
85
|
+
end
|
86
|
+
|
87
|
+
end
|
88
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
require 'apist/error/method'
|
2
|
+
require 'apist/filter'
|
3
|
+
|
4
|
+
class Apist
|
5
|
+
class ResultCallback
|
6
|
+
|
7
|
+
attr_reader :name
|
8
|
+
attr_reader :arguments
|
9
|
+
|
10
|
+
def initialize(name, arguments)
|
11
|
+
@name = name
|
12
|
+
@arguments = arguments
|
13
|
+
end
|
14
|
+
|
15
|
+
def apply(node, method)
|
16
|
+
return apply_to_array node, method if node.is_a? Array
|
17
|
+
|
18
|
+
return node.to_s if @name === :to_s
|
19
|
+
|
20
|
+
filter = Apist::Filter.new node, method
|
21
|
+
return filter.send @name, *@arguments if filter.respond_to? @name
|
22
|
+
|
23
|
+
resource = method.resource
|
24
|
+
return call_resource_method node, resource if resource.respond_to? @name
|
25
|
+
|
26
|
+
return node.send @name, *@arguments if node.respond_to? @name
|
27
|
+
|
28
|
+
raise Apist::Error::Method, "Method '#{@name}' was not found"
|
29
|
+
end
|
30
|
+
|
31
|
+
def apply_to_array(array, method)
|
32
|
+
result = []
|
33
|
+
array.each do |node|
|
34
|
+
result << apply(node, method)
|
35
|
+
end
|
36
|
+
return result
|
37
|
+
end
|
38
|
+
|
39
|
+
# Call a method of the resource with the node as the first argument,
# followed by the arguments stored in this callback.
#
# @param node [Object] current value in the callback chain
# @param resource [Object] object that responds to the callback name
# @return [Object] result of the resource method
def call_resource_method(node, resource)
  # Prepend the node without mutating @arguments: the same callback is applied
  # to every element of an array, so unshift would pile up nodes call after call.
  resource.send @name, node, *@arguments
end
|
43
|
+
|
44
|
+
end
|
45
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'apist/resultcallback'
|
2
|
+
require 'apist/error/method'
|
3
|
+
|
4
|
+
class Apist
|
5
|
+
class Selector
|
6
|
+
|
7
|
+
undef_method :to_s
|
8
|
+
|
9
|
+
attr_reader :selector
|
10
|
+
attr_reader :result_method_chain
|
11
|
+
|
12
|
+
def initialize(selector)
|
13
|
+
@selector = selector
|
14
|
+
@result_method_chain = []
|
15
|
+
end
|
16
|
+
|
17
|
+
def method_missing(name, *arguments)
|
18
|
+
add_callback name, arguments
|
19
|
+
end
|
20
|
+
|
21
|
+
# @param [Apist::Method] method
|
22
|
+
def get_value(method, root_node = nil)
|
23
|
+
root_node = method.crawler if root_node.nil?
|
24
|
+
if @selector == '*'
|
25
|
+
result = root_node
|
26
|
+
else
|
27
|
+
result = root_node.css @selector
|
28
|
+
end
|
29
|
+
apply_result_callback_chain result, method
|
30
|
+
end
|
31
|
+
|
32
|
+
private
|
33
|
+
|
34
|
+
def apply_result_callback_chain(node, method)
|
35
|
+
add_callback 'text' if @result_method_chain.empty?
|
36
|
+
|
37
|
+
trace_stack = []
|
38
|
+
@result_method_chain.each do |result_callback|
|
39
|
+
begin
|
40
|
+
trace_stack << result_callback
|
41
|
+
node = result_callback.apply node, method
|
42
|
+
rescue Apist::Error::Method => e
|
43
|
+
return nil if method.resource.suppress_exceptions
|
44
|
+
raise Apist::Error::Method, create_exception_message(e, trace_stack)
|
45
|
+
end
|
46
|
+
end
|
47
|
+
return node
|
48
|
+
end
|
49
|
+
|
50
|
+
def add_callback(name, arguments = [])
|
51
|
+
@result_method_chain << Apist::ResultCallback.new(name, arguments)
|
52
|
+
return self
|
53
|
+
end
|
54
|
+
|
55
|
+
def create_exception_message(e, trace_stack)
|
56
|
+
message = e.message + ": filter(#{@selector})"
|
57
|
+
trace_stack.each do |callback|
|
58
|
+
message += ".#{callback.name.to_s}"
|
59
|
+
message += '(' + callback.arguments.join(', ') + ')' unless callback.arguments.empty?
|
60
|
+
end
|
61
|
+
return message
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
end
|
data/script/release
ADDED
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/bin/sh
#/ Usage: release
#/
#/ Tag the version in the repo and push the gem.
#/

set -e

# Print the usage header before changing directory, while "$0" still
# resolves relative to the directory the script was invoked from.
case "$1" in
  --help|-h|help)
    grep '^#/' <"$0" | cut -c4-
    exit 0
    ;;
esac

# Work from the repository root (quoted so paths with spaces survive).
cd "$(dirname "$0")/.."

gem_name=apist

# Build a new gem archive.
rm -rf "$gem_name"-*.gem
gem build -q "$gem_name.gemspec"

# Make sure we're on the master branch.
# -x/-F: match the whole line literally, so "* master-foo" does not pass.
if ! git branch | grep -qxF '* master'; then
  echo "Only release from the master branch."
  exit 1
fi

# Figure out what version we're releasing.
tag=v`ls "$gem_name"-*.gem | sed "s/^$gem_name-\(.*\)\.gem$/\1/"`

echo "Releasing $tag"

# Make sure we haven't released this version before.
git fetch -t origin

# Compare complete tag names literally; a substring/regex match would treat
# e.g. v1.0.0 as already released when only v1.0.01 exists.
if git tag -l | grep -qxF "$tag"; then
  echo "Whoops, there's already a '${tag}' tag."
  exit 1
fi

# Tag it and bag it.
gem push "$gem_name"-*.gem && git tag "$tag" &&
  git push origin master && git push origin "$tag"
|
data/spec/apist_spec.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'rspec'
|
2
|
+
require 'test_api'
|
3
|
+
|
4
|
+
describe 'apist' do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
@api = TestApi.new
|
8
|
+
end
|
9
|
+
|
10
|
+
it 'should generate pretty http error response' do
|
11
|
+
result = @api.get404
|
12
|
+
expect(result).to include :url, :error => {:status => 404, :reason => 'Not Found'}
|
13
|
+
end
|
14
|
+
|
15
|
+
it 'parses single blueprint value' do
|
16
|
+
result = @api.menu_first
|
17
|
+
expect(result).to eql('Welcome to Wikipedia')
|
18
|
+
end
|
19
|
+
|
20
|
+
it 'parses only filter objects' do
|
21
|
+
blueprint = {
|
22
|
+
title: 'My title',
|
23
|
+
sub: {
|
24
|
+
first: 1,
|
25
|
+
second: 2
|
26
|
+
}
|
27
|
+
}
|
28
|
+
result = @api.static_blueprint blueprint
|
29
|
+
expect(result).to eql(blueprint)
|
30
|
+
end
|
31
|
+
|
32
|
+
it 'parses blueprint' do
|
33
|
+
result = @api.index
|
34
|
+
expect(result).to include :welcome_message => 'Welcome to Wikipedia'
|
35
|
+
expect(result).to include :portals
|
36
|
+
expect(result[:portals].first).to include :link, :label
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
data/spec/test_api.rb
ADDED
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'apist'
|
2
|
+
|
3
|
+
class TestApi < Apist
|
4
|
+
base_url 'http://en.wikipedia.org'
|
5
|
+
# base_url 'http://habrahabr.my'
|
6
|
+
|
7
|
+
def get404
|
8
|
+
get '/unknown-page'
|
9
|
+
end
|
10
|
+
|
11
|
+
def menu_first
|
12
|
+
get '/wiki/Main_Page', filter('#mp-topbanner div:first').text[0...-1]
|
13
|
+
end
|
14
|
+
|
15
|
+
def static_blueprint(blueprint)
|
16
|
+
get '/wiki/Main_Page', blueprint
|
17
|
+
end
|
18
|
+
|
19
|
+
def index
|
20
|
+
get '/wiki/Main_Page',
|
21
|
+
welcome_message: filter('#mp-topbanner div:first').text[0...-1],
|
22
|
+
portals: filter('a[title^="Portal:"]').each(
|
23
|
+
link: current.attr('href').call(lambda { |href| self.class.base_url + href }),
|
24
|
+
label: current.text
|
25
|
+
),
|
26
|
+
languages: filter('#p-lang li a[title]').each(
|
27
|
+
label: current.text,
|
28
|
+
lang: current.attr('title'),
|
29
|
+
link: current.attr('href').call(lambda { |href| 'http:' + href })
|
30
|
+
),
|
31
|
+
sister_projects: filter('#mp-sister b a').each.text,
|
32
|
+
featured_article: filter('#mp-tfa').html
|
33
|
+
end
|
34
|
+
|
35
|
+
def query
|
36
|
+
post '/test.php', nil,
|
37
|
+
query: {
|
38
|
+
first: 1
|
39
|
+
},
|
40
|
+
body: {
|
41
|
+
first: 1,
|
42
|
+
second: 2
|
43
|
+
}
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
metadata
ADDED
@@ -0,0 +1,124 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: apist
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Sleeping Owl
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2014-11-26 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.7'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.7'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: httparty
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :runtime
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
description: Package to provide api-like access to foreign sites based on html parsing
|
70
|
+
email:
|
71
|
+
- owl.sleeping@yahoo.com
|
72
|
+
executables: []
|
73
|
+
extensions: []
|
74
|
+
extra_rdoc_files: []
|
75
|
+
files:
|
76
|
+
- ".gitignore"
|
77
|
+
- CHANGELOG
|
78
|
+
- Gemfile
|
79
|
+
- LICENSE
|
80
|
+
- README.md
|
81
|
+
- Rakefile
|
82
|
+
- apist.gemspec
|
83
|
+
- examples/auth_and_query.rb
|
84
|
+
- examples/basic.rb
|
85
|
+
- examples/error404.rb
|
86
|
+
- lib/apist.rb
|
87
|
+
- lib/apist/error/http.rb
|
88
|
+
- lib/apist/error/method.rb
|
89
|
+
- lib/apist/filter.rb
|
90
|
+
- lib/apist/method.rb
|
91
|
+
- lib/apist/request.rb
|
92
|
+
- lib/apist/resultcallback.rb
|
93
|
+
- lib/apist/selector.rb
|
94
|
+
- script/release
|
95
|
+
- spec/apist_spec.rb
|
96
|
+
- spec/test_api.rb
|
97
|
+
homepage: http://sleeping-owl-apist.gopagoda.com
|
98
|
+
licenses:
|
99
|
+
- MIT
|
100
|
+
metadata: {}
|
101
|
+
post_install_message:
|
102
|
+
rdoc_options: []
|
103
|
+
require_paths:
|
104
|
+
- lib
|
105
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
106
|
+
requirements:
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
requirements:
|
112
|
+
- - ">="
|
113
|
+
- !ruby/object:Gem::Version
|
114
|
+
version: '0'
|
115
|
+
requirements: []
|
116
|
+
rubyforge_project:
|
117
|
+
rubygems_version: 2.4.3
|
118
|
+
signing_key:
|
119
|
+
specification_version: 4
|
120
|
+
summary: Package to provide api-like access to foreign sites based on html parsing
|
121
|
+
test_files:
|
122
|
+
- spec/apist_spec.rb
|
123
|
+
- spec/test_api.rb
|
124
|
+
has_rdoc:
|