vasily 0.1.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +68 -1
- data/lib/vasily/client.rb +59 -0
- data/lib/vasily/document.rb +13 -0
- data/lib/vasily/error.rb +90 -0
- data/lib/vasily/version.rb +1 -1
- data/lib/vasily.rb +3 -4
- data/vasily.gemspec +2 -0
- metadata +18 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0e9f44d40f4eb754ebb6fcbda54434593f05ada4
|
4
|
+
data.tar.gz: 54b730601c28b30338ca70d2b29a5445155753e3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d1fc29b978dd18222e3d79b3c7b08cb0bfff042a3a590a9900795ceea5e39fe340688b04dcc385193684abcd5aee03b07fad4eb448df152020129557cb994067
|
7
|
+
data.tar.gz: c3b71abe353c043b784537c777de8b1e94cc57b51f581e91e21c35cffc0e40e2d9ee8257a59da766f4c8cf514fe14f2ecf55be14ed4ef417b4b4d0d27436afbb
|
data/README.md
CHANGED
@@ -21,7 +21,74 @@ Or install it yourself as:
|
|
21
21
|
|
22
22
|
## Usage
|
23
23
|
|
24
|
-
|
24
|
+
To use the API, you first need to obtain a (free) token from [textocat.com](http://textocat.com).
|
25
|
+
Then use it to initialize the client:
|
26
|
+
```ruby
|
27
|
+
client = Vasily::Client.new('auth_token')
|
28
|
+
client.status # => 200
|
29
|
+
```
|
30
|
+
Let's analyze some documents!
|
31
|
+
```ruby
|
32
|
+
doc1 = Vasily::Document.new("Председатель совета директоров ОАО «МДМ Банк» Олег Вьюгин — о том, чему приведет обмен санкциями между Россией и Западом в следующем году. Беседовала Светлана Сухова.", "doc1")
|
33
|
+
doc2 = Vasily::Document.new("Не перепутает Генри Форда и компанию «Форд» в документах", "doc2")
|
34
|
+
doc3 = Vasily::Document.new("Штаб-квартира компании Форд Моторс располагается в городе Дирборн.", "doc3")
|
35
|
+
batch_id, status = client.queue([doc1, doc2, doc3]) # => ["abcdefgh-1111-2222-3333-abcdefabcdef", "IN_PROGRESS"]
|
36
|
+
status = client.request(batch_id) # => "FINISHED"
|
37
|
+
```
|
38
|
+
To retrieve entities:
|
39
|
+
```ruby
|
40
|
+
entities = client.retrieve([batch_id])
|
41
|
+
puts JSON.pretty_generate(entities)
|
42
|
+
[
|
43
|
+
{
|
44
|
+
"status": "SUCCESS",
|
45
|
+
"tag": "doc1",
|
46
|
+
"entities": [
|
47
|
+
{
|
48
|
+
"span": "Председатель совета директоров ОАО «МДМ Банк» Олег Вьюгин",
|
49
|
+
"category": "PERSON",
|
50
|
+
"beginOffset": 0,
|
51
|
+
"endOffset": 57
|
52
|
+
},
|
53
|
+
...
|
54
|
+
]
|
55
|
+
},
|
56
|
+
{
|
57
|
+
"status": "SUCCESS",
|
58
|
+
"tag": "doc2",
|
59
|
+
"entities": [
|
60
|
+
...
|
61
|
+
]
|
62
|
+
},
|
63
|
+
...
|
64
|
+
]
|
65
|
+
```
|
66
|
+
To perform search:
|
67
|
+
```ruby
|
68
|
+
search_result = client.search('ORGANIZATION:форд')
|
69
|
+
puts JSON.pretty_generate(search_result)
|
70
|
+
[
|
71
|
+
{
|
72
|
+
"status": "SUCCESS",
|
73
|
+
"tag": "doc2",
|
74
|
+
"entities": [
|
75
|
+
{
|
76
|
+
"span": "Генри Форда",
|
77
|
+
"category": "PERSON",
|
78
|
+
"beginOffset": 14,
|
79
|
+
"endOffset": 25
|
80
|
+
},
|
81
|
+
{
|
82
|
+
"span": "компанию «Форд»",
|
83
|
+
"category": "ORGANIZATION",
|
84
|
+
"beginOffset": 28,
|
85
|
+
"endOffset": 43
|
86
|
+
}
|
87
|
+
]
|
88
|
+
},
|
89
|
+
...
|
90
|
+
]
|
91
|
+
```
|
25
92
|
|
26
93
|
## Development
|
27
94
|
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'httparty'
|
2
|
+
require 'vasily/document'
|
3
|
+
require 'vasily/error'
|
4
|
+
|
5
|
+
module Vasily
  # HTTParty-based client for the Textocat entity API (api.textocat.com).
  #
  # Every entity endpoint sends the auth token as the +auth_token+ query
  # parameter and raises a Vasily::Error when the response status is not the
  # one expected for that endpoint.
  class Client
    include HTTParty
    base_uri 'api.textocat.com'

    # @param auth_token [String] API token added to the query of entity requests
    def initialize(auth_token)
      @auth = { auth_token: auth_token }
    end

    # Requests GET /status (the auth token is not sent with this call).
    #
    # @return [Integer] HTTP status code of the response
    def status
      self.class.get('/status').code
    end

    # Submits documents for processing via POST /entity/queue.
    #
    # @param docs [Array<#to_hash>] documents; each one is serialised with +to_hash+
    # @return [Array] two elements: the response's +batchId+ and +status+ fields
    # @raise [Vasily::Error] when the response code is not 202
    def queue(docs)
      body = docs.map(&:to_hash).to_json
      options = { body: body, query: @auth, headers: { 'Content-Type' => 'application/json' } }
      response = self.class.post('/entity/queue', options)
      verify_status(response, 202)
      [response['batchId'], response['status']]
    end

    # Asks for the processing state of a batch via GET /entity/request.
    #
    # @param batch_id [String] identifier returned by {#queue}
    # @return [Object] the +status+ field of the response
    # @raise [Vasily::Error] when the response code is not 200
    def request(batch_id)
      response = self.class.get('/entity/request', query: @auth.merge(batch_id: batch_id))
      verify_status(response, 200)
      response['status']
    end

    # Fetches the processed documents of one or more batches via
    # GET /entity/retrieve.
    #
    # @param batch_ids [Array<String>] batch identifiers
    # @return [Object] the +documents+ field of the response
    # @raise [Vasily::Error] when the response code is not 200
    def retrieve(batch_ids)
      # The ids are encoded by hand as repeated "batch_id=..." pairs and put
      # into the path; presumably this avoids HTTParty's own array encoding
      # of the :query option -- TODO confirm against the API.
      batch_ids_query = URI.encode_www_form('batch_id' => batch_ids)
      response = self.class.get("/entity/retrieve?#{batch_ids_query}", query: @auth)
      verify_status(response, 200)
      response['documents']
    end

    # Searches processed documents via GET /entity/search.
    #
    # @param search_query [String] query string, sent as +search_query+
    # @return [Object] the +documents+ field of the response
    # @raise [Vasily::Error] when the response code is not 200
    def search(search_query)
      response = self.class.get('/entity/search', query: @auth.merge(search_query: search_query))
      verify_status(response, 200)
      response['documents']
    end

    private

    # Raises the error matching the response unless it has the expected code.
    #
    # from_message_and_code may return nil for a status code it has no class
    # for; raising nil would surface as a TypeError, so a plain Vasily::Error
    # carrying the body and code is raised in that case.
    def verify_status(response, expected_code)
      return if response.code == expected_code

      error = Vasily::Error.from_message_and_code(response.body, response.code)
      raise(error || Vasily::Error.new(response.body, response.code))
    end
  end
end
|
data/lib/vasily/error.rb
ADDED
@@ -0,0 +1,90 @@
|
|
1
|
+
module Vasily
  # Base class for errors raised for unsuccessful Textocat API responses.
  #
  # Besides the usual exception message, every instance carries the HTTP
  # status code it was built from (see #code).
  class Error < StandardError
    # @return [Integer, nil] HTTP status code given to the constructor
    attr_reader :code

    # Raised when Textocat returns a 4xx HTTP status code
    ClientError = Class.new(self)

    # Raised when Textocat returns the HTTP status code 400
    BadRequest = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 401
    Unauthorized = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 402
    LimitExceeded = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 403
    Forbidden = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 404
    NotFound = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 405
    UnacceptableMethod = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 406
    NotAcceptable = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 413
    InputLimitExceeded = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 415
    UnacceptableMIME = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 416
    TooManyCollections = Class.new(ClientError)

    # Raised when Textocat returns the HTTP status code 429
    TooManyRequests = Class.new(ClientError)

    # Raised when Textocat returns a 5xx HTTP status code
    ServerError = Class.new(self)

    # Raised when Textocat returns the HTTP status code 500
    InternalServerError = Class.new(ServerError)

    # Raised when Textocat returns the HTTP status code 502
    BadGateway = Class.new(ServerError)

    # Raised when Textocat returns the HTTP status code 503
    ServiceUnavailable = Class.new(ServerError)

    # Raised when Textocat returns the HTTP status code 504
    GatewayTimeout = Class.new(ServerError)

    # Maps HTTP status codes to the error class raised for them.
    ERRORS = {
      400 => Vasily::Error::BadRequest,
      401 => Vasily::Error::Unauthorized,
      402 => Vasily::Error::LimitExceeded,
      403 => Vasily::Error::Forbidden,
      404 => Vasily::Error::NotFound,
      405 => Vasily::Error::UnacceptableMethod,
      406 => Vasily::Error::NotAcceptable,
      413 => Vasily::Error::InputLimitExceeded,
      415 => Vasily::Error::UnacceptableMIME,
      416 => Vasily::Error::TooManyCollections,
      429 => Vasily::Error::TooManyRequests,
      500 => Vasily::Error::InternalServerError,
      502 => Vasily::Error::BadGateway,
      503 => Vasily::Error::ServiceUnavailable,
      504 => Vasily::Error::GatewayTimeout
    }.freeze

    class << self
      # Builds the error instance that corresponds to an HTTP status code.
      #
      # Always returns an exception object, so the result can be handed
      # straight to +raise+/+fail+: codes without a dedicated class fall back
      # to ClientError (4xx), ServerError (5xx) or Error (anything else).
      #
      # @param message [String] error message, typically the response body
      # @param code [Integer] HTTP status code
      # @return [Vasily::Error] instance of the most specific matching class
      def from_message_and_code(message = '', code)
        error_class_for(code).new(message, code)
      end

      private

      # Picks the most specific error class known for +code+.
      def error_class_for(code)
        ERRORS.fetch(code) do
          case code
          when 400..499 then ClientError
          when 500..599 then ServerError
          else Error
          end
        end
      end
    end

    # @param message [String] exception message
    # @param code [Integer, nil] HTTP status code exposed through #code
    def initialize(message = '', code = nil)
      super(message)
      @code = code
    end
  end
end
|
data/lib/vasily/version.rb
CHANGED
data/lib/vasily.rb
CHANGED
data/vasily.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: vasily
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Fedor Nikolaev
|
@@ -38,6 +38,20 @@ dependencies:
|
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
40
|
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: httparty
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :runtime
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
41
55
|
description:
|
42
56
|
email:
|
43
57
|
- fsqcds@gmail.com
|
@@ -52,6 +66,9 @@ files:
|
|
52
66
|
- bin/console
|
53
67
|
- bin/setup
|
54
68
|
- lib/vasily.rb
|
69
|
+
- lib/vasily/client.rb
|
70
|
+
- lib/vasily/document.rb
|
71
|
+
- lib/vasily/error.rb
|
55
72
|
- lib/vasily/version.rb
|
56
73
|
- vasily.gemspec
|
57
74
|
homepage: https://github.com/fedorn/vasily
|