data_science_theater_3000 0.0.8.4 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/README.md +7 -9
- data/Rakefile +6 -0
- data/data_science_theater_3000.gemspec +4 -1
- data/lib/data_science_theater_3000.rb +17 -93
- data/lib/data_science_theater_3000/coordinates2politics.rb +53 -0
- data/lib/data_science_theater_3000/data_science_toolkit.rb +77 -0
- data/lib/data_science_theater_3000/file2text.rb +33 -0
- data/lib/data_science_theater_3000/html2story.rb +31 -0
- data/lib/data_science_theater_3000/html2text.rb +31 -0
- data/lib/data_science_theater_3000/ip2coordinates.rb +28 -0
- data/lib/data_science_theater_3000/street2coordinates.rb +28 -0
- data/lib/data_science_theater_3000/text2people.rb +28 -0
- data/lib/data_science_theater_3000/text2places.rb +26 -0
- data/lib/data_science_theater_3000/text2times.rb +31 -0
- data/lib/data_science_theater_3000/version.rb +1 -1
- data/test/data_science_toolkit_test.rb +63 -0
- data/test/file2text.txt +15 -0
- data/test/ip2coordinates.txt +10 -0
- data/test/test_helper.rb +5 -0
- data/test/text2people.txt +10 -0
- metadata +57 -8
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# data_science_theater_3000
|
2
2
|
|
3
|
+
Ruby API wrapper for the [Data Science Toolkit](http://www.datasciencetoolkit.org/)
|
3
4
|
|
4
5
|
### Installation
|
5
6
|
|
@@ -13,15 +14,12 @@
|
|
13
14
|
|
14
15
|
#### Just ...
|
15
16
|
require "data_science_theater_3000"
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
dst3k.street2coordinates(address_string)
|
23
|
-
dst3k.coordinates2politics( dst3k.ip2coordinates(ip_string) )
|
24
|
-
dst3k.coordinates2politics( dst3k.street2coordinates(address_string) )
|
17
|
+
|
18
|
+
request = DataScienceTheater3000::Ip2Coordinates.new('8.8.8.8')
|
19
|
+
> #<DataScienceTheater3000::Ip2Coordinates:0x000000019c7f60 @ip="8.8.8.8">
|
20
|
+
request.result
|
21
|
+
> {"8.8.8.8"=>{"longitude"=>-122.057403564453, "country_name"=>"United States", "postal_code"=>"94043", "region"=>"CA", "locality"=>"Mountain View", "country_code"=>"US", "dma_code"=>807, "latitude"=>37.4192008972168, "country_code3"=>"USA", "area_code"=>650}}
|
22
|
+
|
25
23
|
|
26
24
|
## Contributing
|
27
25
|
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require "data_science_theater_3000/version"
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = "data_science_theater_3000"
|
7
7
|
s.version = DataScienceTheater3000::VERSION
|
8
|
-
s.authors = ["Tad Hosford"]
|
8
|
+
s.authors = ["Tad Hosford", "Nicholas Fine"]
|
9
9
|
s.email = ["tad@isotope11.com"]
|
10
10
|
s.homepage = "http://www.isotope11.com/blog/data-science-theater-3000-a-ruby-interface-for-data-science-toolkit"
|
11
11
|
s.summary = %q{Ruby interface to Data Science Toolkit.}
|
@@ -21,6 +21,9 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_runtime_dependency 'active_support'
|
22
22
|
s.add_dependency 'curb'
|
23
23
|
s.add_dependency 'json'
|
24
|
+
s.add_development_dependency "minitest"
|
25
|
+
s.add_development_dependency "webmock"
|
26
|
+
s.add_development_dependency 'ruby-debug19'
|
24
27
|
|
25
28
|
# specify any dependencies here; for example:
|
26
29
|
# s.add_development_dependency "rspec"
|
@@ -1,101 +1,25 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
1
3
|
require "active_support"
|
2
4
|
require "json"
|
3
5
|
require "curb"
|
6
|
+
require "cgi"
|
4
7
|
require "data_science_theater_3000/version"
|
5
8
|
|
6
9
|
module DataScienceTheater3000
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
if coordinates.invert.keys.include?(nil)
|
22
|
-
coordinates[ip] = { "longitude" => "unknown", "latitude" => "unknown", "country" => "unknown", "region" => "unknown", "postal_code" => "unknown" }
|
23
|
-
end
|
24
|
-
coordinates
|
25
|
-
end
|
26
|
-
|
27
|
-
# Converts a street address into a location hash
|
28
|
-
#
|
29
|
-
# @param [String, Array] address as either a single string or array of strings
|
30
|
-
# @return [Hash] hash of location information. Singular output can be passed as input to coordinates2politics
|
31
|
-
def self.street2coordinates address
|
32
|
-
url = "http://www.datasciencetoolkit.org"
|
33
|
-
if address.class == String
|
34
|
-
address.gsub!(",", "%2c"); address.gsub!(" ", "+")
|
35
|
-
elsif address.class == Array
|
36
|
-
address.each {|a| a.gsub!(",", "%2c"); a.gsub!(" ", "+") }
|
37
|
-
address = ActiveSupport::JSON.encode(address)
|
38
|
-
end
|
39
|
-
response = Curl::Easy.perform( url + "/street2coordinates/" + address ).body_str
|
40
|
-
|
41
|
-
coordinates = make_hashy(response)
|
42
|
-
end
|
43
|
-
|
44
|
-
# Uses latitude,longitude pair to find detailed political information about a location.
|
45
|
-
# Currently supporting a single pair.
|
46
|
-
#
|
47
|
-
# @param [Hash,String] coords hash returned from ip2coordinates/street2coordinates or just a string like "33,-86"
|
48
|
-
# @return [Array] contains hashes with detailed political information for a location
|
49
|
-
def self.coordinates2politics coords
|
50
|
-
url = "http://www.datasciencetoolkit.org"
|
51
|
-
if coords.class == Hash && coords.keys.count == 1
|
52
|
-
coords = coords.keys.map{ |k| "#{coords[k]["latitude"]},#{coords[k]["longitude"]}" }.first
|
53
|
-
end
|
54
|
-
coords.gsub!( "," , "%2c" )
|
55
|
-
response = Curl::Easy.perform( url + "/coordinates2politics/" + coords ).body_str
|
56
|
-
|
57
|
-
politics = make_hashy(response)
|
58
|
-
end
|
10
|
+
autoload :DataScienceToolkit, 'data_science_theater_3000/data_science_toolkit'
|
11
|
+
autoload :Ip2Coordinates, 'data_science_theater_3000/ip2coordinates'
|
12
|
+
autoload :Street2Coordinates, 'data_science_theater_3000/street2coordinates'
|
13
|
+
autoload :Coordinates2Politics, 'data_science_theater_3000/coordinates2politics'
|
14
|
+
autoload :File2Text, 'data_science_theater_3000/file2text'
|
15
|
+
autoload :Text2People, 'data_science_theater_3000/text2people'
|
16
|
+
autoload :Text2Times, 'data_science_theater_3000/text2times'
|
17
|
+
autoload :Text2Places, 'data_science_theater_3000/text2places'
|
18
|
+
autoload :Html2Text, 'data_science_theater_3000/html2text'
|
19
|
+
autoload :Html2Story, 'data_science_theater_3000/html2story'
|
20
|
+
|
21
|
+
class DataScienceTheater3000Error < StandardError; end
|
22
|
+
|
23
|
+
class File2TextError < DataScienceTheater3000Error; end
|
59
24
|
|
60
|
-
# Specify a name or names and return parsed result as well as a best-guess for gender
|
61
|
-
#
|
62
|
-
# @param [Array,String] name can be array of name strings or a single name
|
63
|
-
# @return [Hash] information related to the provided name
|
64
|
-
def self.text2people name
|
65
|
-
url = "http://www.datasciencetoolkit.org"
|
66
|
-
name = ActiveSupport::JSON.encode(name)
|
67
|
-
c = Curl::Easy.new( url + '/text2people')
|
68
|
-
c.multipart_form_post = true
|
69
|
-
c.http_post(Curl::PostField.content('body', name))
|
70
|
-
response = c.body_str
|
71
|
-
|
72
|
-
person = make_hashy(response)
|
73
|
-
end
|
74
|
-
|
75
|
-
def self.text2times text
|
76
|
-
#url = "http://www.datasciencetoolkit.org"
|
77
|
-
#response = Curl::Easy.perform( url + "/text2times/" + text ).body_str
|
78
|
-
|
79
|
-
#times = make_hashy(response)
|
80
|
-
end
|
81
|
-
|
82
|
-
# Specify path to an image file and return the discernable text.
|
83
|
-
#
|
84
|
-
# @param [String] file_path
|
85
|
-
# @return [String] text discerned from image
|
86
|
-
def self.file2text file_path
|
87
|
-
file_reg = /([^\s]+(\.(?i)(jpg|png|gif|bmp|pdf|html))$)/
|
88
|
-
raise "Does not look like path to a file: #{file_path}" if !file_reg.match(file_path)
|
89
|
-
raise "Support paths to images hosted remotely soon" if file_path.include?('http:')
|
90
|
-
url = "http://www.datasciencetoolkit.org"
|
91
|
-
c = Curl::Easy.new( url + '/file2text')
|
92
|
-
c.multipart_form_post = true
|
93
|
-
c.http_post(Curl::PostField.file('inputfile', file_path))
|
94
|
-
|
95
|
-
response = c.body_str
|
96
|
-
end
|
97
|
-
|
98
|
-
def self.make_hashy response
|
99
|
-
ActiveSupport::JSON.decode(response)
|
100
|
-
end
|
101
25
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Coordinates2Politics
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :coordinates
|
6
|
+
|
7
|
+
# Accepts lat/lon coordinates and returns detailed political information.
|
8
|
+
#
|
9
|
+
# @param [String, Array, Hash] coordinates as a single string, an array of strings, or a hash whose values are location hashes containing "latitude" and "longitude"
|
10
|
+
# @api public
|
11
|
+
def initialize coordinates
|
12
|
+
if coordinates.is_a?(Array)
|
13
|
+
@coordinates = ActiveSupport::JSON.encode(coordinates.map!{|a|CGI::escape(a)}) if coordinates.is_a?(Array)
|
14
|
+
elsif coordinates.is_a?(String)
|
15
|
+
@coordinates = CGI::escape(coordinates)
|
16
|
+
elsif coordinates.is_a?(Hash)
|
17
|
+
coordinates = sanitize(coordinates)
|
18
|
+
@coordinates = ActiveSupport::JSON.encode(coordinates.map!{|a| a.map!{|b| CGI::escape(b)}}) if coordinates.is_a?(Array)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# @return [Hash]
|
23
|
+
# @api public
|
24
|
+
def result
|
25
|
+
dst_request.result
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
protected
|
30
|
+
# @api private
|
31
|
+
def dst_request
|
32
|
+
@dst_request ||= DataScienceToolkit::Request.new(request_method: 'coordinates2politics', request_params: coordinates)
|
33
|
+
end
|
34
|
+
|
35
|
+
def sanitize coordinates
|
36
|
+
# Clear out nil results
|
37
|
+
coordinates.reject!{ |k,v| v.nil? }
|
38
|
+
# Clear out any results that are not a longitude or latitude
|
39
|
+
coordinates.each{ |k,v| coordinates[k].reject!{ |k,v| !["latitude","longitude"].include?(k) }}
|
40
|
+
# Initialize an empty array to hold all the lat, long pairs
|
41
|
+
a = []
|
42
|
+
# Stuff each latitude,longitude (stringified) into the array
|
43
|
+
coordinates.each{ |k,v| coordinates[k].keys.sort.each{ |ki| a << coordinates[k][ki].to_s }}
|
44
|
+
sanitized_coordinates = []
|
45
|
+
# Group the pairs into arrays within an array
|
46
|
+
while a.length > 0
|
47
|
+
sanitized_coordinates << a.slice!(0..1)
|
48
|
+
end
|
49
|
+
|
50
|
+
sanitized_coordinates
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
module DataScienceToolkit
|
3
|
+
|
4
|
+
# Performs a GET request to the datasciencetoolkit server
|
5
|
+
class Request
|
6
|
+
REQUEST_URL = "http://www.datasciencetoolkit.org"
|
7
|
+
|
8
|
+
# @api private
|
9
|
+
attr_accessor :request_method
|
10
|
+
|
11
|
+
# @api private
|
12
|
+
attr_accessor :request_params
|
13
|
+
|
14
|
+
# @api public
|
15
|
+
def initialize opts={}
|
16
|
+
@request_method = opts.fetch(:request_method){ raise DataScienceTheater3000Error, "No request_method option specified" }
|
17
|
+
@request_params = opts.fetch(:request_params){ raise DataScienceTheater3000Error, "No request_params option specified" }
|
18
|
+
end
|
19
|
+
|
20
|
+
# @api public
|
21
|
+
def result
|
22
|
+
@result ||= make_request
|
23
|
+
end
|
24
|
+
|
25
|
+
protected
|
26
|
+
# @api private
|
27
|
+
def make_request
|
28
|
+
response = Curl::Easy.perform( REQUEST_URL + "/#{request_method}/" + "#{request_params}" ).body_str
|
29
|
+
ActiveSupport::JSON.decode(response)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Performs a POST request to the datasciencetoolkit server
|
34
|
+
# PostRequest class is always used via a subclass
|
35
|
+
class PostRequest < Request
|
36
|
+
|
37
|
+
# @api private
|
38
|
+
attr_accessor :post_key
|
39
|
+
|
40
|
+
# @api public
|
41
|
+
def initialize opts={}
|
42
|
+
super
|
43
|
+
@post_key = opts.fetch(:post_key){ raise DataScienceTheater3000Error, "No post_key option specified" }
|
44
|
+
end
|
45
|
+
|
46
|
+
protected
|
47
|
+
# Overridden in PostBodyRequest and PostFileRequest
|
48
|
+
# @api private
|
49
|
+
def make_request
|
50
|
+
false
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class PostBodyRequest < PostRequest
|
55
|
+
protected
|
56
|
+
# @api private
|
57
|
+
def make_request
|
58
|
+
request = Curl::Easy.new(REQUEST_URL + "/#{request_method}")
|
59
|
+
request.multipart_form_post = true
|
60
|
+
request.http_post(Curl::PostField.content(post_key, request_params))
|
61
|
+
ActiveSupport::JSON.decode(request.body_str)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class PostFileRequest < PostRequest
|
66
|
+
protected
|
67
|
+
# @api private
|
68
|
+
def make_request
|
69
|
+
request = Curl::Easy.new(REQUEST_URL + "/#{request_method}")
|
70
|
+
request.multipart_form_post = true
|
71
|
+
request.http_post(Curl::PostField.file(post_key, request_params))
|
72
|
+
request.body_str
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class File2Text
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :file_path
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize file_path
|
10
|
+
@file_path = file_path
|
11
|
+
ensure_valid_file
|
12
|
+
end
|
13
|
+
|
14
|
+
# @return [String] raw response body returned by the file2text request
|
15
|
+
# @api public
|
16
|
+
def result
|
17
|
+
dst_request.result
|
18
|
+
end
|
19
|
+
|
20
|
+
protected
|
21
|
+
# @api private
|
22
|
+
def ensure_valid_file
|
23
|
+
raise File2TextError, "Does not look like path to a file: #{file_path}" unless file_path.match(/([^\s]+(\.(?i)(jpg|png|gif|bmp|pdf|html))$)/)
|
24
|
+
raise File2TextError, "Remote files not supported. Coming soon" if file_path.match(/^http/)
|
25
|
+
end
|
26
|
+
|
27
|
+
# @api private
|
28
|
+
def dst_request
|
29
|
+
@dst_request ||=
|
30
|
+
DataScienceToolkit::PostFileRequest.new(request_method: 'file2text', request_params: file_path, post_key: 'inputfile')
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Html2Story
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :html_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize html_string
|
10
|
+
@html_string = html_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
case
|
24
|
+
when html_string.length < 7500
|
25
|
+
DataScienceToolkit::Request.new(request_method: 'html2story', request_params: html_string)
|
26
|
+
when html_string.length >= 7500
|
27
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'html2story', request_params: html_string, post_key: 'body')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Html2Text
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :html_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize html_string
|
10
|
+
@html_string = html_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
case
|
24
|
+
when html_string.length < 7500
|
25
|
+
DataScienceToolkit::Request.new(request_method: 'html2text', request_params: html_string)
|
26
|
+
when html_string.length >= 7500
|
27
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'html2text', request_params: html_string, post_key: 'body')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Ip2Coordinates
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :ip
|
6
|
+
|
7
|
+
# Converts one or more IP addresses into a location hash
|
8
|
+
#
|
9
|
+
# @param [Array, String] ip Either an array of ip strings or a single ip string to be located
|
10
|
+
# @api public
|
11
|
+
def initialize ip
|
12
|
+
@ip = CGI::escape(ip) if ip.is_a?(String)
|
13
|
+
@ip = ActiveSupport::JSON.encode(ip.map!{|a|CGI::escape(a)}) if ip.is_a?(Array)
|
14
|
+
end
|
15
|
+
|
16
|
+
# @return [Hash]
|
17
|
+
# @api public
|
18
|
+
def result
|
19
|
+
dst_request.result
|
20
|
+
end
|
21
|
+
|
22
|
+
protected
|
23
|
+
# @api private
|
24
|
+
def dst_request
|
25
|
+
@dst_request ||= DataScienceToolkit::Request.new(request_method: 'ip2coordinates', request_params: ip)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Street2Coordinates
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :address
|
6
|
+
|
7
|
+
# Converts a street address into a location hash
|
8
|
+
#
|
9
|
+
# @param [String, Array] address as either a single string or array of strings
|
10
|
+
# @api public
|
11
|
+
def initialize address
|
12
|
+
@address = CGI::escape(address) if address.is_a?(String)
|
13
|
+
@address = ActiveSupport::JSON.encode(address.map!{|a|CGI::escape(a)}) if address.is_a?(Array)
|
14
|
+
end
|
15
|
+
|
16
|
+
# @return [Hash]
|
17
|
+
# @api public
|
18
|
+
def result
|
19
|
+
dst_request.result
|
20
|
+
end
|
21
|
+
|
22
|
+
protected
|
23
|
+
# @api private
|
24
|
+
def dst_request
|
25
|
+
@dst_request ||= DataScienceToolkit::Request.new(request_method: 'street2coordinates', request_params: address)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Text2People
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :name
|
6
|
+
|
7
|
+
# Specify a name or names and return parsed result as well as a best-guess for gender
|
8
|
+
#
|
9
|
+
# @param [Array,String] name can be array of name strings or a single name
|
10
|
+
# @api public
|
11
|
+
def initialize name
|
12
|
+
@name = ActiveSupport::JSON.encode(name)
|
13
|
+
end
|
14
|
+
|
15
|
+
# @return [Hash] information related to the provided name(s)
|
16
|
+
# @api public
|
17
|
+
def result
|
18
|
+
dst_request.result
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
# @api private
|
23
|
+
def dst_request
|
24
|
+
@dst_request ||=
|
25
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'text2people', request_params: name, post_key: 'body')
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Text2Places
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :text_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize text_string
|
10
|
+
@text_string = text_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
DataScienceToolkit::Request.new(request_method: 'text2places', request_params: text_string)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Text2Times
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :text_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize text_string
|
10
|
+
@text_string = text_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
case
|
24
|
+
when text_string.length < 7500
|
25
|
+
DataScienceToolkit::Request.new(request_method: 'text2times', request_params: text_string)
|
26
|
+
when text_string.length >= 7500
|
27
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'text2times', request_params: text_string, post_key: 'body')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require './test/test_helper'
|
2
|
+
|
3
|
+
describe DataScienceTheater3000::DataScienceToolkit::Request do
|
4
|
+
before do
|
5
|
+
stub_request(:any, /.*datasciencetoolkit.*/).to_return(File.new('./test/ip2coordinates.txt'))
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#result' do
|
9
|
+
it 'returns a hash' do
|
10
|
+
DataScienceTheater3000::DataScienceToolkit::Request.new(request_params: 'foo', request_method: 'bar').result.must_be_instance_of Hash
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
describe 'initialized without a request_method' do
|
15
|
+
it 'raises a DataScienceTheater3000Error' do
|
16
|
+
proc{ DataScienceTheater3000::DataScienceToolkit::Request.new(request_params: 'foo')}.must_raise DataScienceTheater3000::DataScienceTheater3000Error
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe 'initialized without request_params' do
|
21
|
+
it 'raises a DataScienceTheater3000Error' do
|
22
|
+
proc{ DataScienceTheater3000::DataScienceToolkit::Request.new(request_method: 'foo')}.must_raise DataScienceTheater3000::DataScienceTheater3000Error
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe DataScienceTheater3000::DataScienceToolkit::PostRequest do
|
28
|
+
describe 'initialized without a post_key' do
|
29
|
+
it 'raises a DataScienceTheater3000Error' do
|
30
|
+
proc{ DataScienceTheater3000::DataScienceToolkit::PostRequest.new(request_params: 'foo', request_method: 'bar')}.must_raise DataScienceTheater3000::DataScienceTheater3000Error
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#make_request' do
|
35
|
+
it 'returns false' do
|
36
|
+
DataScienceTheater3000::DataScienceToolkit::PostRequest.new(request_method: 'foo', request_params: 'bar', post_key: 'baz').send(:make_request).must_equal false
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe DataScienceTheater3000::DataScienceToolkit::PostBodyRequest do
|
42
|
+
before do
|
43
|
+
stub_request(:any, /.*datasciencetoolkit.*/).to_return(File.new('./test/text2people.txt'))
|
44
|
+
end
|
45
|
+
|
46
|
+
describe '#result' do
|
47
|
+
it 'returns a hash' do
|
48
|
+
DataScienceTheater3000::DataScienceToolkit::PostBodyRequest.new(request_method: 'bar', request_params: 'foo', post_key: 'baz').result.must_be_instance_of String
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe DataScienceTheater3000::DataScienceToolkit::PostFileRequest do
|
54
|
+
before do
|
55
|
+
stub_request(:any, /.*datasciencetoolkit.*/).to_return(File.new('./test/file2text.txt'))
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '#result' do
|
59
|
+
it 'returns a hash' do
|
60
|
+
DataScienceTheater3000::DataScienceToolkit::PostFileRequest.new(request_method: 'bar', request_params: 'foo', post_key: 'baz').result.must_be_instance_of String
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
data/test/file2text.txt
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Fri, 27 Jan 2012 21:41:01 GMT
|
3
|
+
Server: Apache/2.2.14 (Ubuntu)
|
4
|
+
X-Powered-By: Phusion Passenger (mod_rails/mod_rack) 3.0.4
|
5
|
+
Content-Disposition: attachment; filename="sample.png.txt"
|
6
|
+
Content-Length: 481
|
7
|
+
Status: 200
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Content-Type: text/plain;charset=utf-8
|
10
|
+
|
11
|
+
lf you pass in an image, this API will run an optical character recoghition algorithm to extract any words or
|
12
|
+
sentences it can from the picture. lf you upload a PDF file, Word document, Excel spreadsheet or HTML file, it
|
13
|
+
will retum a plain text version of the content. Unlike most of the calls, this one takes its input in the standard
|
14
|
+
multipart form-encoded format that’s used when browsers upload files, rather than as JSON. lt retums any content
|
15
|
+
it finds as a stream of text.
|
@@ -0,0 +1,10 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Fri, 27 Jan 2012 21:36:11 GMT
|
3
|
+
Server: Apache/2.2.14 (Ubuntu)
|
4
|
+
X-Powered-By: Phusion Passenger (mod_rails/mod_rack) 3.0.4
|
5
|
+
Content-Length: 241
|
6
|
+
Status: 200
|
7
|
+
Vary: Accept-Encoding
|
8
|
+
Content-Type: text/html;charset=utf-8
|
9
|
+
|
10
|
+
{"24.73.187.202":{"longitude":-86.8066024780273,"country_name":"United States","postal_code":"35209","region":"AL","locality":"Birmingham","country_code":"US","dma_code":630,"latitude":33.4667015075684,"country_code3":"USA","area_code":205}}
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Fri, 27 Jan 2012 21:36:11 GMT
|
3
|
+
Server: Apache/2.2.14 (Ubuntu)
|
4
|
+
X-Powered-By: Phusion Passenger (mod_rails/mod_rack) 3.0.4
|
5
|
+
Content-Length: 241
|
6
|
+
Status: 200
|
7
|
+
Vary: Accept-Encoding
|
8
|
+
Content-Type: text/html;charset=utf-8
|
9
|
+
|
10
|
+
"[{\"gender\":\"m\",\"first_name\":\"Nicholas\",\"title\":\"\",\"surnames\":\"Fine\",\"start_index\":92,\"end_index\":105,\"matched_string\":\"Nicholas Fine\"}]"
|
metadata
CHANGED
@@ -1,19 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_science_theater_3000
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Tad Hosford
|
9
|
+
- Nicholas Fine
|
9
10
|
autorequire:
|
10
11
|
bindir: bin
|
11
12
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
13
|
+
date: 2012-01-28 00:00:00.000000000Z
|
13
14
|
dependencies:
|
14
15
|
- !ruby/object:Gem::Dependency
|
15
16
|
name: active_support
|
16
|
-
requirement: &
|
17
|
+
requirement: &20571340 !ruby/object:Gem::Requirement
|
17
18
|
none: false
|
18
19
|
requirements:
|
19
20
|
- - ! '>='
|
@@ -21,10 +22,10 @@ dependencies:
|
|
21
22
|
version: '0'
|
22
23
|
type: :runtime
|
23
24
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
+
version_requirements: *20571340
|
25
26
|
- !ruby/object:Gem::Dependency
|
26
27
|
name: curb
|
27
|
-
requirement: &
|
28
|
+
requirement: &20570440 !ruby/object:Gem::Requirement
|
28
29
|
none: false
|
29
30
|
requirements:
|
30
31
|
- - ! '>='
|
@@ -32,10 +33,10 @@ dependencies:
|
|
32
33
|
version: '0'
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
+
version_requirements: *20570440
|
36
37
|
- !ruby/object:Gem::Dependency
|
37
38
|
name: json
|
38
|
-
requirement: &
|
39
|
+
requirement: &20569320 !ruby/object:Gem::Requirement
|
39
40
|
none: false
|
40
41
|
requirements:
|
41
42
|
- - ! '>='
|
@@ -43,7 +44,40 @@ dependencies:
|
|
43
44
|
version: '0'
|
44
45
|
type: :runtime
|
45
46
|
prerelease: false
|
46
|
-
version_requirements: *
|
47
|
+
version_requirements: *20569320
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: minitest
|
50
|
+
requirement: &20568300 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
type: :development
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: *20568300
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: webmock
|
61
|
+
requirement: &20567700 !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
type: :development
|
68
|
+
prerelease: false
|
69
|
+
version_requirements: *20567700
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: ruby-debug19
|
72
|
+
requirement: &20567040 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
type: :development
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: *20567040
|
47
81
|
description: Ruby interface to issue Data Science Toolkit API calls.
|
48
82
|
email:
|
49
83
|
- tad@isotope11.com
|
@@ -58,7 +92,22 @@ files:
|
|
58
92
|
- data_science_theater_3000.gemspec
|
59
93
|
- doc/sample.png
|
60
94
|
- lib/data_science_theater_3000.rb
|
95
|
+
- lib/data_science_theater_3000/coordinates2politics.rb
|
96
|
+
- lib/data_science_theater_3000/data_science_toolkit.rb
|
97
|
+
- lib/data_science_theater_3000/file2text.rb
|
98
|
+
- lib/data_science_theater_3000/html2story.rb
|
99
|
+
- lib/data_science_theater_3000/html2text.rb
|
100
|
+
- lib/data_science_theater_3000/ip2coordinates.rb
|
101
|
+
- lib/data_science_theater_3000/street2coordinates.rb
|
102
|
+
- lib/data_science_theater_3000/text2people.rb
|
103
|
+
- lib/data_science_theater_3000/text2places.rb
|
104
|
+
- lib/data_science_theater_3000/text2times.rb
|
61
105
|
- lib/data_science_theater_3000/version.rb
|
106
|
+
- test/data_science_toolkit_test.rb
|
107
|
+
- test/file2text.txt
|
108
|
+
- test/ip2coordinates.txt
|
109
|
+
- test/test_helper.rb
|
110
|
+
- test/text2people.txt
|
62
111
|
homepage: http://www.isotope11.com/blog/data-science-theater-3000-a-ruby-interface-for-data-science-toolkit
|
63
112
|
licenses: []
|
64
113
|
post_install_message:
|