data_science_theater_3000 0.0.8.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/README.md +7 -9
- data/Rakefile +6 -0
- data/data_science_theater_3000.gemspec +4 -1
- data/lib/data_science_theater_3000.rb +17 -93
- data/lib/data_science_theater_3000/coordinates2politics.rb +53 -0
- data/lib/data_science_theater_3000/data_science_toolkit.rb +77 -0
- data/lib/data_science_theater_3000/file2text.rb +33 -0
- data/lib/data_science_theater_3000/html2story.rb +31 -0
- data/lib/data_science_theater_3000/html2text.rb +31 -0
- data/lib/data_science_theater_3000/ip2coordinates.rb +28 -0
- data/lib/data_science_theater_3000/street2coordinates.rb +28 -0
- data/lib/data_science_theater_3000/text2people.rb +28 -0
- data/lib/data_science_theater_3000/text2places.rb +26 -0
- data/lib/data_science_theater_3000/text2times.rb +31 -0
- data/lib/data_science_theater_3000/version.rb +1 -1
- data/test/data_science_toolkit_test.rb +63 -0
- data/test/file2text.txt +15 -0
- data/test/ip2coordinates.txt +10 -0
- data/test/test_helper.rb +5 -0
- data/test/text2people.txt +10 -0
- metadata +57 -8
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# data_science_theater_3000
|
2
2
|
|
3
|
+
Ruby API wrapper for the [Data Science Toolkit](http://www.datasciencetoolkit.org/)
|
3
4
|
|
4
5
|
### Installation
|
5
6
|
|
@@ -13,15 +14,12 @@
|
|
13
14
|
|
14
15
|
#### Just ...
|
15
16
|
require "data_science_theater_3000"
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
dst3k.street2coordinates(address_string)
|
23
|
-
dst3k.coordinates2politics( dst3k.ip2coordinates(ip_string) )
|
24
|
-
dst3k.coordinates2politics( dst3k.street2coordinates(address_string) )
|
17
|
+
|
18
|
+
request = DataScienceTheater3000::Ip2Coordinates.new('8.8.8.8')
|
19
|
+
> #<DataScienceTheater3000::Ip2Coordinates:0x000000019c7f60 @ip="8.8.8.8">
|
20
|
+
request.result
|
21
|
+
> {"8.8.8.8"=>{"longitude"=>-122.057403564453, "country_name"=>"United States", "postal_code"=>"94043", "region"=>"CA", "locality"=>"Mountain View", "country_code"=>"US", "dma_code"=>807, "latitude"=>37.4192008972168, "country_code3"=>"USA", "area_code"=>650}}
|
22
|
+
|
25
23
|
|
26
24
|
## Contributing
|
27
25
|
|
data/Rakefile
CHANGED
@@ -5,7 +5,7 @@ require "data_science_theater_3000/version"
|
|
5
5
|
Gem::Specification.new do |s|
|
6
6
|
s.name = "data_science_theater_3000"
|
7
7
|
s.version = DataScienceTheater3000::VERSION
|
8
|
-
s.authors = ["Tad Hosford"]
|
8
|
+
s.authors = ["Tad Hosford", "Nicholas Fine"]
|
9
9
|
s.email = ["tad@isotope11.com"]
|
10
10
|
s.homepage = "http://www.isotope11.com/blog/data-science-theater-3000-a-ruby-interface-for-data-science-toolkit"
|
11
11
|
s.summary = %q{Ruby interface to Data Science Toolkit.}
|
@@ -21,6 +21,9 @@ Gem::Specification.new do |s|
|
|
21
21
|
s.add_runtime_dependency 'active_support'
|
22
22
|
s.add_dependency 'curb'
|
23
23
|
s.add_dependency 'json'
|
24
|
+
s.add_development_dependency "minitest"
|
25
|
+
s.add_development_dependency "webmock"
|
26
|
+
s.add_development_dependency 'ruby-debug19'
|
24
27
|
|
25
28
|
# specify any dependencies here; for example:
|
26
29
|
# s.add_development_dependency "rspec"
|
@@ -1,101 +1,25 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.expand_path(File.dirname(__FILE__)))
|
2
|
+
|
1
3
|
require "active_support"
|
2
4
|
require "json"
|
3
5
|
require "curb"
|
6
|
+
require "cgi"
|
4
7
|
require "data_science_theater_3000/version"
|
5
8
|
|
6
9
|
module DataScienceTheater3000
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
if coordinates.invert.keys.include?(nil)
|
22
|
-
coordinates[ip] = { "longitude" => "unknown", "latitude" => "unknown", "country" => "unknown", "region" => "unknown", "postal_code" => "unknown" }
|
23
|
-
end
|
24
|
-
coordinates
|
25
|
-
end
|
26
|
-
|
27
|
-
# Converts a street address into a location hash
|
28
|
-
#
|
29
|
-
# @param [String, Array] address as either a single string or array of strings
|
30
|
-
# @return [Hash] hash of location information. Singular output can be passed as input to coordinates2politics
|
31
|
-
def self.street2coordinates address
|
32
|
-
url = "http://www.datasciencetoolkit.org"
|
33
|
-
if address.class == String
|
34
|
-
address.gsub!(",", "%2c"); address.gsub!(" ", "+")
|
35
|
-
elsif address.class == Array
|
36
|
-
address.each {|a| a.gsub!(",", "%2c"); a.gsub!(" ", "+") }
|
37
|
-
address = ActiveSupport::JSON.encode(address)
|
38
|
-
end
|
39
|
-
response = Curl::Easy.perform( url + "/street2coordinates/" + address ).body_str
|
40
|
-
|
41
|
-
coordinates = make_hashy(response)
|
42
|
-
end
|
43
|
-
|
44
|
-
# Uses latitude,longitude pair to find detailed political information about a location.
|
45
|
-
# Currently supporting a single pair.
|
46
|
-
#
|
47
|
-
# @param [Hash,String] coords hash returned from ip2coordinates/street2coordinates or just a string like "33,-86"
|
48
|
-
# @return [Array] contains hashes with detailed political information for a location
|
49
|
-
def self.coordinates2politics coords
|
50
|
-
url = "http://www.datasciencetoolkit.org"
|
51
|
-
if coords.class == Hash && coords.keys.count == 1
|
52
|
-
coords = coords.keys.map{ |k| "#{coords[k]["latitude"]},#{coords[k]["longitude"]}" }.first
|
53
|
-
end
|
54
|
-
coords.gsub!( "," , "%2c" )
|
55
|
-
response = Curl::Easy.perform( url + "/coordinates2politics/" + coords ).body_str
|
56
|
-
|
57
|
-
politics = make_hashy(response)
|
58
|
-
end
|
10
|
+
autoload :DataScienceToolkit, 'data_science_theater_3000/data_science_toolkit'
|
11
|
+
autoload :Ip2Coordinates, 'data_science_theater_3000/ip2coordinates'
|
12
|
+
autoload :Street2Coordinates, 'data_science_theater_3000/street2coordinates'
|
13
|
+
autoload :Coordinates2Politics, 'data_science_theater_3000/coordinates2politics'
|
14
|
+
autoload :File2Text, 'data_science_theater_3000/file2text'
|
15
|
+
autoload :Text2People, 'data_science_theater_3000/text2people'
|
16
|
+
autoload :Text2Times, 'data_science_theater_3000/text2times'
|
17
|
+
autoload :Text2Places, 'data_science_theater_3000/text2places'
|
18
|
+
autoload :Html2Text, 'data_science_theater_3000/html2text'
|
19
|
+
autoload :Html2Story, 'data_science_theater_3000/html2story'
|
20
|
+
|
21
|
+
class DataScienceTheater3000Error < StandardError; end
|
22
|
+
|
23
|
+
class File2TextError < DataScienceTheater3000Error; end
|
59
24
|
|
60
|
-
# Specify a name or names and return parsed result as well as a best-guess for gender
|
61
|
-
#
|
62
|
-
# @param [Array,String] name can be array of name strings or a single name
|
63
|
-
# @return [Hash] information related to the provided name
|
64
|
-
def self.text2people name
|
65
|
-
url = "http://www.datasciencetoolkit.org"
|
66
|
-
name = ActiveSupport::JSON.encode(name)
|
67
|
-
c = Curl::Easy.new( url + '/text2people')
|
68
|
-
c.multipart_form_post = true
|
69
|
-
c.http_post(Curl::PostField.content('body', name))
|
70
|
-
response = c.body_str
|
71
|
-
|
72
|
-
person = make_hashy(response)
|
73
|
-
end
|
74
|
-
|
75
|
-
def self.text2times text
|
76
|
-
#url = "http://www.datasciencetoolkit.org"
|
77
|
-
#response = Curl::Easy.perform( url + "/text2times/" + text ).body_str
|
78
|
-
|
79
|
-
#times = make_hashy(response)
|
80
|
-
end
|
81
|
-
|
82
|
-
# Specify path to an image file and return the discernable text.
|
83
|
-
#
|
84
|
-
# @param [String] file_path
|
85
|
-
# @return [String] text discerned from image
|
86
|
-
def self.file2text file_path
|
87
|
-
file_reg = /([^\s]+(\.(?i)(jpg|png|gif|bmp|pdf|html))$)/
|
88
|
-
raise "Does not look like path to a file: #{file_path}" if !file_reg.match(file_path)
|
89
|
-
raise "Support paths to images hosted remotely soon" if file_path.include?('http:')
|
90
|
-
url = "http://www.datasciencetoolkit.org"
|
91
|
-
c = Curl::Easy.new( url + '/file2text')
|
92
|
-
c.multipart_form_post = true
|
93
|
-
c.http_post(Curl::PostField.file('inputfile', file_path))
|
94
|
-
|
95
|
-
response = c.body_str
|
96
|
-
end
|
97
|
-
|
98
|
-
def self.make_hashy response
|
99
|
-
ActiveSupport::JSON.decode(response)
|
100
|
-
end
|
101
25
|
end
|
@@ -0,0 +1,53 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Coordinates2Politics
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :coordinates
|
6
|
+
|
7
|
+
# Accepts lat/lon coordinates and returns detailed political information.
|
8
|
+
#
|
9
|
+
# @param [String, Array] coordinates as either a single string or array of strings
|
10
|
+
# @api public
|
11
|
+
def initialize coordinates
|
12
|
+
if coordinates.is_a?(Array)
|
13
|
+
@coordinates = ActiveSupport::JSON.encode(coordinates.map!{|a|CGI::escape(a)}) if coordinates.is_a?(Array)
|
14
|
+
elsif coordinates.is_a?(String)
|
15
|
+
@coordinates = CGI::escape(coordinates)
|
16
|
+
elsif coordinates.is_a?(Hash)
|
17
|
+
coordinates = sanitize(coordinates)
|
18
|
+
@coordinates = ActiveSupport::JSON.encode(coordinates.map!{|a| a.map!{|b| CGI::escape(b)}}) if coordinates.is_a?(Array)
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
# @return [Hash]
|
23
|
+
# @api public
|
24
|
+
def result
|
25
|
+
dst_request.result
|
26
|
+
end
|
27
|
+
|
28
|
+
|
29
|
+
protected
|
30
|
+
# @api private
|
31
|
+
def dst_request
|
32
|
+
@dst_request ||= DataScienceToolkit::Request.new(request_method: 'coordinates2politics', request_params: coordinates)
|
33
|
+
end
|
34
|
+
|
35
|
+
def sanitize coordinates
|
36
|
+
# Clear out nil results
|
37
|
+
coordinates.reject!{ |k,v| v.nil? }
|
38
|
+
# Clear out any results that are not a longitude or latitude
|
39
|
+
coordinates.each{ |k,v| coordinates[k].reject!{ |k,v| !["latitude","longitude"].include?(k) }}
|
40
|
+
# Initialize an empty array to hold all the lat, long pairs
|
41
|
+
a = []
|
42
|
+
# Stuff each latitude,longitude (stringified) into the array
|
43
|
+
coordinates.each{ |k,v| coordinates[k].keys.sort.each{ |ki| a << coordinates[k][ki].to_s }}
|
44
|
+
sanitized_coordinates = []
|
45
|
+
# Group the pairs into arrays within an array
|
46
|
+
while a.length > 0
|
47
|
+
sanitized_coordinates << a.slice!(0..1)
|
48
|
+
end
|
49
|
+
|
50
|
+
sanitized_coordinates
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
module DataScienceToolkit
|
3
|
+
|
4
|
+
# Performs a GET request to the datasciencetoolkit server
|
5
|
+
class Request
|
6
|
+
REQUEST_URL = "http://www.datasciencetoolkit.org"
|
7
|
+
|
8
|
+
# @api private
|
9
|
+
attr_accessor :request_method
|
10
|
+
|
11
|
+
# @api private
|
12
|
+
attr_accessor :request_params
|
13
|
+
|
14
|
+
# @api public
|
15
|
+
def initialize opts={}
|
16
|
+
@request_method = opts.fetch(:request_method){ raise DataScienceTheater3000Error, "No request_method option specified" }
|
17
|
+
@request_params = opts.fetch(:request_params){ raise DataScienceTheater3000Error, "No request_params option specified" }
|
18
|
+
end
|
19
|
+
|
20
|
+
# @api public
|
21
|
+
def result
|
22
|
+
@result ||= make_request
|
23
|
+
end
|
24
|
+
|
25
|
+
protected
|
26
|
+
# @api private
|
27
|
+
def make_request
|
28
|
+
response = Curl::Easy.perform( REQUEST_URL + "/#{request_method}/" + "#{request_params}" ).body_str
|
29
|
+
ActiveSupport::JSON.decode(response)
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
# Performs a POST request to the datasciencetoolkit server
|
34
|
+
# PostRequest class is always used via a subclass
|
35
|
+
class PostRequest < Request
|
36
|
+
|
37
|
+
# @api private
|
38
|
+
attr_accessor :post_key
|
39
|
+
|
40
|
+
# @api public
|
41
|
+
def initialize opts={}
|
42
|
+
super
|
43
|
+
@post_key = opts.fetch(:post_key){ raise DataScienceTheater3000Error, "No post_key option specified" }
|
44
|
+
end
|
45
|
+
|
46
|
+
protected
|
47
|
+
# Overridden in PostBodyRequest and PostFileRequest
|
48
|
+
# @api private
|
49
|
+
def make_request
|
50
|
+
false
|
51
|
+
end
|
52
|
+
end
|
53
|
+
|
54
|
+
class PostBodyRequest < PostRequest
|
55
|
+
protected
|
56
|
+
# @api private
|
57
|
+
def make_request
|
58
|
+
request = Curl::Easy.new(REQUEST_URL + "/#{request_method}")
|
59
|
+
request.multipart_form_post = true
|
60
|
+
request.http_post(Curl::PostField.content(post_key, request_params))
|
61
|
+
ActiveSupport::JSON.decode(request.body_str)
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
class PostFileRequest < PostRequest
|
66
|
+
protected
|
67
|
+
# @api private
|
68
|
+
def make_request
|
69
|
+
request = Curl::Easy.new(REQUEST_URL + "/#{request_method}")
|
70
|
+
request.multipart_form_post = true
|
71
|
+
request.http_post(Curl::PostField.file(post_key, request_params))
|
72
|
+
request.body_str
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
76
|
+
end
|
77
|
+
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class File2Text
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :file_path
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize file_path
|
10
|
+
@file_path = file_path
|
11
|
+
ensure_valid_file
|
12
|
+
end
|
13
|
+
|
14
|
+
# @return [String] raw response body from the file2text request (not JSON-decoded)
|
15
|
+
# @api public
|
16
|
+
def result
|
17
|
+
dst_request.result
|
18
|
+
end
|
19
|
+
|
20
|
+
protected
|
21
|
+
# @api private
|
22
|
+
def ensure_valid_file
|
23
|
+
raise File2TextError, "Does not look like path to a file: #{file_path}" unless file_path.match(/([^\s]+(\.(?i)(jpg|png|gif|bmp|pdf|html))$)/)
|
24
|
+
raise File2TextError, "Remote files not supported. Coming soon" if file_path.match(/^http/)
|
25
|
+
end
|
26
|
+
|
27
|
+
# @api private
|
28
|
+
def dst_request
|
29
|
+
@dst_request ||=
|
30
|
+
DataScienceToolkit::PostFileRequest.new(request_method: 'file2text', request_params: file_path, post_key: 'inputfile')
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Html2Story
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :html_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize html_string
|
10
|
+
@html_string = html_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
case
|
24
|
+
when html_string.length < 7500
|
25
|
+
DataScienceToolkit::Request.new(request_method: 'html2story', request_params: html_string)
|
26
|
+
when html_string.length >= 7500
|
27
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'html2story', request_params: html_string, post_key: 'body')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Html2Text
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :html_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize html_string
|
10
|
+
@html_string = html_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
case
|
24
|
+
when html_string.length < 7500
|
25
|
+
DataScienceToolkit::Request.new(request_method: 'html2text', request_params: html_string)
|
26
|
+
when html_string.length >= 7500
|
27
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'html2text', request_params: html_string, post_key: 'body')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Ip2Coordinates
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :ip
|
6
|
+
|
7
|
+
# Converts one or more IP addresses into a location hash
|
8
|
+
#
|
9
|
+
# @param [Array, String] ip Either an array of ip strings or a single ip string to be located
|
10
|
+
# @api public
|
11
|
+
def initialize ip
|
12
|
+
@ip = CGI::escape(ip) if ip.is_a?(String)
|
13
|
+
@ip = ActiveSupport::JSON.encode(ip.map!{|a|CGI::escape(a)}) if ip.is_a?(Array)
|
14
|
+
end
|
15
|
+
|
16
|
+
# @return [Hash]
|
17
|
+
# @api public
|
18
|
+
def result
|
19
|
+
dst_request.result
|
20
|
+
end
|
21
|
+
|
22
|
+
protected
|
23
|
+
# @api private
|
24
|
+
def dst_request
|
25
|
+
@dst_request ||= DataScienceToolkit::Request.new(request_method: 'ip2coordinates', request_params: ip)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Street2Coordinates
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :address
|
6
|
+
|
7
|
+
# Converts a street address into a location hash
|
8
|
+
#
|
9
|
+
# @param [String, Array] address as either a single string or array of strings
|
10
|
+
# @api public
|
11
|
+
def initialize address
|
12
|
+
@address = CGI::escape(address) if address.is_a?(String)
|
13
|
+
@address = ActiveSupport::JSON.encode(address.map!{|a|CGI::escape(a)}) if address.is_a?(Array)
|
14
|
+
end
|
15
|
+
|
16
|
+
# @return [Hash]
|
17
|
+
# @api public
|
18
|
+
def result
|
19
|
+
dst_request.result
|
20
|
+
end
|
21
|
+
|
22
|
+
protected
|
23
|
+
# @api private
|
24
|
+
def dst_request
|
25
|
+
@dst_request ||= DataScienceToolkit::Request.new(request_method: 'street2coordinates', request_params: address)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Text2People
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :name
|
6
|
+
|
7
|
+
# Specify a name or names and return parsed result as well as a best-guess for gender
|
8
|
+
#
|
9
|
+
# @param [Array,String] name can be array of name strings or a single name
|
10
|
+
# @api public
|
11
|
+
def initialize name
|
12
|
+
@name = ActiveSupport::JSON.encode(name)
|
13
|
+
end
|
14
|
+
|
15
|
+
# @return [Hash] information related to the provided name(s)
|
16
|
+
# @api public
|
17
|
+
def result
|
18
|
+
dst_request.result
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
# @api private
|
23
|
+
def dst_request
|
24
|
+
@dst_request ||=
|
25
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'text2people', request_params: name, post_key: 'body')
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Text2Places
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :text_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize text_string
|
10
|
+
@text_string = text_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
DataScienceToolkit::Request.new(request_method: 'text2places', request_params: text_string)
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module DataScienceTheater3000
|
2
|
+
class Text2Times
|
3
|
+
|
4
|
+
# @api private
|
5
|
+
attr_accessor :text_string
|
6
|
+
|
7
|
+
# @param [String]
|
8
|
+
# @api public
|
9
|
+
def initialize text_string
|
10
|
+
@text_string = text_string
|
11
|
+
end
|
12
|
+
|
13
|
+
# @return [Hash]
|
14
|
+
# @api public
|
15
|
+
def result
|
16
|
+
dst_request.result
|
17
|
+
end
|
18
|
+
|
19
|
+
protected
|
20
|
+
# @api private
|
21
|
+
def dst_request
|
22
|
+
@dst_request ||=
|
23
|
+
case
|
24
|
+
when text_string.length < 7500
|
25
|
+
DataScienceToolkit::Request.new(request_method: 'text2times', request_params: text_string)
|
26
|
+
when text_string.length >= 7500
|
27
|
+
DataScienceToolkit::PostBodyRequest.new(request_method: 'text2times', request_params: text_string, post_key: 'body')
|
28
|
+
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,63 @@
|
|
1
|
+
require './test/test_helper'
|
2
|
+
|
3
|
+
describe DataScienceTheater3000::DataScienceToolkit::Request do
|
4
|
+
before do
|
5
|
+
stub_request(:any, /.*datasciencetoolkit.*/).to_return(File.new('./test/ip2coordinates.txt'))
|
6
|
+
end
|
7
|
+
|
8
|
+
describe '#result' do
|
9
|
+
it 'returns a hash' do
|
10
|
+
DataScienceTheater3000::DataScienceToolkit::Request.new(request_params: 'foo', request_method: 'bar').result.must_be_instance_of Hash
|
11
|
+
end
|
12
|
+
end
|
13
|
+
|
14
|
+
describe 'initialized without a request_method' do
|
15
|
+
it 'raises a DataScienceTheater3000Error' do
|
16
|
+
proc{ DataScienceTheater3000::DataScienceToolkit::Request.new(request_params: 'foo')}.must_raise DataScienceTheater3000::DataScienceTheater3000Error
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
describe 'initialized without request_params' do
|
21
|
+
it 'raises a DataScienceTheater3000Error' do
|
22
|
+
proc{ DataScienceTheater3000::DataScienceToolkit::Request.new(request_method: 'foo')}.must_raise DataScienceTheater3000::DataScienceTheater3000Error
|
23
|
+
end
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
describe DataScienceTheater3000::DataScienceToolkit::PostRequest do
|
28
|
+
describe 'initialized without a post_key' do
|
29
|
+
it 'raises a DataScienceTheater3000Error' do
|
30
|
+
proc{ DataScienceTheater3000::DataScienceToolkit::PostRequest.new(request_params: 'foo', request_method: 'bar')}.must_raise DataScienceTheater3000::DataScienceTheater3000Error
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe '#make_request' do
|
35
|
+
it 'returns false' do
|
36
|
+
DataScienceTheater3000::DataScienceToolkit::PostRequest.new(request_method: 'foo', request_params: 'bar', post_key: 'baz').send(:make_request).must_equal false
|
37
|
+
end
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe DataScienceTheater3000::DataScienceToolkit::PostBodyRequest do
|
42
|
+
before do
|
43
|
+
stub_request(:any, /.*datasciencetoolkit.*/).to_return(File.new('./test/text2people.txt'))
|
44
|
+
end
|
45
|
+
|
46
|
+
describe '#result' do
|
47
|
+
it 'returns a hash' do
|
48
|
+
DataScienceTheater3000::DataScienceToolkit::PostBodyRequest.new(request_method: 'bar', request_params: 'foo', post_key: 'baz').result.must_be_instance_of String
|
49
|
+
end
|
50
|
+
end
|
51
|
+
end
|
52
|
+
|
53
|
+
describe DataScienceTheater3000::DataScienceToolkit::PostFileRequest do
|
54
|
+
before do
|
55
|
+
stub_request(:any, /.*datasciencetoolkit.*/).to_return(File.new('./test/file2text.txt'))
|
56
|
+
end
|
57
|
+
|
58
|
+
describe '#result' do
|
59
|
+
it 'returns a hash' do
|
60
|
+
DataScienceTheater3000::DataScienceToolkit::PostFileRequest.new(request_method: 'bar', request_params: 'foo', post_key: 'baz').result.must_be_instance_of String
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
data/test/file2text.txt
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Fri, 27 Jan 2012 21:41:01 GMT
|
3
|
+
Server: Apache/2.2.14 (Ubuntu)
|
4
|
+
X-Powered-By: Phusion Passenger (mod_rails/mod_rack) 3.0.4
|
5
|
+
Content-Disposition: attachment; filename="sample.png.txt"
|
6
|
+
Content-Length: 481
|
7
|
+
Status: 200
|
8
|
+
Vary: Accept-Encoding
|
9
|
+
Content-Type: text/plain;charset=utf-8
|
10
|
+
|
11
|
+
lf you pass in an image, this API will run an optical character recoghition algorithm to extract any words or
|
12
|
+
sentences it can from the picture. lf you upload a PDF file, Word document, Excel spreadsheet or HTML file, it
|
13
|
+
will retum a plain text version of the content. Unlike most of the calls, this one takes its input in the standard
|
14
|
+
multipart form-encoded format that’s used when browsers upload files, rather than as JSON. lt retums any content
|
15
|
+
it finds as a stream of text.
|
@@ -0,0 +1,10 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Fri, 27 Jan 2012 21:36:11 GMT
|
3
|
+
Server: Apache/2.2.14 (Ubuntu)
|
4
|
+
X-Powered-By: Phusion Passenger (mod_rails/mod_rack) 3.0.4
|
5
|
+
Content-Length: 241
|
6
|
+
Status: 200
|
7
|
+
Vary: Accept-Encoding
|
8
|
+
Content-Type: text/html;charset=utf-8
|
9
|
+
|
10
|
+
{"24.73.187.202":{"longitude":-86.8066024780273,"country_name":"United States","postal_code":"35209","region":"AL","locality":"Birmingham","country_code":"US","dma_code":630,"latitude":33.4667015075684,"country_code3":"USA","area_code":205}}
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
HTTP/1.1 200 OK
|
2
|
+
Date: Fri, 27 Jan 2012 21:36:11 GMT
|
3
|
+
Server: Apache/2.2.14 (Ubuntu)
|
4
|
+
X-Powered-By: Phusion Passenger (mod_rails/mod_rack) 3.0.4
|
5
|
+
Content-Length: 241
|
6
|
+
Status: 200
|
7
|
+
Vary: Accept-Encoding
|
8
|
+
Content-Type: text/html;charset=utf-8
|
9
|
+
|
10
|
+
"[{\"gender\":\"m\",\"first_name\":\"Nicholas\",\"title\":\"\",\"surnames\":\"Fine\",\"start_index\":92,\"end_index\":105,\"matched_string\":\"Nicholas Fine\"}]"
|
metadata
CHANGED
@@ -1,19 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_science_theater_3000
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Tad Hosford
|
9
|
+
- Nicholas Fine
|
9
10
|
autorequire:
|
10
11
|
bindir: bin
|
11
12
|
cert_chain: []
|
12
|
-
date: 2012-01-
|
13
|
+
date: 2012-01-28 00:00:00.000000000Z
|
13
14
|
dependencies:
|
14
15
|
- !ruby/object:Gem::Dependency
|
15
16
|
name: active_support
|
16
|
-
requirement: &
|
17
|
+
requirement: &20571340 !ruby/object:Gem::Requirement
|
17
18
|
none: false
|
18
19
|
requirements:
|
19
20
|
- - ! '>='
|
@@ -21,10 +22,10 @@ dependencies:
|
|
21
22
|
version: '0'
|
22
23
|
type: :runtime
|
23
24
|
prerelease: false
|
24
|
-
version_requirements: *
|
25
|
+
version_requirements: *20571340
|
25
26
|
- !ruby/object:Gem::Dependency
|
26
27
|
name: curb
|
27
|
-
requirement: &
|
28
|
+
requirement: &20570440 !ruby/object:Gem::Requirement
|
28
29
|
none: false
|
29
30
|
requirements:
|
30
31
|
- - ! '>='
|
@@ -32,10 +33,10 @@ dependencies:
|
|
32
33
|
version: '0'
|
33
34
|
type: :runtime
|
34
35
|
prerelease: false
|
35
|
-
version_requirements: *
|
36
|
+
version_requirements: *20570440
|
36
37
|
- !ruby/object:Gem::Dependency
|
37
38
|
name: json
|
38
|
-
requirement: &
|
39
|
+
requirement: &20569320 !ruby/object:Gem::Requirement
|
39
40
|
none: false
|
40
41
|
requirements:
|
41
42
|
- - ! '>='
|
@@ -43,7 +44,40 @@ dependencies:
|
|
43
44
|
version: '0'
|
44
45
|
type: :runtime
|
45
46
|
prerelease: false
|
46
|
-
version_requirements: *
|
47
|
+
version_requirements: *20569320
|
48
|
+
- !ruby/object:Gem::Dependency
|
49
|
+
name: minitest
|
50
|
+
requirement: &20568300 !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
52
|
+
requirements:
|
53
|
+
- - ! '>='
|
54
|
+
- !ruby/object:Gem::Version
|
55
|
+
version: '0'
|
56
|
+
type: :development
|
57
|
+
prerelease: false
|
58
|
+
version_requirements: *20568300
|
59
|
+
- !ruby/object:Gem::Dependency
|
60
|
+
name: webmock
|
61
|
+
requirement: &20567700 !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ! '>='
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
type: :development
|
68
|
+
prerelease: false
|
69
|
+
version_requirements: *20567700
|
70
|
+
- !ruby/object:Gem::Dependency
|
71
|
+
name: ruby-debug19
|
72
|
+
requirement: &20567040 !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
type: :development
|
79
|
+
prerelease: false
|
80
|
+
version_requirements: *20567040
|
47
81
|
description: Ruby interface to issue Data Science Toolkit API calls.
|
48
82
|
email:
|
49
83
|
- tad@isotope11.com
|
@@ -58,7 +92,22 @@ files:
|
|
58
92
|
- data_science_theater_3000.gemspec
|
59
93
|
- doc/sample.png
|
60
94
|
- lib/data_science_theater_3000.rb
|
95
|
+
- lib/data_science_theater_3000/coordinates2politics.rb
|
96
|
+
- lib/data_science_theater_3000/data_science_toolkit.rb
|
97
|
+
- lib/data_science_theater_3000/file2text.rb
|
98
|
+
- lib/data_science_theater_3000/html2story.rb
|
99
|
+
- lib/data_science_theater_3000/html2text.rb
|
100
|
+
- lib/data_science_theater_3000/ip2coordinates.rb
|
101
|
+
- lib/data_science_theater_3000/street2coordinates.rb
|
102
|
+
- lib/data_science_theater_3000/text2people.rb
|
103
|
+
- lib/data_science_theater_3000/text2places.rb
|
104
|
+
- lib/data_science_theater_3000/text2times.rb
|
61
105
|
- lib/data_science_theater_3000/version.rb
|
106
|
+
- test/data_science_toolkit_test.rb
|
107
|
+
- test/file2text.txt
|
108
|
+
- test/ip2coordinates.txt
|
109
|
+
- test/test_helper.rb
|
110
|
+
- test/text2people.txt
|
62
111
|
homepage: http://www.isotope11.com/blog/data-science-theater-3000-a-ruby-interface-for-data-science-toolkit
|
63
112
|
licenses: []
|
64
113
|
post_install_message:
|