datasift 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2011 MediaSift Ltd
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
20
+
21
+
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ DataSift
2
+ ========
3
+
4
+ The official Ruby library for accessing the DataSift API. See http://datasift.net for full details and to sign up for an account.
5
+
6
+ The examples and tests use the username and API key in config.yml.
7
+
8
+ Install Instructions
9
+ --------------------
10
+
11
+ sudo gem install datasift
12
+
13
+ Simple example
14
+ --------------
15
+
16
+ This example looks for anything that contains the word "football" and simply prints the content of each interaction to the screen as it comes in.
17
+
18
+ ```ruby
19
+ require 'rubygems'
20
+ require 'datasift'
21
+ user = DataSift::User.new("your username", "your api_key")
22
+ definition = user.createDefinition('interaction.content contains "football"')
23
+ consumer = definition.getConsumer(DataSift::StreamConsumer::TYPE_HTTP)
24
+ consumer.consume(true) do |interaction|
25
+ if interaction
26
+ puts interaction['interaction']['content']
27
+ end
28
+ end
29
+ ```
30
+
31
+ See the DataSift documentation for full details of the data contained within each interaction: http://support.datasift.net/help/kb/rest-api/return-objects
32
+
33
+ License
34
+ -------
35
+
36
+ All code contained in this repository is Copyright 2011 MediaSift Ltd.
37
+
38
+ This code is released under the BSD license. Please see the LICENSE file for more details.
data/Rakefile ADDED
@@ -0,0 +1,36 @@
1
require 'rubygems'
require 'rake'
require 'rake/testtask'
require 'rake/clean'
# rake/gempackagetask (unused here) and rake/rdoctask are no longer shipped
# with current Rake releases; RDoc's own task file provides RDoc::Task.
require 'rdoc/task'

# Unit tests: every test/**/test_*.rb file, with lib and test on the load path.
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage is optional: when rcov is not installed, define a stub :rcov task
# that explains how to get it instead of failing while loading the Rakefile.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# API documentation, titled with the version from the VERSION file if present.
RDoc::Task.new do |rdoc|
  # strip: keep a trailing newline in VERSION out of the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "datasift #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

task :default => :test
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
data/config.yml ADDED
@@ -0,0 +1,2 @@
1
+ username: YOUR_USERNAME_HERE
2
+ api_key: YOUR_API_KEY_HERE
data/datasift.gemspec ADDED
@@ -0,0 +1,28 @@
1
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)

# Gem specification for the DataSift API client library.
Gem::Specification.new do |s|
  s.name = 'datasift'
  # Resolve VERSION relative to this file rather than the current directory,
  # let File.read close the handle, and drop any trailing newline.
  s.version = File.read(File.expand_path('../VERSION', __FILE__)).strip

  s.authors = ['MediaSift']
  s.email = ['support@datasift.net']
  s.description = %q{The official Ruby library for accessing the DataSift API. See http://datasift.net/ for full details and to sign up for an account.}
  s.summary = %q{DataSift is a simple wrapper for the DataSift API.}
  s.homepage = 'http://github.com/mediasift/datasift-ruby'

  s.platform = Gem::Platform::RUBY
  s.rubygems_version = %q{1.3.6}
  s.required_rubygems_version = Gem::Requirement.new(">= 1.3.6") if s.respond_to? :required_rubygems_version=

  s.add_runtime_dependency('rest-client', '~> 1.6.3')
  s.add_runtime_dependency('crack', '~> 0')
  s.add_runtime_dependency('yajl-ruby', '~> 0.8.2')
  s.add_development_dependency('rdoc', '~> 0')
  s.add_development_dependency('shoulda', '~> 2.11.3')
  s.add_development_dependency('rspec', '~> 2.6.0')

  # The file lists come from git, so the gem must be built inside a checkout.
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_paths = ["lib"]
end
data/examples/cost.rb ADDED
@@ -0,0 +1,123 @@
1
+ # This example gets the cost associated with the stream given on the command
2
+ # line or piped/typed into STDIN. It presents it in a nice ASCII table.
3
+ # Note that the CSDL must be enclosed in quotes if given on the command line.
4
+ #
5
+ # ruby cost.rb 'interaction.content contains "football"'
6
+ # or
7
+ # cat football.csdl | ruby cost.rb
8
+ #
9
+ # NB: Most of the error handling (exception catching) has been removed for
10
+ # the sake of simplicity. Nearly everything in this library may throw
11
+ # exceptions, and production code should catch them. See the documentation
12
+ # for full details.
13
+ #
14
+
15
+ # Include the DataSift library
16
+ require File.dirname(__FILE__) + '/../lib/datasift'
17
+
18
# Format a number with a thousands delimiter, e.g. 1234567 -> "1,234,567".
#
# * +number+    - The value to format; it is converted with to_s.
# * +delimiter+ - The string placed between groups of three digits.
#
# Only the part before the first decimal point is grouped, so 1234.5678
# becomes "1,234.5678". Returns a String.
def number_with_delimiter(number, delimiter=',')
  whole, fraction = number.to_s.split('.', 2)
  # Block form of gsub: the delimiter is inserted literally, so a backslash
  # in it is not interpreted as a replacement escape.
  grouped = whole.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/) { Regexp.last_match(1) + delimiter }
  fraction ? "#{grouped}.#{fraction}" : grouped
end
22
+
23
# Include the configuration - put your username and API key in this file.
# YAML.load_file opens and closes the file itself, so no handle is left open.
config = YAML.load_file(File.join(File.dirname(__FILE__), '..', 'config.yml'))

# Get the CSDL from the command line or STDIN
csdl = ARGV.empty? ? ARGF.read : ARGV[0]

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition
puts 'Creating definition...'
definition = user.createDefinition(csdl)

# Getting cost
puts 'Getting cost...'
begin
  cost = definition.getCostBreakdown()
rescue DataSift::CompileFailedError => e
  # Use the message: String#+ raises TypeError when given the exception itself.
  puts 'CSDL compilation failed: ' + e.message
  puts
  exit!
end

# Column widths start at the width of each heading and grow to fit the data.
maxlength = {
  'target'     => 'Target'.length,
  'times used' => 'Times used'.length,
  'complexity' => 'Complexity'.length
}
costtable = []

# Store one table row and widen each column to fit its text.
add_row = lambda do |label, entry|
  row = {
    'target'     => label,
    'times used' => number_with_delimiter(entry['count']),
    'complexity' => number_with_delimiter(entry['cost'])
  }
  row.each do |column, text|
    maxlength[column] = [maxlength[column], text.length].max
  end
  costtable.push(row)
end

# One row per target, followed by its sub-targets indented by two spaces.
cost['costs'].each do |tgt, c|
  add_row.call(tgt, c)
  c['targets'].each do |tgt2, d|
    add_row.call('  ' + tgt2, d)
  end
end

# The total is printed with delimiters too, so size the column for that form.
total = number_with_delimiter(cost['total'])
maxlength['complexity'] = [maxlength['complexity'], total.length].max

target_width = maxlength['target']
used_width = maxlength['times used']
complexity_width = maxlength['complexity']

# Build a horizontal rule from its left edge, column joint and right edge.
rule = lambda do |left, joint, right|
  left + ('-' * target_width) + joint + ('-' * used_width) + joint + ('-' * complexity_width) + right
end

puts
puts rule.call('/-', '---', '-\\')

puts '| ' + 'Target'.ljust(target_width) + ' | ' +
     'Times Used'.ljust(used_width) + ' | ' +
     'Complexity'.ljust(complexity_width) + ' |'

puts rule.call('|-', '-+-', '-|')

costtable.each do |row|
  puts '| ' + row['target'].ljust(target_width) + ' | ' +
       row['times used'].rjust(used_width) + ' | ' +
       row['complexity'].rjust(complexity_width) + ' |'
end

puts rule.call('|-', '-+-', '-|')

# The "Total" label spans the first two columns plus the 3-character joint.
puts '| ' + 'Total'.rjust(target_width + 3 + used_width) + ' = ' +
     total.rjust(complexity_width) + ' |'

puts rule.call('\\-', '---', '-/')

puts

# Map the total cost onto a tier: above 1000, above 100, or anything lower.
if cost['total'] > 1000
  tiernum = 3
  tierdesc = 'high complexity'
elsif cost['total'] > 100
  tiernum = 2
  tierdesc = 'medium complexity'
else
  tiernum = 1
  tierdesc = 'simple complexity'
end

puts 'A total cost of ' + total + ' puts this stream in tier ' + tiernum.to_s + ', ' + tierdesc
puts
@@ -0,0 +1,50 @@
1
+ # This example constructs a DataSift_Definition object with CSDL that looks
2
+ # for anything containing the word "football". It then sits in a loop,
3
+ # getting buffered interactions once every 10 seconds until it's retrieved
4
+ # 10.
5
+ #
6
+ # NB: Most of the error handling (exception catching) has been removed for
7
+ # the sake of simplicity. Nearly everything in this library may throw
8
+ # exceptions, and production code should catch them. See the documentation
9
+ # for full details.
10
+ #
11
+
12
+ # Include the DataSift library
13
+ require File.dirname(__FILE__) + '/../lib/datasift'
14
+
15
# Include the configuration - put your username and API key in this file.
# YAML.load_file opens and closes the file itself, so no handle is left open.
config = YAML.load_file(File.join(File.dirname(__FILE__), '..', 'config.yml'))

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition
csdl = 'interaction.content contains "football"'
puts 'Creating definition...'
puts ' ' + csdl
definition = user.createDefinition(csdl)

# Get buffered interactions until we've had 10
puts 'Getting buffered interactions...'
puts '--'
num = 10         # interactions still wanted
from_id = false  # id of the last interaction seen; false on the first request
loop do
  interactions = definition.getBuffered(num, from_id)
  interactions.each do |interaction|
    # Interpolation rather than String#+ so a missing field prints as empty
    # instead of raising TypeError.
    puts "Type: #{interaction['interaction']['type']}"
    puts "Content: #{interaction['interaction']['content']}"
    puts '--'
    num -= 1
    from_id = interaction['interaction']['id']
  end

  break unless num > 0

  # Not enough yet: wait before asking for the remainder.
  sleep(10)
end

puts
puts 'Fetched 10 interactions, we\'re done.'
puts
@@ -0,0 +1,52 @@
1
+ # This example constructs a DataSift_Definition object with CSDL that looks
2
+ # for anything containing the word "football". It then gets an HTTP
3
+ # consumer for that definition and displays matching interactions to the
4
+ # screen as they come in. It will display 10 interactions and then stop.
5
+ #
6
+ # NB: Most of the error handling (exception catching) has been removed for
7
+ # the sake of simplicity. Nearly everything in this library may throw
8
+ # exceptions, and production code should catch them. See the documentation
9
+ # for full details.
10
+ #
11
+
12
+ # Include the DataSift library
13
+ require File.dirname(__FILE__) + '/../lib/datasift'
14
+
15
# Include the configuration - put your username and API key in this file.
# YAML.load_file opens and closes the file itself, so no handle is left open.
config = YAML.load_file(File.join(File.dirname(__FILE__), '..', 'config.yml'))

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition
csdl = 'interaction.content contains "football"'
puts 'Creating definition...'
puts ' ' + csdl
definition = user.createDefinition(csdl)

# Create the consumer
puts 'Getting the consumer...'
consumer = definition.getConsumer(DataSift::StreamConsumer::TYPE_HTTP)

# And start consuming
puts 'Consuming...'
puts '--'
count = 10  # interactions left to display before stopping
consumer.consume(true) do |interaction|
  if interaction
    # Interpolation rather than String#+ so a missing field prints as empty
    # instead of raising TypeError.
    puts "Type: #{interaction['interaction']['type']}"
    puts "Content: #{interaction['interaction']['content']}"
    puts '--'

    count -= 1
    if count == 0
      puts 'Stopping consumer...'
      consumer.stop()
    end
  end
end

puts
puts 'Finished consuming'
puts
@@ -0,0 +1,60 @@
1
+ # This example constructs a DataSift_Definition object with CSDL that looks
2
+ # for Twitter interactions containing any of the words or phrases given on
3
+ # the command line. It then gets an HTTP consumer for that definition and
4
+ # displays matching interactions to the screen as they come in. When the
+ # consumer stops, the onStopped handler prints the reason.
5
+ #
6
+ # NB: Most of the error handling (exception catching) has been removed for
7
+ # the sake of simplicity. Nearly everything in this library may throw
8
+ # exceptions, and production code should catch them. See the documentation
9
+ # for full details.
10
+ #
11
+
12
# Make sure we have some arguments
if ARGV.empty?
  puts 'ERR: Please specify the words and/or phrases to track!'
  puts
  puts
  exit!
end

# Include the DataSift library
require File.dirname(__FILE__) + '/../lib/datasift'

# Include the configuration - put your username and API key in this file.
# YAML.load_file opens and closes the file itself, so no handle is left open.
config = YAML.load_file(File.join(File.dirname(__FILE__), '..', 'config.yml'))

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition: Twitter interactions whose content contains any of
# the words or phrases given on the command line.
# NOTE(review): the arguments are inserted verbatim, so one containing a
# double quote yields invalid CSDL - confirm the escaping rules before
# accepting arbitrary input.
terms = ARGV.map { |phrase| 'interaction.content contains "' + phrase + '"' }
csdl = 'interaction.type == "twitter" and (' + terms.join(' or ') + ')'
puts 'Creating definition...'
puts ' ' + csdl
definition = user.createDefinition(csdl)

# Create the consumer
puts 'Getting the consumer...'
consumer = definition.getConsumer(DataSift::StreamConsumer::TYPE_HTTP)

# Setting up the onStopped handler
consumer.onStopped do |reason|
  puts
  # Interpolation: the reason is printed even when it is not a String.
  puts "Stopped: #{reason}"
  puts
end

# And start consuming
puts 'Consuming...'
puts '--'
consumer.consume(true) do |interaction|
  if interaction
    # Interpolation rather than String#+ so a missing field prints as empty
    # instead of raising TypeError.
    puts "Type: #{interaction['interaction']['type']}"
    puts "Content: #{interaction['interaction']['content']}"
    puts '--'
  end
end

puts
puts 'Finished consuming'
puts
@@ -0,0 +1,79 @@
1
+ #
2
+ # apiclient.rb - This file contains the ApiClient class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The ApiClient class wraps the functionality that makes calls to the
9
+ # DataSift API.
10
+
11
+ require 'rest_client'
12
+ require 'crack'
13
+
14
module DataSift
  # ApiClient class.
  #
  # == Introduction
  #
  # The ApiClient class wraps the functionality that makes calls to the
  # DataSift API.
  #
  class ApiClient
    # Make a call to a DataSift API endpoint.
    # === Parameters
    #
    # * +username+ - The username for the Auth header.
    # * +api_key+ - The API key for the Auth header.
    # * +endpoint+ - The endpoint of the API call.
    # * +params+ - The parameters to be passed along with the request.
    # * +user_agent+ - The value sent in the User-Agent header.
    #
    # === Returns
    #
    # A Hash with the keys 'response_code', 'data' (the parsed JSON body),
    # 'rate_limit' and 'rate_limit_remaining'. The rate limit values stay at
    # -1 unless the matching headers are present on a successful response.
    #
    # NOTE(review): the default user agent says "PHP" although this is the
    # Ruby client; it is left unchanged here so that existing callers send
    # the same header as before.
    def call(username, api_key, endpoint, params = {}, user_agent = 'DataSiftPHP/0.0')
      # Build the full endpoint URL
      # NOTE(review): the request goes over plain HTTP, so the Auth header is
      # sent unencrypted - confirm whether the API can be reached via HTTPS.
      url = 'http://' + User::API_BASE_URL + endpoint + '.json?' + hashToQuerystring(params)

      # Pessimistic defaults, overwritten below once we know more.
      retval = {
        'response_code' => 500,
        'data' => { 'error' => 'Unknown error' },
        'rate_limit' => -1,
        'rate_limit_remaining' => -1,
      }

      begin
        # Make the call
        res = RestClient.get(url, { 'Auth' => username + ':' + api_key, 'User-Agent' => user_agent })

        # Success
        retval['response_code'] = 200

        # Parse the JSON response
        retval['data'] = Crack::JSON.parse(res)

        # Rate limit headers
        limit = res.headers[:x_ratelimit_limit]
        retval['rate_limit'] = limit if limit

        remaining = res.headers[:x_ratelimit_remaining]
        retval['rate_limit_remaining'] = remaining if remaining
      rescue RestClient::ExceptionWithResponse => err
        # The exception may arrive without a response attached; in that case
        # keep the default 500 / "Unknown error" instead of storing a nil
        # code and trying to parse a nil body.
        if err.response
          # Set the response code
          retval['response_code'] = err.http_code

          # And set the data
          retval['data'] = Crack::JSON.parse(err.response)
        end
      end

      retval
    end

    private

    # Turn a hash into a URL query string ("a=1&b=2"), without a leading "?".
    #
    # Keys and values are converted with to_s and form-encoded, so characters
    # that are significant in a query string ("&", "=", "+", ...) inside a
    # value cannot alter the query. Returns "" for an empty hash.
    def hashToQuerystring(hash)
      pairs = hash.map do |key, value|
        URI.encode_www_form_component(key.to_s) + '=' + URI.encode_www_form_component(value.to_s)
      end
      pairs.join('&')
    end
  end
end