datasift 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ Copyright (c) 2011 MediaSift Ltd
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining a copy
4
+ of this software and associated documentation files (the "Software"), to deal
5
+ in the Software without restriction, including without limitation the rights
6
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
7
+ copies of the Software, and to permit persons to whom the Software is
8
+ furnished to do so, subject to the following conditions:
9
+
10
+ The above copyright notice and this permission notice shall be included in
11
+ all copies or substantial portions of the Software.
12
+
13
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19
+ THE SOFTWARE.
20
+
21
+
data/README.md ADDED
@@ -0,0 +1,38 @@
1
+ DataSift
2
+ ========
3
+
4
+ The official Ruby library for accessing the DataSift API. See http://datasift.net for full details and to sign up for an account.
5
+
6
+ The examples and tests use the username and API key in config.yml.
7
+
8
+ Install Instructions
9
+ --------------------
10
+
11
+ sudo gem install datasift
12
+
13
+ Simple example
14
+ --------------
15
+
16
+ This example looks for anything that contains the word "football" and simply prints the content of each interaction to the screen as it comes in.
17
+
18
+ ```ruby
19
+ require 'rubygems'
20
+ require 'datasift'
21
+ user = DataSift::User.new("your username", "your api_key")
22
+ definition = user.createDefinition('interaction.content contains "football"')
23
+ consumer = definition.getConsumer(DataSift::StreamConsumer::TYPE_HTTP)
24
+ consumer.consume(true) do |interaction|
25
+ if interaction
26
+ puts interaction['interaction']['content']
27
+ end
28
+ end
29
+ ```
30
+
31
+ See the DataSift documentation for full details of the data contained within each interaction: http://support.datasift.net/help/kb/rest-api/return-objects
32
+
33
+ License
34
+ -------
35
+
36
+ All code contained in this repository is Copyright 2011 MediaSift Ltd.
37
+
38
+ This code is released under the MIT license. Please see the LICENSE file for more details.
data/Rakefile ADDED
@@ -0,0 +1,36 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+ require 'rake/testtask'
4
+ require 'rake/clean'
5
+ require 'rake/gempackagetask'
6
+ require 'rake/rdoctask'
7
# `rake test` - run every test/**/test_*.rb with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push('lib', 'test')
  t.pattern = 'test/**/test_*.rb'
  t.verbose = true
end

# `rake rcov` - coverage run when the rcov gem can be loaded; otherwise define
# a stand-in task that aborts with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |t|
    t.libs.push('test')
    t.pattern = 'test/**/test_*.rb'
    t.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# `rake rdoc` - generate API documentation into rdoc/, titled with the
# contents of the VERSION file when that file exists.
require 'rdoc/task'
RDoc::Task.new do |rdoc|
  if File.exist?('VERSION')
    version = File.read('VERSION')
  else
    version = ""
  end

  rdoc.title = "datasift #{version}"
  rdoc.rdoc_dir = 'rdoc'
  rdoc.rdoc_files.include('README*', 'lib/**/*.rb')
end

# Running plain `rake` runs the tests.
task :default => :test
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.0
data/config.yml ADDED
@@ -0,0 +1,2 @@
1
+ username: YOUR_USERNAME_HERE
2
+ api_key: YOUR_API_KEY_HERE
data/datasift.gemspec ADDED
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+
4
# Gem specification for the datasift gem.
Gem::Specification.new do |s|
  s.name        = 'datasift'
  # Resolve VERSION relative to this gemspec rather than the current working
  # directory, let File.read close the handle, and drop any trailing newline.
  s.version     = File.read(File.expand_path('../VERSION', __FILE__)).strip

  s.authors     = ['MediaSift']
  s.email       = ['support@datasift.net']
  s.description = %q{The official Ruby library for accessing the DataSift API. See http://datasift.net/ for full details and to sign up for an account.}
  s.summary     = %q{DataSift is a simple wrapper for the DataSift API.}
  s.homepage    = 'http://github.com/mediasift/datasift-ruby'

  s.platform    = Gem::Platform::RUBY
  s.rubygems_version = %q{1.3.6}
  s.required_rubygems_version = Gem::Requirement.new(">= 1.3.6") if s.respond_to? :required_rubygems_version=

  # Runtime dependencies
  s.add_runtime_dependency('rest-client', '~> 1.6.3')
  s.add_runtime_dependency('crack', '~> 0')
  s.add_runtime_dependency('yajl-ruby', '~> 0.8.2')

  # Development-only dependencies
  s.add_development_dependency('rdoc', '~> 0')
  s.add_development_dependency('shoulda', '~> 2.11.3')
  s.add_development_dependency('rspec', '~> 2.6.0')

  # The packaged file lists come from git, so the gem must be built from a
  # git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.require_paths = ["lib"]
end
data/examples/cost.rb ADDED
@@ -0,0 +1,123 @@
1
+ # This example gets the cost associated with the stream given on the command
2
+ # line or piped/typed into STDIN. It presents it in a nice ASCII table.
3
+ # Note that the CSDL must be enclosed in quotes if given on the command line.
4
+ #
5
+ # ruby cost.rb 'interaction.content contains "football"'
6
+ # or
7
+ # cat football.csdl | ruby cost.rb
8
+ #
9
+ # NB: Most of the error handling (exception catching) has been removed for
10
+ # the sake of simplicity. Nearly everything in this library may throw
11
+ # exceptions, and production code should catch them. See the documentation
12
+ # for full details.
13
+ #
14
+
15
+ # Include the DataSift library
16
+ require File.dirname(__FILE__) + '/../lib/datasift'
17
+
18
# Format a number with thousands separators.
#
# === Parameters
#
# * +number+    - The value to format; it is converted with +to_s+.
# * +delimiter+ - The separator inserted between groups of three digits.
#
# Returns the formatted String. Only the part before the first '.' is
# grouped, so a fractional part is left untouched (1234.5678 => "1,234.5678").
def number_with_delimiter(number, delimiter=',')
  whole, fraction = number.to_s.split('.', 2)
  # whole is nil when number.to_s is empty; to_s turns that back into ''.
  whole = whole.to_s.gsub(/(\d)(?=(\d\d\d)+(?!\d))/, "\\1#{delimiter}")
  fraction ? "#{whole}.#{fraction}" : whole
end
22
+
23
# Include the configuration - put your username and API key in this file.
# YAML.load_file opens and closes the file itself, so no handle is left open.
require 'yaml'
config = YAML.load_file(File.join(File.dirname(__FILE__), '..', 'config.yml'))

# Get the CSDL from the command line or STDIN
csdl = ARGV.size > 0 ? ARGV[0] : ARGF.read

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition
puts 'Creating definition...'
definition = user.createDefinition(csdl)

# Getting cost
puts 'Getting cost...'
begin
  cost = definition.getCostBreakdown()
rescue DataSift::CompileFailedError => e
  # String#+ needs a String operand; appending the exception object itself
  # raises TypeError on Ruby 1.9+, so append its message instead.
  puts 'CSDL compilation failed: ' + e.message
  puts
  exit!
end

# Column keys in display order, and the running maximum width of each column
# (starting from the width of its heading).
columns   = ['target', 'times used', 'complexity']
maxlength = {
  'target'     => 'Target'.length,
  'times used' => 'Times used'.length,
  'complexity' => 'Complexity'.length,
}
costtable = []

# Append one table row and widen the columns to fit it. Widths are taken from
# the text that will actually be printed, so an indented label and its
# measured width cannot drift apart.
add_row = lambda do |label, data|
  row = {
    'target'     => label,
    'times used' => number_with_delimiter(data['count']),
    'complexity' => number_with_delimiter(data['cost']),
  }
  row.each { |column, text| maxlength[column] = [maxlength[column], text.length].max }
  costtable.push(row)
end

cost['costs'].each do |tgt, c|
  add_row.call(tgt, c)
  # Sub-targets are listed under their parent, indented by two spaces
  c['targets'].each { |tgt2, d| add_row.call('  ' + tgt2, d) }
end

# The total is printed with delimiters, so size the column for that form.
total_text = number_with_delimiter(cost['total'])
maxlength['complexity'] = [maxlength['complexity'], total_text.length].max

# Build a horizontal rule: left corner, column joint character, right corner.
rule = lambda do |left, joint, right|
  bars = columns.map { |column| '-' * maxlength[column] }
  left + '-' + bars.join('-' + joint + '-') + '-' + right
end

puts
puts rule.call('/', '-', '\\')

print '| ' + 'Target'.ljust(maxlength['target']) + ' | '
print 'Times Used'.ljust(maxlength['times used']) + ' | '
puts 'Complexity'.ljust(maxlength['complexity']) + ' |'

puts rule.call('|', '+', '|')

costtable.each do |row|
  print '| ' + row['target'].ljust(maxlength['target']) + ' | '
  print row['times used'].rjust(maxlength['times used']) + ' | '
  puts row['complexity'].rjust(maxlength['complexity']) + ' |'
end

puts rule.call('|', '+', '|')

# The 'Total' label spans the first two columns plus the 3-character ' | '
# separator between them.
print '| ' + 'Total'.rjust(maxlength['target'] + 3 + maxlength['times used']) + ' = '
puts total_text.rjust(maxlength['complexity']) + ' |'

puts rule.call('\\', '-', '/')

puts

# Map the total cost onto a tier number and description
if cost['total'] > 1000
  tiernum  = 3
  tierdesc = 'high complexity'
elsif cost['total'] > 100
  tiernum  = 2
  tierdesc = 'medium complexity'
else
  tiernum  = 1
  tierdesc = 'simple complexity'
end

puts 'A total cost of ' + total_text + ' puts this stream in tier ' + tiernum.to_s + ', ' + tierdesc
puts
@@ -0,0 +1,50 @@
1
+ # This example constructs a DataSift_Definition object with CSDL that looks
2
+ # for anything containing the word "football". It then sits in a loop,
3
+ # getting buffered interactions once every 10 seconds until it's retrieved
4
+ # 10.
5
+ #
6
+ # NB: Most of the error handling (exception catching) has been removed for
7
+ # the sake of simplicity. Nearly everything in this library may throw
8
+ # exceptions, and production code should catch them. See the documentation
9
+ # for full details.
10
+ #
11
+
12
+ # Include the DataSift library
13
+ require File.dirname(__FILE__) + '/../lib/datasift'
14
+
15
# Load the username and API key from config.yml in the parent directory
config_path = File.join(File.dirname(__FILE__), '..', 'config.yml')
config = YAML::load(File.open(config_path))

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition
csdl = 'interaction.content contains "football"'
puts 'Creating definition...'
puts ' ' + csdl
definition = user.createDefinition(csdl)

# Keep requesting buffered interactions, pausing 10 seconds between
# requests, until the countdown of 10 has been used up.
puts 'Getting buffered interactions...'
puts '--'
remaining = 10
last_id = false
loop do
  batch = definition.getBuffered(remaining, last_id)
  batch.each do |item|
    puts 'Type: ' + item['interaction']['type']
    puts 'Content: ' + item['interaction']['content']
    puts '--'
    remaining -= 1
    # Remember the last id seen so the next request can continue from it
    last_id = item['interaction']['id']
  end

  break unless remaining > 0
  sleep(10)
end

puts
puts 'Fetched 10 interactions, we\'re done.'
puts
@@ -0,0 +1,52 @@
1
+ # This example constructs a DataSift_Definition object with CSDL that looks
2
+ # for anything containing the word "football". It then gets an HTTP
3
+ # consumer for that definition and displays matching interactions to the
4
+ # screen as they come in. It will display 10 interactions and then stop.
5
+ #
6
+ # NB: Most of the error handling (exception catching) has been removed for
7
+ # the sake of simplicity. Nearly everything in this library may throw
8
+ # exceptions, and production code should catch them. See the documentation
9
+ # for full details.
10
+ #
11
+
12
+ # Include the DataSift library
13
+ require File.dirname(__FILE__) + '/../lib/datasift'
14
+
15
# Load the username and API key from config.yml in the parent directory
config_path = File.join(File.dirname(__FILE__), '..', 'config.yml')
config = YAML::load(File.open(config_path))

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition
csdl = 'interaction.content contains "football"'
puts 'Creating definition...'
puts ' ' + csdl
definition = user.createDefinition(csdl)

# Create the consumer
puts 'Getting the consumer...'
consumer = definition.getConsumer(DataSift::StreamConsumer::TYPE_HTTP)

# Start consuming; stop the consumer once 10 interactions have been shown
puts 'Consuming...'
puts '--'
remaining = 10
consumer.consume(true) do |interaction|
  # Ignore callbacks that carry no interaction
  next unless interaction

  details = interaction['interaction']
  puts 'Type: ' + details['type']
  puts 'Content: ' + details['content']
  puts '--'

  remaining -= 1
  if remaining == 0
    puts 'Stopping consumer...'
    consumer.stop()
  end
end

puts
puts 'Finished consuming'
puts
@@ -0,0 +1,60 @@
1
+ # This example builds a DataSift definition whose CSDL matches Twitter
2
+ # interactions containing any of the words or phrases given on the command
3
+ # line. It then gets an HTTP consumer for that definition and displays
4
+ # matching interactions to the screen as they come in, until it is stopped.
5
+ #
6
+ # NB: Most of the error handling (exception catching) has been removed for
7
+ # the sake of simplicity. Nearly everything in this library may throw
8
+ # exceptions, and production code should catch them. See the documentation
9
+ # for full details.
10
+ #
11
+
12
# Bail out straight away when no words or phrases were given on the command line
if ARGV.empty?
  puts 'ERR: Please specify the words and/or phrases to track!'
  2.times { puts }
  exit!
end
19
+
20
+ # Include the DataSift library
21
+ require File.dirname(__FILE__) + '/../lib/datasift'
22
+
23
# Load the username and API key from config.yml in the parent directory
config_path = File.join(File.dirname(__FILE__), '..', 'config.yml')
config = YAML::load(File.open(config_path))

# Authenticate
puts 'Creating user...'
user = DataSift::User.new(config['username'], config['api_key'])

# Create the definition: Twitter interactions whose content contains any of
# the command-line arguments
separator = '" or interaction.content contains "'
csdl = 'interaction.type == "twitter" and (interaction.content contains "' + ARGV.join(separator) + '")'
puts 'Creating definition...'
puts ' ' + csdl
definition = user.createDefinition(csdl)

# Create the consumer
puts 'Getting the consumer...'
consumer = definition.getConsumer(DataSift::StreamConsumer::TYPE_HTTP)

# Report the reason passed to the onStopped handler
consumer.onStopped do |reason|
  puts
  puts 'Stopped: ' + reason
  puts
end

# And start consuming
puts 'Consuming...'
puts '--'
consumer.consume(true) do |interaction|
  if interaction
    details = interaction['interaction']
    puts 'Type: ' + details['type']
    puts 'Content: ' + details['content']
    puts '--'
  end
end

puts
puts 'Finished consuming'
puts
@@ -0,0 +1,79 @@
1
+ #
2
+ # apiclient.rb - This file contains the ApiClient class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The ApiClient class wraps the functionality that makes calls to the
9
+ # DataSift API.
10
+
11
+ require 'rest_client'
12
+ require 'crack'
13
+
14
require 'cgi' # CGI.escape is used to encode query-string components

module DataSift
  # ApiClient class.
  #
  # == Introduction
  #
  # The ApiClient class wraps the functionality that makes calls to the
  # DataSift API.
  #
  class ApiClient
    # Make a call to a DataSift API endpoint.
    #
    # === Parameters
    #
    # * +username+   - The username for the Auth header.
    # * +api_key+    - The API key for the Auth header.
    # * +endpoint+   - The endpoint of the API call.
    # * +params+     - The parameters to be passed along with the request.
    # * +user_agent+ - The value sent in the User-Agent header.
    #
    # === Returns
    #
    # A Hash with these string keys:
    #
    # * 'response_code'        - 200 on success, otherwise the HTTP code
    #   carried by the RestClient exception (500 if it carries none).
    # * 'data'                 - The parsed JSON body, or
    #   <tt>{ 'error' => 'Unknown error' }</tt> when there is no body.
    # * 'rate_limit'           - The X-RateLimit-Limit header, or -1 if absent.
    # * 'rate_limit_remaining' - The X-RateLimit-Remaining header, or -1 if
    #   absent.
    #
    # Only RestClient::ExceptionWithResponse is rescued; any other exception
    # propagates to the caller.
    #
    # NOTE(review): the request goes over plain http:// with the credentials
    # in a header, and the default user agent says "PHP" in a Ruby library.
    # Both are left unchanged here; confirm what the API expects.
    def call(username, api_key, endpoint, params = {}, user_agent = 'DataSiftPHP/0.0')
      # Build the full endpoint URL
      url = 'http://' + User::API_BASE_URL + endpoint + '.json?' + hashToQuerystring(params)

      # Pessimistic defaults, overwritten below as information becomes available
      retval = {
        'response_code' => 500,
        'data' => { 'error' => 'Unknown error' },
        'rate_limit' => -1,
        'rate_limit_remaining' => -1,
      }

      begin
        # Make the call
        res = RestClient.get(url, { 'Auth' => username + ':' + api_key, 'User-Agent' => user_agent })

        # Success
        retval['response_code'] = 200

        # Parse the JSON response
        retval['data'] = Crack::JSON.parse(res)

        # Rate limit headers
        limit = res.headers[:x_ratelimit_limit]
        retval['rate_limit'] = limit if limit

        remaining = res.headers[:x_ratelimit_remaining]
        retval['rate_limit_remaining'] = remaining if remaining
      rescue RestClient::ExceptionWithResponse => err
        # Keep the defaults when the exception carries no code or no body,
        # rather than storing nil or handing nil to the JSON parser.
        retval['response_code'] = err.http_code if err.http_code
        retval['data'] = Crack::JSON.parse(err.response) if err.response
      end

      retval
    end

  private

    # Turn a Hash into a URL query string ("k1=v1&k2=v2").
    #
    # Keys and values are converted with +to_s+ and fully percent-encoded, so
    # characters such as '&', '=', '+' and '#' inside a value cannot break the
    # query string apart. Returns '' for an empty Hash.
    def hashToQuerystring(hash)
      hash.map { |key, value| "#{escapeComponent(key)}=#{escapeComponent(value)}" }.join('&')
    end

    # Percent-encode a single query-string component. CGI.escape encodes a
    # space as '+' (and a literal '+' as %2B), so rewriting '+' to %20
    # afterwards yields %20 for spaces without touching real plus signs.
    def escapeComponent(value)
      CGI.escape(value.to_s).gsub('+', '%20')
    end
  end
end