datasift 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,154 @@
1
+ #
2
+ # definition.rb - This file contains the Definition class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The Definition class represents a stream definition: a CSDL string together
9
+ # with the hash, cost and creation date the API returns once it has been
10
+ # compiled.
11
+
12
require 'date'

module DataSift

  # Definition class.
  #
  # == Introduction
  #
  # The Definition class represents a stream definition: a CSDL string plus
  # the hash, total cost and creation date obtained by compiling it through
  # the API.
  #
  # NOTE: this class defines its own #hash method (the stream hash), which
  # replaces Object#hash. Instances are therefore not suitable as Hash keys.
  #
  class Definition
    attr_reader :csdl, :total_cost, :created_at

    # Constructor. A User object is required, and you can optionally supply a
    # default CSDL string and an already-known hash for it.
    # === Parameters
    #
    # * +user+ - The DataSift::User object.
    # * +csdl+ - Optional default CSDL string.
    # * +hash+ - Optional default hash string (false means "not compiled").
    #
    # Raises InvalidDataError if +user+ is not a DataSift::User or +csdl+ is
    # not a String.
    #
    def initialize(user, csdl = '', hash = false)
      raise InvalidDataError, 'Please supply a valid User object when creating a Definition object.' unless user.is_a? DataSift::User
      @user = user
      clearHash
      self.csdl = csdl
      # Must come after the CSDL assignment: the setter clears the hash
      # whenever the CSDL changes, which would discard the supplied value.
      @hash = hash
    end

    # CSDL setter. Stores a stripped copy of the incoming string and resets
    # the hash (and cost / creation date) if the CSDL has changed.
    #
    # The caller's string is never modified, so frozen strings are accepted.
    #
    # Raises InvalidDataError if +csdl+ is not a String.
    #
    def csdl=(csdl)
      raise InvalidDataError, 'The CSDL must be a string.' unless csdl.is_a? String
      stripped = csdl.strip
      clearHash unless stripped == @csdl
      @csdl = stripped
    end

    # Hash getter. If the hash has not yet been obtained the CSDL will be
    # compiled first. Returns the hash string, or false if compilation
    # failed. An InvalidDataError from compiling an empty definition is not
    # rescued and propagates to the caller.
    #
    def hash
      if @hash == false
        begin
          compile
        rescue DataSift::CompileFailedError
          # Deliberately ignored: a failed compilation leaves @hash as false,
          # which is what the caller gets back.
        end
      end

      @hash
    end

    # Reset the hash to false. The effect of this is to mark the definition as
    # requiring compilation. The total cost and creation date are reset too.
    #
    def clearHash
      @hash = false
      @total_cost = false
      @created_at = false
    end

    # Call the DataSift API to compile this definition. On success it stores
    # the returned hash, total cost and creation date; they are stored
    # together or not at all.
    #
    # Raises InvalidDataError if the CSDL is empty, and CompileFailedError if
    # the API call fails or the response lacks hash, cost or created_at.
    #
    def compile
      raise InvalidDataError, 'Cannot compile an empty definition.' if @csdl.empty?

      begin
        res = @user.callAPI('compile', { 'csdl' => @csdl })
      rescue APIError => err
        clearHash
        raise CompileFailedError, err.message if err.http_code == 400
        raise CompileFailedError, "Unexpected APIError code: #{err.http_code} [#{err.message}]"
      end

      # Validate the whole response before storing anything so a malformed
      # response cannot leave a hash behind without its cost or date.
      %w[hash cost created_at].each do |key|
        next if res.has_key?(key)
        clearHash
        raise CompileFailedError, "Compiled successfully but no #{key} in the response"
      end

      @hash = res['hash']
      @total_cost = Integer(res['cost'])
      @created_at = Date.parse(res['created_at'])
    end

    # Call the DataSift API to get the cost for this definition. Returns the
    # response of the 'cost' API call unchanged. Presumably it contains the
    # following keys - verify against the API:
    #   costs => The breakdown of running the rule
    #   tags  => The tags associated with the rule
    #   total => The total cost of the rule
    #
    # Raises InvalidDataError if the CSDL is empty.
    #
    def getCostBreakdown
      raise InvalidDataError, "Cannot get the cost for an empty definition." if @csdl.empty?

      @user.callAPI('cost', { 'hash' => self.hash })
    end

    # Call the DataSift API to get buffered interactions. Returns the value
    # of the 'stream' key of the response.
    # === Parameters
    #
    # * +count+ - Optional number of interactions to return (max 200).
    # * +from_id+ - Optional start ID.
    #
    # Raises InvalidDataError if the CSDL is empty, and APIError (with an
    # http_code of -1) if the response has no 'stream' key.
    #
    def getBuffered(count = false, from_id = false)
      raise InvalidDataError, "Cannot get buffered interactions for an empty definition." if @csdl.empty?

      params = { 'hash' => self.hash }
      params['count'] = count if count
      params['interaction_id'] = from_id if from_id

      retval = @user.callAPI('stream', params)

      # APIError's constructor takes the HTTP code, so the message has to be
      # supplied separately; -1 marks "no HTTP code available".
      raise APIError.new(-1), 'No data in the response' unless retval.has_key?('stream')

      retval['stream']
    end

    # Returns a StreamConsumer-derived object for this definition, for the
    # given type.
    # === Parameters
    #
    # * +type+ - The consumer type for which to construct a consumer.
    # * +on_interaction+ - Accepted but currently unused.
    # * +on_stopped+ - Accepted but currently unused.
    #
    def getConsumer(type = nil, on_interaction = nil, on_stopped = nil)
      StreamConsumer.factory(@user, type, self)
    end
  end
end
@@ -0,0 +1,16 @@
1
module DataSift
  # Error classes used by the DataSift client library. All of them derive
  # from StandardError so a bare +rescue+ catches them.
  class AccessDeniedError < StandardError; end
  class CompileFailedError < StandardError; end
  class InvalidDataError < StandardError; end
  class NotYetImplementedError < StandardError; end
  class RateLimitExceededError < StandardError; end
  class StreamError < StandardError; end

  # Error carrying the HTTP status code of a failed API call.
  #
  # The first constructor argument is the HTTP code, not the message. To
  # attach a message either pass it as the second argument or use the
  # two-argument form of raise:
  #
  #   raise APIError.new(404), 'Not found'
  #
  class APIError < StandardError
    attr_reader :http_code

    # === Parameters
    #
    # * +http_code+ - The HTTP status code; defaults to -1 when not known.
    # * +message+ - Optional error message.
    #
    def initialize(http_code = -1, message = nil)
      super(message)
      @http_code = http_code
    end
  end
end
@@ -0,0 +1,55 @@
1
+ #
2
+ # mockapiclient.rb - This file contains the MockApiClient class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The MockApiClient class implements a fake DataSift API interface.
9
+
10
module DataSift
  # MockApiClient class.
  #
  # == Introduction
  #
  # The MockApiClient class implements a fake DataSift API interface: it
  # returns a canned response instead of making a request.
  #
  class MockApiClient
    # Start with no canned response, so #call raises until one is set.
    def initialize
      @response = false
    end

    # Set the response to be returned by the call method
    # === Parameters
    #
    # * +code+ - The HTTP response code
    # * +data+ - The dictionary that would have come from the response body
    # * +rate_limit+ - The new rate_limit value
    # * +rate_limit_remaining+ - The new rate_limit_remaining value
    def setResponse(code, data, rate_limit, rate_limit_remaining)
      @response = {
        'response_code' => code,
        'data' => data,
        'rate_limit' => rate_limit,
        'rate_limit_remaining' => rate_limit_remaining,
      }
    end

    # Clear the response so we throw an exception if we get called again
    # without a new response being set.
    #
    def clearResponse
      @response = false
    end

    # Fake a call to a DataSift API endpoint. All arguments are ignored; the
    # response stored by setResponse is returned as-is.
    # === Parameters
    #
    # * +username+ - The username for the Auth header
    # * +api_key+ - The API key for the Auth header
    # * +endpoint+ - The endpoint of the API call.
    # * +params+ - The parameters to be passed along with the request.
    # * +user_agent+ - The user agent string for the request.
    #
    # Raises StandardError if no response has been set.
    #
    def call(username, api_key, endpoint, params = {}, user_agent = 'DataSiftPHP/0.0')
      raise StandardError, 'Expected response not set in mock object' unless @response

      @response
    end
  end
end
@@ -0,0 +1,124 @@
1
+ #
2
+ # stream_consumer.rb - This file contains the StreamConsumer class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The StreamConsumer class is the base class for various stream consumers.
9
+
10
module DataSift

  # StreamConsumer class.
  #
  # Base class for stream consumers. It holds the consumer state and the
  # stop callback; extending classes implement onStart to do the actual
  # consuming.
  #
  class StreamConsumer
    TYPE_HTTP = 'HTTP'

    STATE_STOPPED = 0
    STATE_STARTING = 1
    STATE_RUNNING = 2
    STATE_STOPPING = 3

    # Factory function. Creates a StreamConsumer-derived object for the given
    # type by looking up the class DataSift::StreamConsumer_<type>.
    # === Parameters
    #
    # * +user+ - The user the consumer will run as.
    # * +type+ - Use the TYPE_ constants; nil selects TYPE_HTTP.
    # * +definition+ - CSDL string or a Definition object.
    #
    # Raises NameError if no consumer class exists for +type+.
    #
    def self.factory(user, type, definition)
      type ||= TYPE_HTTP
      consumer_class = DataSift.const_get('StreamConsumer_' + type)
      consumer_class.new(user, definition)
    end

    attr_accessor :auto_reconnect
    attr_reader :state, :stop_reason

    # Constructor. Do not use this directly, use the factory method instead.
    # The definition is compiled immediately to ensure it is valid.
    # === Parameters
    #
    # * +user+ - The user this consumer will run as.
    # * +definition+ - CSDL string or a Definition object.
    #
    # Raises InvalidDataError if +user+ is not a DataSift::User or
    # +definition+ is neither a String nor a Definition.
    #
    def initialize(user, definition)
      raise InvalidDataError, 'Please supply a valid User object when creating a StreamConsumer object.' unless user.is_a? DataSift::User

      if definition.is_a? String
        @definition = user.createDefinition(definition)
      elsif definition.is_a? Definition
        @definition = definition
      else
        raise InvalidDataError, 'The definition must be a CSDL string or a DataSift::Definition object'
      end

      @user = user
      @auto_reconnect = true
      @stop_reason = 'Unknown reason'
      @state = STATE_STOPPED
      @on_stopped = nil

      # Compile the definition to ensure it's valid for use
      @definition.compile
    end

    # Gets or sets the callback invoked when the consumer is stopped.
    #
    # With a block: stores it as the callback and returns self so calls can
    # be chained. Without a block: returns the stored callback (nil if none).
    # The callback receives the reason why the consumer stopped.
    #
    def onStopped(&block)
      return @on_stopped unless block_given?

      @on_stopped = block
      self
    end

    # Once an instance of a StreamConsumer is ready for use, call this to
    # start consuming. Extending classes should implement onStart to handle
    # actually starting.
    # === Parameters
    #
    # * +auto_reconnect+ - Whether the consumer should automatically reconnect.
    # * +block+ - An optional block to receive incoming interactions.
    #
    def consume(auto_reconnect = true, &block)
      @auto_reconnect = auto_reconnect

      # Start consuming
      @state = STATE_STARTING
      onStart(&block)
    end

    # Called when the consumer should start consuming the stream. This
    # default implementation only prints a notice; extending classes must
    # override it.
    #
    def onStart(&block)
      puts 'onStart method has not been overridden!'
    end

    # This method can be called at any time to *request* that the consumer
    # stop consuming. This method sets the state to STATE_STOPPING and it's
    # up to the consumer implementation to notice that this has changed, stop
    # consuming and call the onStop method.
    #
    # Raises InvalidDataError unless the consumer is currently running.
    #
    def stop
      raise InvalidDataError, 'Consumer state must be RUNNING before it can be stopped' unless @state == StreamConsumer::STATE_RUNNING
      @state = StreamConsumer::STATE_STOPPING
    end

    # Default implementation of onStop. It's unlikely that this method will
    # ever be used in isolation, but rather it should be called as the final
    # step in the extending class's implementation. Marks the consumer as
    # stopped, records the reason and invokes the onStopped callback if set.
    # === Parameters
    #
    # * +reason+ - The reason why the consumer stopped.
    #
    def onStop(reason = '')
      # Only a stop that was neither requested nor explained is "Unexpected".
      reason = 'Unexpected' if @state != StreamConsumer::STATE_STOPPING && reason.empty?
      @state = StreamConsumer::STATE_STOPPED
      @stop_reason = reason
      @on_stopped.call(reason) unless @on_stopped.nil?
    end
  end
end
@@ -0,0 +1,160 @@
1
+ #
2
+ # stream_consumer_http.rb - This file contains the StreamConsumer_HTTP class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The StreamConsumer_HTTP class implements HTTP streaming.
9
+
10
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../')
11
+
12
require 'cgi'
require 'socket'
require 'uri'
require 'yajl'
15
+
16
module DataSift

  # StreamConsumer_HTTP class.
  #
  # Consumes a stream by writing an HTTP/1.1 GET request to a TCP socket and
  # feeding the response body to a Yajl parser.
  #
  class StreamConsumer_HTTP < StreamConsumer

    # MIME types accepted for non-chunked response bodies.
    ALLOWED_MIME_TYPES = ['application/json', 'text/plain'].freeze

    # Constructor. Requires valid user and definition objects.
    def initialize(user, definition)
      super
      @socket = nil
      @response_head = { :headers => {} }
    end

    # Connects (if necessary) and consumes the stream.
    #
    # For chunked responses each parsed JSON object is passed to +block+, and
    # the method keeps reconnecting while auto_reconnect is set and the
    # consumer is still running. When it finishes it disconnects and calls
    # onStop with 'Stop requested' or 'Connection dropped'.
    #
    # For non-chunked responses the body is parsed in one go and the parse
    # result is returned directly.
    # NOTE(review): that path returns without calling onStop, so the state
    # stays RUNNING - confirm whether that is intended.
    #
    # Raises StreamError for a chunked response without a block, or for an
    # unhandled MIME type.
    #
    def onStart(&block)
      loop do
        reconnect if @socket.nil? || @socket.closed?

        parser = Yajl::Parser.new
        parser.on_parse_complete = block if block_given?

        if @response_head[:headers]['Transfer-Encoding'] == 'chunked'
          unless block_given?
            raise StreamError, 'Chunked responses detected, but no block given to handle the chunks.'
          end
          readChunks(parser)
        else
          begin
            return parseUnchunked(parser, &block)
          ensure
            disconnect
          end
        end

        # The stream has ended (or a stop was requested). Close the socket so
        # that another pass through the loop opens a fresh connection instead
        # of re-reading a finished one.
        disconnect

        break unless @auto_reconnect && @state == StreamConsumer::STATE_RUNNING
      end

      if @state == StreamConsumer::STATE_STOPPING
        @stop_reason = 'Stop requested'
      else
        @stop_reason = 'Connection dropped'
      end

      onStop(@stop_reason)
    end

    # Opens a new connection to the stream for this definition and reads the
    # response headers into @response_head. Retries until a 200 response is
    # received, sleeping 10 seconds before the first retry and doubling the
    # delay on each further failure.
    #
    # Raises StreamError on a 404 response, or when a request fails once the
    # delay has reached 240 seconds. Socket-level errors are not rescued
    # here and propagate to the caller.
    #
    def reconnect
      uri = URI.parse('http://' + User::STREAM_BASE_URL + @definition.hash +
                      '?username=' + CGI.escape(@user.username) + '&api_key=' + CGI.escape(@user.api_key))

      user_agent = @user.getUserAgent

      request = "GET #{uri.path}#{uri.query ? "?"+uri.query : nil} HTTP/1.1\r\n"
      request << "Host: #{uri.host}\r\n"
      request << "User-Agent: #{user_agent}\r\n"
      request << "Accept: */*\r\n"
      request << "\r\n"

      connection_delay = 0

      begin
        # Close the socket if it's open
        disconnect

        # Back off a bit if required
        sleep(connection_delay) if connection_delay > 0

        @socket = TCPSocket.new(uri.host, uri.port)
        @socket.write(request)
        @response_head = readResponseHead

        case @response_head[:code]
        when 200
          # Success!
          @state = StreamConsumer::STATE_RUNNING
        when 404
          disconnect
          raise StreamError, 'Hash not found!'
        else
          failure = "Connection failed: #{@response_head[:code]} #{@response_head[:msg]}"
          puts failure
          if connection_delay == 0
            connection_delay = 10
          elsif connection_delay < 240
            connection_delay *= 2
          else
            disconnect
            raise StreamError, failure
          end
        end
      end while @state != StreamConsumer::STATE_RUNNING
    end

    # Closes the socket if one is open.
    def disconnect
      @socket.close unless @socket.nil? || @socket.closed?
    end

    private

    # Reads chunks from the socket and feeds each chunk body to +parser+
    # until the terminating zero-size chunk, end of stream, or the consumer
    # leaves the RUNNING state (which is how a stop request is noticed).
    def readChunks(parser)
      while @state == StreamConsumer::STATE_RUNNING && !@socket.eof? && (line = @socket.gets)
        break if line =~ /^0.*?\r\n/
        next if line == "\r\n"

        size = line.hex
        json = @socket.read(size)
        next if json.nil?

        parser << json

        # Received only part of the chunk: grab the rest as well.
        remaining = size - json.bytesize
        if remaining > 0
          rest = @socket.read(remaining)
          parser << rest unless rest.nil?
        end
      end
    end

    # Parses a non-chunked response body straight from the socket, using the
    # stream reader matching the Content-Encoding header, and returns the
    # result of that parse call.
    def parseUnchunked(parser, &block)
      headers = @response_head[:headers]
      content_type = headers['Content-Type'].to_s.split(';').first.to_s

      unless ALLOWED_MIME_TYPES.include?(content_type)
        raise StreamError, 'Unhandled response MIME type ' + content_type
      end

      opts = {}

      # The compressed readers live in optional yajl files, so they are
      # loaded only when a response actually needs them.
      case headers['Content-Encoding']
      when 'gzip'
        require 'yajl/gzip'
        Yajl::Gzip::StreamReader.parse(@socket, opts, &block)
      when 'deflate'
        require 'yajl/deflate'
        Yajl::Deflate::StreamReader.parse(@socket, opts.merge({:deflate_options => -Zlib::MAX_WBITS}), &block)
      when 'bzip2'
        require 'yajl/bzip2'
        Yajl::Bzip2::StreamReader.parse(@socket, opts, &block)
      else
        parser.parse(@socket)
      end
    end

    # Reads the status line and headers from the socket, up to the blank line
    # that ends them. Returns a hash with :version, :code, :msg and :headers.
    def readResponseHead
      head = { :headers => {} }

      @socket.each_line do |line|
        break if line == "\r\n" # end of the headers

        name, value = line.split(': ', 2)
        if !value.nil?
          head[:headers][name] = value.strip
        elsif line =~ /\AHTTP\//
          # Status line, e.g. "HTTP/1.1 404 Not Found"; the limit of 3 keeps
          # a multi-word reason phrase in one piece.
          version, code, msg = line.strip.split(' ', 3)
          head[:version] = version
          head[:code] = code.to_i
          head[:msg] = msg
        end
      end

      head
    end

  end

end