datasift 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,154 @@
1
+ #
2
+ # definition.rb - This file contains the Definition class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The Definition class represents a stream definition. It holds the CSDL
9
+ # and, once the definition has been compiled through the API, the hash, cost
10
+ # and creation date that the API returned for it.
11
+
12
module DataSift

  # Definition class.
  #
  # == Introduction
  #
  # The Definition class represents a stream definition: a CSDL string plus
  # the hash, total cost and creation date returned by the API when the CSDL
  # is compiled. Until a definition has been compiled those three values are
  # +false+.
  #
  class Definition
    attr_reader :csdl, :total_cost, :created_at

    # Constructor. A User object is required, and you can optionally supply a
    # default CSDL string and a hash.
    # === Parameters
    #
    # * +user+ - The DataSift::User object.
    # * +csdl+ - Optional default CSDL string.
    # * +hash+ - Optional default hash string.
    #
    # Raises InvalidDataError if +user+ is not a DataSift::User or +csdl+ is
    # not a String.
    #
    def initialize(user, csdl = '', hash = false)
      raise InvalidDataError, 'Please supply a valid User object when creating a Definition object.' unless user.is_a? DataSift::User
      @user = user
      clearHash()
      # The CSDL setter clears the hash whenever the CSDL differs from the
      # stored value, which is always the case during construction. The
      # supplied hash is therefore assigned afterwards so it is not discarded.
      self.csdl = csdl
      @hash = hash
    end

    # CSDL setter. Stores a stripped copy of the incoming string and resets
    # the hash if the CSDL has changed.
    #
    # Raises InvalidDataError if +csdl+ is not a String.
    #
    def csdl=(csdl)
      raise InvalidDataError, 'The CSDL must be a string.' unless csdl.is_a? String
      # Non-destructive strip: the caller's string is left untouched and
      # frozen strings are accepted.
      stripped = csdl.strip
      clearHash() unless stripped == @csdl
      @csdl = stripped
    end

    # Hash getter. If the hash has not yet been obtained the CSDL will be
    # compiled first. A CompileFailedError raised by that compilation is
    # swallowed, in which case whatever #compile left in the hash is returned.
    #
    # Note that this overrides Object#hash.
    #
    def hash
      if @hash == false
        begin
          compile()
        rescue DataSift::CompileFailedError
          # Ignore
        end
      end

      @hash
    end

    # Reset the hash to false. The effect of this is to mark the definition as
    # requiring compilation. The total cost and creation date are reset too.
    def clearHash()
      @hash = false
      @total_cost = false
      @created_at = false
    end

    # Call the DataSift API to compile this definition. On success it will
    # store the returned hash, cost and creation date.
    #
    # Raises InvalidDataError if the CSDL is empty, and CompileFailedError if
    # the API call fails or the response lacks 'hash', 'cost' or 'created_at'.
    #
    def compile()
      raise InvalidDataError, 'Cannot compile an empty definition.' unless @csdl.length > 0

      begin
        res = @user.callAPI('compile', { 'csdl' => @csdl })

        if res.has_key?('hash')
          @hash = res['hash']
        else
          raise CompileFailedError, 'Compiled successfully but no hash in the response'
        end

        if res.has_key?('cost')
          @total_cost = Integer(res['cost'])
        else
          raise CompileFailedError, 'Compiled successfully but no cost in the response'
        end

        if res.has_key?('created_at')
          @created_at = Date.parse(res['created_at'])
        else
          raise CompileFailedError, 'Compiled successfully but no created_at in the response'
        end
      rescue APIError => err
        clearHash()

        case err.http_code
        when 400
          raise CompileFailedError, err.message
        else
          # Interpolate rather than concatenate: the exception object is not a
          # String, so "+" would raise a TypeError and hide the real failure.
          raise CompileFailedError, "Unexpected APIError code: #{err.http_code} [#{err.message}]"
        end
      end
    end

    # Call the DataSift API to get the cost for this definition. Returns the
    # response of the 'cost' API call unchanged.
    #
    # Raises InvalidDataError if the CSDL is empty.
    #
    def getCostBreakdown()
      raise InvalidDataError, "Cannot get the cost for an empty definition." unless @csdl.length > 0

      @user.callAPI('cost', { 'hash' => self.hash })
    end

    # Call the DataSift API to get buffered interactions. Returns the value
    # stored under 'stream' in the API response.
    # === Parameters
    #
    # * +count+ - Optional number of interactions to return (max 200).
    # * +from_id+ - Optional start ID.
    #
    # Raises InvalidDataError if the CSDL is empty and APIError if the
    # response has no 'stream' entry.
    #
    def getBuffered(count = false, from_id = false)
      raise InvalidDataError, "Cannot get buffered interactions for an empty definition." unless @csdl.length > 0

      params = { 'hash' => self.hash }
      params['count'] = count if count
      params['interaction_id'] = from_id if from_id

      retval = @user.callAPI('stream', params)

      # APIError's constructor argument is the HTTP code, not the message, so
      # build the instance first (-1: no HTTP status applies) and hand the
      # message to raise separately.
      raise APIError.new(-1), 'No data in the response' unless retval.has_key?('stream')

      retval['stream']
    end

    # Returns a StreamConsumer-derived object for this definition, for the
    # given type.
    # === Parameters
    #
    # * +type+ - The consumer type for which to construct a consumer.
    # * +on_interaction+ - Accepted but currently unused.
    # * +on_stopped+ - Accepted but currently unused.
    #
    def getConsumer(type = nil, on_interaction = nil, on_stopped = nil)
      StreamConsumer.factory(@user, type, self)
    end
  end
end
@@ -0,0 +1,16 @@
1
module DataSift
  # Error classes raised by the library. All of them derive from
  # StandardError so a bare +rescue+ will catch them.
  class AccessDeniedError < StandardError; end
  class CompileFailedError < StandardError; end
  class InvalidDataError < StandardError; end
  class NotYetImplementedError < StandardError; end
  class RateLimitExceededError < StandardError; end
  class StreamError < StandardError; end

  # Error carrying the HTTP status code of a failed API call.
  class APIError < StandardError
    # The HTTP status code, or -1 when no status code applies.
    attr_reader :http_code

    # Constructor.
    # === Parameters
    #
    # * +http_code+ - The HTTP status code (defaults to -1). For
    #   <tt>raise APIError, 'some text'</tt>, where Ruby passes the text as
    #   the only argument, a non-Integer value is used as the exception
    #   message and the code is set to -1.
    #
    def initialize(http_code = -1)
      if http_code.is_a?(Integer)
        # No message here; "raise APIError.new(code), 'text'" supplies it.
        super()
        @http_code = http_code
      else
        super(http_code)
        @http_code = -1
      end
    end
  end
end
@@ -0,0 +1,55 @@
1
+ #
2
+ # mockapiclient.rb - This file contains the MockApiClient class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The MockApiClient class implements a fake DataSift API interface.
9
+
10
module DataSift
  # MockApiClient class.
  #
  # == Introduction
  #
  # The MockApiClient class implements a fake DataSift API interface: it
  # returns whatever response was last stored with setResponse.
  #
  class MockApiClient
    # Store the response that the call method will hand back.
    # === Parameters
    #
    # * +code+ - The HTTP response code
    # * +data+ - The dictionary that would have come from the response body
    # * +rate_limit+ - The new rate_limit value
    # * +rate_limit_remaining+ - The new rate_limit_remaining value
    def setResponse(code, data, rate_limit, rate_limit_remaining)
      canned = {}
      canned['response_code'] = code
      canned['data'] = data
      canned['rate_limit'] = rate_limit
      canned['rate_limit_remaining'] = rate_limit_remaining
      @response = canned
    end

    # Forget the stored response, so that the next call raises unless a new
    # response has been set first.
    #
    def clearResponse()
      @response = false
    end

    # Fake a call to a DataSift API endpoint. None of the arguments are
    # inspected; the stored response is returned as is.
    # === Parameters
    #
    # * +username+ - The username for the Auth header
    # * +api_key+ - The API key for the Auth header
    # * +endpoint+ - The endpoint of the API call.
    # * +params+ - The parameters to be passed along with the request.
    # * +user_agent+ - The user agent string for the request.
    #
    # Raises StandardError when no response has been set.
    def call(username, api_key, endpoint, params = {}, user_agent = 'DataSiftPHP/0.0')
      return @response if @response

      raise StandardError, 'Expected response not set in mock object'
    end
  end
end
@@ -0,0 +1,124 @@
1
+ #
2
+ # stream_consumer.rb - This file contains the StreamConsumer class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The StreamConsumer class is the base class for the various stream consumers.
9
+
10
module DataSift

  # StreamConsumer class.
  #
  # Base class for stream consumers. It holds the user, the definition being
  # consumed and the consumer state; subclasses override onStart to do the
  # actual consuming.
  #
  class StreamConsumer
    TYPE_HTTP = 'HTTP'

    STATE_STOPPED = 0
    STATE_STARTING = 1
    STATE_RUNNING = 2
    STATE_STOPPING = 3

    # Factory function. Creates a StreamConsumer-derived object for the given
    # type. The class used is DataSift::StreamConsumer_<type>.
    # === Parameters
    #
    # * +user+ - The user the consumer will run as.
    # * +type+ - Use the TYPE_ constants. Defaults to TYPE_HTTP when nil.
    # * +definition+ - CSDL string or a Definition object.
    #
    def self.factory(user, type, definition)
      type ||= TYPE_HTTP
      # A plain local: the looked-up class is only needed for this call.
      klass = DataSift.const_get('StreamConsumer_' + type)
      klass.new(user, definition)
    end

    attr_accessor :auto_reconnect
    attr_reader :state, :stop_reason

    # Constructor. Do not use this directly, use the factory method instead.
    # The definition is compiled as part of construction.
    # === Parameters
    #
    # * +user+ - The user this consumer will run as.
    # * +definition+ - CSDL string or a Definition object.
    #
    # Raises InvalidDataError if +user+ is not a DataSift::User or
    # +definition+ is neither a String nor a Definition.
    #
    def initialize(user, definition)
      raise InvalidDataError, 'Please supply a valid User object when creating a Definition object.' unless user.is_a? DataSift::User

      if definition.is_a? String
        @definition = user.createDefinition(definition)
      elsif definition.is_a? Definition
        @definition = definition
      else
        raise InvalidDataError, 'The definition must be a CSDL string or a DataSift_Definition object'
      end

      @user = user
      @auto_reconnect = true
      @stop_reason = 'Unknown reason'
      @state = STATE_STOPPED

      # Compile the definition to ensure it's valid for use
      @definition.compile()
    end

    # Registers or fetches the handler that is called when the consumer has
    # stopped. With a block, the block is stored and the consumer itself is
    # returned; without one, the stored handler (or nil) is returned. The
    # handler is called with the reason why the consumer stopped.
    #
    def onStopped(&block)
      if block_given?
        @on_stopped = block
        self
      else
        @on_stopped
      end
    end

    # Once an instance of a StreamConsumer is ready for use, call this to
    # start consuming. Extending classes should implement onStart to handle
    # actually starting.
    # === Parameters
    #
    # * +auto_reconnect+ - Whether the consumer should automatically reconnect.
    # * +block+ - An optional block to receive incoming interactions.
    #
    def consume(auto_reconnect = true, &block)
      @auto_reconnect = auto_reconnect

      # Start consuming
      @state = STATE_STARTING
      onStart(&block)
    end

    # Called when the consumer should start consuming the stream. This
    # default implementation only reports that it has not been overridden.
    #
    def onStart()
      puts 'onStart method has not been overridden!'
    end

    # This method can be called at any time to *request* that the consumer
    # stop consuming. This method sets the state to STATE_STOPPING and it's
    # up to the consumer implementation to notice that this has changed, stop
    # consuming and call the onStop method.
    #
    # Raises InvalidDataError if the consumer is not currently running.
    #
    def stop()
      # Comparison, not assignment: "=" here would overwrite the state and
      # make this guard always pass.
      raise InvalidDataError, 'Consumer state must be RUNNING before it can be stopped' unless @state == StreamConsumer::STATE_RUNNING
      @state = StreamConsumer::STATE_STOPPING
    end

    # Default implementation of onStop. It's unlikely that this method will
    # ever be used in isolation, but rather it should be called as the final
    # step in the extending class's implementation. It marks the consumer as
    # stopped, records the reason and calls the onStopped handler if one has
    # been registered.
    # === Parameters
    #
    # * +reason+ - The reason why the consumer stopped.
    #
    def onStop(reason = '')
      # Only a stop that was neither requested nor explained is "Unexpected";
      # a reason supplied by the caller is always kept.
      if @state != StreamConsumer::STATE_STOPPING && reason.length == 0
        reason = 'Unexpected'
      end
      @state = StreamConsumer::STATE_STOPPED
      @stop_reason = reason
      handler = onStopped
      handler.call(reason) unless handler.nil?
    end
  end
end
@@ -0,0 +1,160 @@
1
+ #
2
+ # stream_consumer_http.rb - This file contains the StreamConsumer_HTTP class.
3
+ #
4
+ # Copyright (C) 2011 MediaSift Ltd
5
+ #
6
+ # == Overview
7
+ #
8
+ # The StreamConsumer_HTTP class implements HTTP streaming.
9
+
10
+ $LOAD_PATH.unshift(File.dirname(__FILE__) + '/../')
11
+
12
+ require 'uri'
13
+ require 'socket'
14
+ require 'yajl'
15
+
16
module DataSift

  # StreamConsumer_HTTP class.
  #
  # Consumes a stream over a plain TCP socket speaking HTTP/1.1, feeding the
  # response body to a Yajl parser.
  #
  class StreamConsumer_HTTP < StreamConsumer

    # Constructor. Requires valid user and definition objects.
    def initialize(user, definition)
      super
    end

    # Connects (if necessary) and reads the stream, passing each parsed JSON
    # document to the given block. Keeps going while auto-reconnect is on and
    # the consumer is running; afterwards it disconnects and calls onStop with
    # 'Stop requested' or 'Connection dropped'.
    #
    # Raises StreamError for a chunked response without a block, or for an
    # unhandled MIME type on a non-chunked response.
    #
    def onStart(&block)
      begin
        # A socket whose peer has gone away is at EOF but not "closed", so
        # test for EOF as well; otherwise this loop would spin without ever
        # reconnecting once the connection drops.
        reconnect() if @socket.nil? || @socket.closed? || @socket.eof?

        parser = Yajl::Parser.new
        parser.on_parse_complete = block if block_given?
        if @response_head[:headers]["Transfer-Encoding"] == 'chunked'
          if block_given?
            # Checking the state on every chunk is what lets a stop() request
            # take effect while data is still arriving.
            while @state == StreamConsumer::STATE_RUNNING && !@socket.eof? && (line = @socket.gets)
              break if line.match(/^0.*?\r\n/)
              next if line == "\r\n"
              size = line.hex
              json = @socket.read(size)
              next if json.nil?
              remaining = size - json.size
              if remaining > 0
                # received only part of the chunk, grab the rest and keep the
                # part that has already been read
                rest = @socket.read(remaining)
                json << rest unless rest.nil?
              end
              parser << json
            end
          else
            raise StreamError, 'Chunked responses detected, but no block given to handle the chunks.'
          end
        else
          # NOTE(review): ALLOWED_MIME_TYPES and opts are not defined anywhere
          # in this file; unless they are provided elsewhere this branch will
          # raise a NameError. The Yajl::Gzip / Deflate / Bzip2 readers and
          # Zlib are not required by this file either - confirm.
          content_type = @response_head[:headers]['Content-Type'].split(';')
          content_type = content_type.first
          if ALLOWED_MIME_TYPES.include?(content_type)
            case @response_head[:headers]['Content-Encoding']
            when 'gzip'
              return Yajl::Gzip::StreamReader.parse(@socket, opts, &block)
            when 'deflate'
              return Yajl::Deflate::StreamReader.parse(@socket, opts.merge({:deflate_options => -Zlib::MAX_WBITS}), &block)
            when 'bzip2'
              return Yajl::Bzip2::StreamReader.parse(@socket, opts, &block)
            else
              return parser.parse(@socket)
            end
          else
            raise StreamError, 'Unhandled response MIME type ' + content_type
          end
        end
      end while @auto_reconnect and @state == StreamConsumer::STATE_RUNNING

      disconnect()

      if @state == StreamConsumer::STATE_STOPPING
        @stop_reason = 'Stop requested'
      else
        @stop_reason = 'Connection dropped'
      end

      onStop(@stop_reason)
    end

    # Opens a new connection to the stream for this definition, sends the
    # request and reads the response headers into @response_head. Retries
    # with an increasing delay (10s, doubling) until a 200 response puts the
    # consumer into STATE_RUNNING.
    #
    # Raises StreamError on a 404 response, or when the connection still
    # fails once the delay has reached 240 seconds or more.
    #
    def reconnect()
      uri = URI.parse('http://' + User::STREAM_BASE_URL + @definition.hash +
                      '?username=' + CGI.escape(@user.username) + '&api_key=' + CGI.escape(@user.api_key))

      user_agent = @user.getUserAgent()

      request = "GET #{uri.path}#{uri.query ? "?"+uri.query : nil} HTTP/1.1\r\n"
      request << "Host: #{uri.host}\r\n"
      request << "User-Agent: #{user_agent}\r\n"
      request << "Accept: */*\r\n"
      request << "\r\n"

      connection_delay = 0

      begin
        # Close the socket if it's open
        disconnect()

        # Back off a bit if required
        sleep(connection_delay) if connection_delay > 0

        begin
          @socket = TCPSocket.new(uri.host, uri.port)

          @socket.write(request)
          @response_head = {}
          @response_head[:headers] = {}

          # Read the headers
          @socket.each_line do |line|
            break if line == "\r\n" # end of the headers

            # Split on the first ": " only so header values that contain
            # ": " themselves are kept whole.
            header = line.split(": ", 2)
            if header.size == 1
              # No separator: this is the status line.
              status = header[0].split(" ")
              @response_head[:version] = status[0]
              @response_head[:code] = status[1].to_i
              @response_head[:msg] = status[2]
            else
              @response_head[:headers][header[0]] = header[1].strip
            end
          end

          if @response_head[:code] == 200
            # Success!
            @state = StreamConsumer::STATE_RUNNING
          elsif @response_head[:code] == 404
            raise StreamError, 'Hash not found!'
          else
            # Interpolate: the status code is an Integer and cannot be
            # concatenated onto a String with "+".
            failure = "Connection failed: #{@response_head[:code]} #{@response_head[:msg]}"
            puts failure
            if connection_delay == 0
              connection_delay = 10
            elsif connection_delay < 240
              connection_delay *= 2
            else
              raise StreamError, failure
            end
          end
        #rescue
        #  if connection_delay == 0
        #    connection_delay = 1
        #  elsif connection_delay <= 16
        #    connection_delay += 1
        #  else
        #    raise StreamError, 'Connection failed due to a network error'
        #  end
        end
      end while @state != StreamConsumer::STATE_RUNNING
    end

    # Closes the socket if there is one and it is still open.
    def disconnect()
      @socket.close if !@socket.nil? and !@socket.closed?
    end

  end

end