shopikon_import_io_connector 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ec3d798b75de4022baa5c204727f0ac5f1c9b2d0
4
- data.tar.gz: 9fcdcc841fed877ae3ce29bb9958a886a82da7f9
3
+ metadata.gz: e0f199117ee26114e7ad364e888576b260137c41
4
+ data.tar.gz: 8bebb3fd1ecfb5014f78e7fc218b3e5b7854215d
5
5
  SHA512:
6
- metadata.gz: 56e06bd2f9f1e6ea8e9e136757b0b0b47ceb86998472597d9276dc8705aa094cfaa72d5a3f1029198f5c6ade4dd69ac060ee00aadf77b422bc3779642d770218
7
- data.tar.gz: 18d93e043f432fea5a9163a5db510d32f51120c5b76a680c0c267016e5c1b050ec35a1725c153c6524aaa00d202de5f612f309b0837c164dee5552b239e1fe73
6
+ metadata.gz: 08453c4eff290d4f6b4839f62bdd752e430ee92454646bd29ef012e37bf000504a50c27058c0b075d092b87680c380aa3854ae5f0d1490e32c911b5ee4ca27a5
7
+ data.tar.gz: 958d10249739d694fbd0b9ea79be170df112d0e5b95b2f8ab5243faaf5df91f6ddf7f97d535e61fb181a6a2d4874e85ae7a4e75b6dd7c54f4aa06ccd83440e4e
@@ -0,0 +1,20 @@
1
class CurlRequestBuilder
  # Builds a Curl::Easy handle pre-configured with browser-like defaults
  # (desktop Chrome User-Agent, cookies, bounded redirects, timeouts).
  #
  # The configured handle is exposed through #request; this class never
  # performs the transfer itself.

  # The Curl::Easy handle created by #initialize.
  attr_reader :request

  # options - Hash of settings; only options[:url] is read (the URL handed to
  #           Curl::Easy.new). It is nil when the key is absent.
  def initialize(options = {})
    @request = Curl::Easy.new(options[:url]) do |curl|
      curl.headers["User-Agent"] = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36"
      curl.enable_cookies = true
      curl.follow_location = true
      curl.max_redirects = 3
      # Empty string: presumably asks libcurl to advertise every encoding it
      # supports - confirm against the curb/libcurl documentation.
      curl.encoding = ""
      curl.autoreferer = true
      curl.connect_timeout = 5
      curl.timeout = 120
      # Host-name verification of the TLS certificate is switched off.
      # NOTE(review): the original assigned ssl_verify_host twice in a row
      # (0, then false); the second assignment was redundant and has been
      # collapsed. It may have been meant to be `ssl_verify_peer = false` -
      # confirm the intent before relaxing TLS checks any further.
      curl.ssl_verify_host = 0
    end
  end
end
@@ -0,0 +1,140 @@
1
+ #
2
+ # import.io client library - client classes
3
+ #
4
+ # This file contains the main classes required to connect to and query import.io APIs
5
+ #
6
+ # Dependencies: Ruby 1.9, http-cookie
7
+ #
8
+ # @author: dev@import.io
9
+ # @source: https://github.com/import-io/importio-client-libs/tree/master/python
10
+ #
11
+
12
+ require "net/http"
13
+ require "uri"
14
+ require "thread"
15
+ require "http-cookie"
16
+ require "cgi"
17
+ require "json"
18
+ require "securerandom"
19
+
20
class ImportIO
  # The main import.io client. It owns at most one ImportIoSession and
  # forwards queries to it; queries issued while no connected session exists
  # are held in an internal queue and replayed by #connect.

  # We use this only for a specific test case
  attr_reader :session

  # user_id - user identifier passed on to the session (may be nil)
  # api_key - API key passed on to the session (may be nil)
  # host    - base URL of the query server
  def initialize(user_id=nil, api_key=nil, host="https://query.import.io")
    @host = host
    @proxy_host = nil
    @proxy_port = nil
    @user_id = user_id
    @api_key = api_key
    @username = nil
    @password = nil
    @login_host = nil
    @session = nil
    @queue = Queue.new
  end

  # Configures the HTTP proxy handed to sessions created afterwards.
  def proxy(host, port)
    @proxy_host = host
    @proxy_port = port
  end

  # Cookie-based authentication: remembers the credentials (so #reconnect can
  # log in again), connects a session if there is none, then logs in on it.
  def login(username, password, host="https://api.import.io")
    @username = username
    @password = password
    @login_host = host

    connect if @session.nil?

    @session.login(@username, @password, @login_host)
  end

  # Drops the current session (if any) and establishes a new one, logging in
  # again when credentials were previously supplied through #login.
  def reconnect
    disconnect unless @session.nil?

    if @username.nil?
      connect
    else
      login(@username, @password, @login_host)
    end
  end

  # Creates and connects a session unless one already exists, then replays
  # every query that was queued while disconnected.
  def connect
    return unless @session.nil?

    @session = ImportIoSession.new(self, @host, @user_id, @api_key, @proxy_host, @proxy_port)
    @session.connect

    # Queued entries are Hashes built by #query, so they must be read with
    # string keys. Draining first means entries that #query has to re-queue
    # (session still not connected) are not picked up again in this pass.
    drain_queue.each do |pending|
      query(pending["query"], pending["callback"])
    end
  end

  # Disconnects and forgets the current session, if there is one.
  def disconnect
    return if @session.nil?

    @session.disconnect
    @session = nil
  end

  # Stops the session's threads. Returns the session's result, or nil when
  # there is no session.
  def stop
    @session.stop unless @session.nil?
  end

  # Waits on the session's threads. Returns the session's result, or nil when
  # there is no session.
  def join
    @session.join unless @session.nil?
  end

  # Issues the query on the session when it is connected; otherwise queues
  # the query/callback pair for replay by #connect and returns nil.
  def query(query, callback)
    if @session.nil? || !@session.connected
      @queue << { "query" => query, "callback" => callback }
      return
    end

    @session.query(query, callback)
  end

  private

  # Removes and returns everything currently in @queue without blocking.
  def drain_queue
    drained = []
    begin
      loop { drained << @queue.pop(true) }
    rescue ThreadError
      # A non-blocking pop raises ThreadError once the queue is empty.
    end
    drained
  end
end
@@ -0,0 +1,61 @@
1
class ImportIoQuery
  # Tracks the progress of a single query issued to the import.io platform by
  # counting the job messages received for it, and relays every message to
  # the callback supplied at construction.

  # callback - object responding to #call(query_state, message)
  # query    - the query payload this tracker belongs to (stored, not
  #            interpreted here)
  def initialize(callback, query)
    @query = query
    @jobs_spawned = 0
    @jobs_started = 0
    @jobs_completed = 0
    @_finished = false
    @_callback = callback
  end

  # Handles one message: updates the job counters from data["type"],
  # recomputes the finished flag, then invokes the callback with
  # (self, data). Returns whatever the callback returns.
  def _on_message(data)
    msg_type = data["type"]

    # The counters always start at 0 (see #initialize), so they can be
    # incremented directly; no presence check (an ActiveSupport-only method)
    # is needed.
    case msg_type
    when "SPAWN"
      # A new job is being initialised on the server
      @jobs_spawned += 1
    when "INIT", "START"
      # A page of work has been started on the server
      @jobs_started += 1
    when "STOP"
      # A job has finished on the server
      @jobs_completed += 1
    end

    # Finished when at least one job started, every started job completed,
    # and started == spawned + 1 (the +1 covers the initial job, which is
    # started without a SPAWN message of its own).
    @_finished = @jobs_started > 0 &&
                 @jobs_started == @jobs_completed &&
                 @jobs_spawned + 1 == @jobs_started

    # These message types mean the query was terminated on the server
    # (error, not logged in, or cancelled), so it is finished regardless of
    # the counters.
    @_finished = true if ["ERROR", "UNAUTH", "CANCEL"].include?(msg_type)

    @_callback.call(self, data)
  end

  # Returns true once the query has completed or been terminated.
  def finished
    @_finished
  end
end
@@ -0,0 +1,327 @@
1
class ImportIoSession
  # Session manager: performs the CometD handshake/subscribe/connect/
  # disconnect requests against the import.io server, sends queries, and
  # dispatches the messages that come back to the matching ImportIoQuery.

  # We use this only for a specific test case
  attr_accessor :client_id
  attr_reader :connected

  # io         - owning client; #reconnect is called on it when the server
  #              reports "402::Unknown client"
  # host       - base URL of the query server
  # user_id    - sent as "_user" with every request when api_key is set
  # api_key    - sent as "_apikey" with every request when not nil
  # proxy_host - optional HTTP proxy host passed to Net::HTTP
  # proxy_port - optional HTTP proxy port passed to Net::HTTP
  def initialize(io, host="https://query.import.io", user_id=nil, api_key=nil, proxy_host=nil, proxy_port=nil)
    @io = io
    @msg_id = 1
    @client_id = nil
    @url = "#{host}/query/comet/"
    @messaging_channel = "/messaging"
    @queries = {}
    @user_id = user_id
    @api_key = api_key
    @queue = Queue.new
    @connected = false
    @connecting = false
    @disconnecting = false
    @polling = false
    # Start with no threads so #stop is safe to call before #connect
    @threads = []
    # These variables serve to identify this client and its version to the server
    @clientName = "import.io Ruby client"
    @clientVersion = "2.0.0"
    @cj = HTTP::CookieJar.new
    @proxy_host = proxy_host
    @proxy_port = proxy_port
  end

  # Builds (but does not send) a POST request for url carrying data as body.
  # Returns [uri, http, request].
  def make_request(url, data)
    uri = URI(url)
    request = Net::HTTP::Post.new(uri.request_uri)
    request.body = data
    http = Net::HTTP.new(uri.host, uri.port, @proxy_host, @proxy_port)
    http.use_ssl = uri.scheme == "https"
    [uri, http, request]
  end

  # Sends the request, stores any Set-Cookie headers of the response in the
  # cookie jar, and returns the response.
  def open(uri, http, request)
    response = http.request(request)
    # get_fields returns nil when the header is absent
    Array(response.get_fields("set-cookie")).each do |value|
      @cj.parse(value, uri)
    end
    response
  end

  # Encodes a Hash as an x-www-form-urlencoded string. Keys and values are
  # converted with to_s first, so a nil value is encoded as an empty string
  # instead of raising.
  def encode(dict)
    dict.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join("&")
  end

  # Cookie-based authentication: posts the credentials to "#{host}/auth/login".
  # Raises RuntimeError when the response code is not "200".
  def login(username, password, host="https://api.import.io")
    data = encode({ "username" => username, "password" => password })
    uri, http, req = make_request("#{host}/auth/login", data)
    r = open(uri, http, req)

    raise "Could not log in, code #{r.code}" if r.code != "200"
  end

  # Makes a request on the given CometD channel and queues every message
  # that comes back on the messaging channel.
  #
  # channel - CometD channel name
  # path    - path appended to the comet URL
  # data    - Hash payload; NOTE: it is modified in place ("channel",
  #           "connectionType", "id" and possibly "clientId" are added)
  # throw   - when true, failures raise RuntimeError; when false they are
  #           printed instead
  #
  # Returns the response (its body replaced by the parsed JSON), or nil when
  # the session is neither connected nor connecting once the response
  # arrives, or when a non-200 response was only logged.
  def request(channel, path="", data={}, throw=true)
    # These are CometD configuration values that are common to all requests
    data["channel"] = channel
    data["connectionType"] = "long-polling"

    # The message ID is incremented with each request that we send
    data["id"] = @msg_id
    @msg_id += 1

    # If we have a client ID (set from the handshake response), send it
    data["clientId"] = @client_id unless @client_id.nil?

    url = "#{@url}#{path}"

    # With API key authentication the key is sent with each request
    unless @api_key.nil?
      url = "#{url}?#{encode({ "_user" => @user_id, "_apikey" => @api_key })}"
    end

    uri, http, req = make_request(url, JSON.dump([data]))
    req.content_type = "application/json;charset=UTF-8"
    req["Cookie"] = HTTP::Cookie.cookie_value(@cj.cookies(uri))
    req["import-io-client"] = @clientName
    req["import-io-client-version"] = @clientVersion

    response = open(uri, http, req)

    # Don't process the response if we've disconnected in the meantime
    return if !@connected && !@connecting

    # A non-200 response means a serious issue (configuration wrong or
    # server down)
    if response.code != "200"
      error_message = "Unable to connect to import.io, status #{response.code} for url #{url}"
      raise error_message if throw
      puts error_message
      # The body of a failed response is not a message list, so stop here
      # instead of trying to JSON-parse it.
      return
    end

    response.body = JSON.parse(response.body)

    response.body.each do |msg|
      # An unsuccessful message means an import.io server error occurred
      if msg.has_key?("successful") && msg["successful"] != true
        error_message = "Unsuccessful request: #{msg}"
        # Failures are only acted upon while fully connected
        next if @disconnecting || !@connected || @connecting

        if msg["error"] == "402::Unknown client"
          # If we get a 402 unknown client we need to reconnect
          puts "402 received, reconnecting"
          @io.reconnect
        elsif throw
          raise error_message
        else
          puts error_message
        end
      end

      # Ignore messages on a CometD channel that we have not subscribed to
      next if msg["channel"] != @messaging_channel

      # A valid message on the right channel: queue it up to be processed
      @queue.push(msg["data"])
    end

    response
  end

  # Makes the CometD handshake request and stores the client ID found in the
  # first message of the response. Does nothing further when #request
  # returned nil.
  def handshake
    response = request(
      "/meta/handshake",
      "handshake",
      {
        "version" => "1.0",
        "minimumVersion" => "0.9",
        "supportedConnectionTypes" => ["long-polling"],
        "advice" => { "timeout" => 60000, "interval" => 0 }
      }
    )

    return if response.nil?

    @client_id = response.body[0]["clientId"]
  end

  # Issues a CometD subscription request for channel.
  def subscribe(channel)
    request("/meta/subscribe", "", { "subscription" => channel })
  end

  # Connects to the server unless already connected or connecting:
  # handshake, subscribe to the messaging channel, then start the two
  # background threads.
  def connect
    return if @connected || @connecting

    @connecting = true
    begin
      # Do the handshake request to register the client on the server
      handshake

      # Register this client with a subscription to our message channel
      subscribe(@messaging_channel)

      # Now we are subscribed, we can set the client as connected
      @connected = true

      # HTTP requests are synchronous, so the long-polling connection and the
      # handling of queued messages each run in their own thread to avoid
      # blocking the caller.
      @threads = [
        Thread.new { poll },
        Thread.new { poll_queue }
      ]
    ensure
      # Always clear the flag, otherwise a failed handshake would make every
      # later #connect call return immediately.
      @connecting = false
    end
  end

  # Disconnects from the server and sends a "DISCONNECT" message to every
  # query that was still open.
  def disconnect
    # Keep a local copy of the queries, and then erase them from the session
    open_queries = @queries.clone
    @queries = {}

    # Notify handlers that we are disconnecting
    @disconnecting = true

    # Clearing this stops the polling loops and response processing
    @connected = false

    begin
      # Make the disconnect request to the server
      request("/meta/disconnect")
    ensure
      # Even if the request fails, the client ID is no longer valid for us
      # and the disconnecting flag must not stay set.
      @client_id = nil
      @disconnecting = false
    end

    open_queries.each do |key, query|
      query._on_message({ "type" => "DISCONNECT", "requestId" => key })
    end
  end

  # Terminates all of the threads started by #connect (none before the first
  # successful connect).
  def stop
    @threads.each { |thread| thread.terminate }
  end

  # Blocks while connected until there are no open queries left, then stops
  # the threads. Returns immediately when not connected.
  def join
    while @connected
      if @queries.length == 0
        # When there are no more queries, stop all the threads
        stop
        return
      end
      sleep 1
    end
  end

  # Run in a background thread: processes queued messages for as long as the
  # session is connected. Errors from processing are printed, not raised, so
  # one bad message does not end the loop.
  def poll_queue
    while @connected
      begin
        # Queue#pop blocks until a message is available
        process_message @queue.pop
      rescue => exception
        puts "#{exception.class}: #{exception.message}"
        puts exception.backtrace
      end
    end
  end

  # Run in a background thread: keeps issuing long-polling connect requests
  # for as long as the session is connected. Only one poll loop runs at a
  # time.
  def poll
    return if @polling

    @polling = true

    while @connected
      # throw=false: failures are printed rather than raised
      request("/meta/connect", "connect", {}, false)
    end

    @polling = false
  end

  # Dispatches one message to the query registered under its "requestId" and
  # forgets the query once it reports being finished. Messages for unknown
  # request IDs are printed and dropped.
  def process_message(data)
    request_id = data["requestId"]
    query = @queries[request_id]

    if query.nil?
      puts "No open query #{request_id}:"
      puts JSON.pretty_generate(data)
      return
    end

    query._on_message(data)

    # Clean up the query map if the query itself is finished
    @queries.delete(request_id) if query.finished
  end

  # Issues a query to the server; callback is invoked for every relevant
  # message received. NOTE: a "requestId" entry is written into the given
  # query Hash.
  def query(query, callback)
    # A random GUID lets us track which messages correspond to which query
    query["requestId"] = SecureRandom.uuid
    # Store a new query state tracker in our map of currently running queries
    @queries[query["requestId"]] = ImportIoQuery.new(callback, query)
    # Issue the query to the server
    request("/service/query", "", { "data" => query })
  end
end
@@ -1,3 +1,3 @@
1
1
  module ShopikonImportIoConnector
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shopikon_import_io_connector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paweł Sobolewski
@@ -65,6 +65,10 @@ files:
65
65
  - README.md
66
66
  - Rakefile
67
67
  - lib/shopikon_import_io_connector.rb
68
+ - lib/shopikon_import_io_connector/curl_request_builder.rb
69
+ - lib/shopikon_import_io_connector/import_io.rb
70
+ - lib/shopikon_import_io_connector/import_io_query.rb
71
+ - lib/shopikon_import_io_connector/import_io_session.rb
68
72
  - lib/shopikon_import_io_connector/version.rb
69
73
  - shopikon_import_io_connector.gemspec
70
74
  homepage: http://www.shopikon.com