importio 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/importio.rb +520 -0
  3. metadata +43 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c88411bc3b3ad7a892cee030ccd84e3f103ad9b6
4
+ data.tar.gz: 8bbe03c69a4befab391c34131180008d0b98c1b9
5
+ SHA512:
6
+ metadata.gz: d54dc02a3a35b5acfd1662bb794bcacb700b28a58a93d6f24061e085b52dfa95f91d7a91167c4a795abc19c2563f35a75b14e056c3e75ca5d45dbacf926e6273
7
+ data.tar.gz: e1011c09bed2b57f46e94e4fafe859a695c349dcd14364d6dc03e47ef4faa6ee3b065da58f89f9206b201ed1f0decb6054fa5ba7e8851971f4e524b1bde9e1e0
data/lib/importio.rb ADDED
@@ -0,0 +1,520 @@
1
+ #
2
+ # import.io client library - client classes
3
+ #
4
+ # This file contains the main classes required to connect to and query import.io APIs
5
+ #
6
+ # Dependencies: Ruby 1.9, http-cookie
7
+ #
8
+ # @author: dev@import.io
9
+ # @source: https://github.com/import-io/importio-client-libs/tree/master/ruby
10
+ #
11
+
12
+ require "net/http"
13
+ require "uri"
14
+ require "thread"
15
+ require "http-cookie"
16
+ require "cgi"
17
+ require "json"
18
+ require "securerandom"
19
+
20
class Query
  # Tracks the state of a single query issued to the import.io platform.
  #
  # Every message the server sends for the query is passed to _on_message,
  # which updates the job counters, recomputes the finished flag and then
  # forwards the message to the callback supplied at construction time.

  # Message types that mark the query as terminated on the server: it either
  # errored on the import.io end, the user was not logged in, or the query
  # was cancelled.
  TERMINAL_TYPES = %w[ERROR UNAUTH CANCEL].freeze

  # Initialises the new query object with inputs and default state.
  #
  # callback - object responding to call(query, data); invoked once for
  #            every message handed to _on_message
  # query    - the query definition this object tracks
  def initialize(callback, query)
    @query = query
    @jobs_spawned = 0
    @jobs_started = 0
    @jobs_completed = 0
    @_finished = false
    @_callback = callback
  end

  # Handles a new message for this query.
  #
  # data - Hash for one message; its "type" entry drives the counters
  #
  # Returns whatever the callback returns.
  def _on_message(data)
    msg_type = data["type"]

    case msg_type
    when "SPAWN"
      # A spawn message means that a new job is being initialised on the server
      @jobs_spawned += 1
    when "INIT", "START"
      # Init and start indicate that a page of work has been started on the server
      @jobs_started += 1
    when "STOP"
      # Stop indicates that a job has finished on the server
      @jobs_completed += 1
    end

    # The query is finished if we have started some jobs, we have finished as
    # many as we started, and we have started as many as we have spawned.
    # There is a +1 on jobs_spawned because there is an initial spawn to cover
    # initialising all of the jobs for the query.
    all_jobs_done = @jobs_started > 0 &&
                    @jobs_started == @jobs_completed &&
                    @jobs_spawned + 1 == @jobs_started

    # The flag is recomputed on every message, exactly as before; a terminal
    # message type forces it to true regardless of the counters.
    @_finished = all_jobs_done || TERMINAL_TYPES.include?(msg_type)

    # Now the state is up to date, hand the message back to the listener
    @_callback.call(self, data)
  end

  # Returns boolean - true if the query has been completed or terminated.
  def finished
    @_finished
  end
  alias finished? finished
end
69
+
70
class Importio
  # The main import.io client, used for managing the message channel and
  # sending queries and receiving data.
  #
  # Queries issued before a session is connected are held in an internal
  # queue and replayed once connect succeeds.

  # We use this only for a specific test case
  attr_reader :session

  # Initialises the client library with its configuration.
  #
  # user_id - user identifier passed to the Session (optional)
  # api_key - API key passed to the Session (optional)
  # host    - base URL of the query server
  def initialize(user_id=nil, api_key=nil, host="https://query.import.io")
    @host = host
    @proxy_host = nil
    @proxy_port = nil
    @user_id = user_id
    @api_key = api_key
    @username = nil
    @password = nil
    @login_host = nil
    @session = nil
    @queue = Queue.new
  end

  # If you want to configure an HTTP proxy, use this method to do so.
  # The values are handed to the Session created by the next connect.
  def proxy(host, port)
    @proxy_host = host
    @proxy_port = port
  end

  # Logs in with a username and password for cookie-based authentication.
  # The credentials are remembered so that reconnect can log in again.
  #
  # Returns the result of Session#login.
  def login(username, password, host="https://api.import.io")
    @username = username
    @password = password
    @login_host = host

    # If we don't have a session, then connect one
    connect unless @session

    # Once connected, do the login
    @session.login(@username, @password, @login_host)
  end

  # Reconnects the client to the platform by establishing a new session,
  # logging in again when credentials were previously supplied.
  def reconnect
    # Disconnect an old session, if there is one
    disconnect if @session

    if @username
      login(@username, @password, @login_host)
    else
      connect
    end
  end

  # Connects this client to the import.io server if not already connected,
  # then issues every query that was queued while there was no session.
  def connect
    # Nothing to do if there is a session already
    return if @session

    @session = Session.new(self, @host, @user_id, @api_key, @proxy_host, @proxy_port)
    @session.connect

    # Take ownership of the pending queue and install a fresh one first, so
    # that a query which has to be queued again during the replay lands on
    # the new queue instead of the one being drained.
    pending = @queue
    @queue = Queue.new

    until pending.empty?
      begin
        queued = pending.pop(true)
      rescue ThreadError
        # The queue was emptied between the check and the pop
        break
      end
      # Entries are the Hashes stored by #query, so read them by key
      query(queued["query"], queued["callback"])
    end
  end

  # Call this method to ask the client library to disconnect from the
  # import.io server. It is best practice to disconnect when you are finished
  # with querying, so as to clean up resources on both the client and server.
  def disconnect
    return unless @session

    @session.disconnect
    @session = nil
  end

  # Stops all of the threads that are currently running in the session.
  # Returns nil when there is no session.
  def stop
    @session.stop if @session
  end

  # Waits on the session's threads so the caller can block until they are
  # finished. Returns nil when there is no session.
  def join
    @session.join if @session
  end

  # Issues a query to the server, or queues it when there is no connected
  # session.
  #
  # query    - the query definition to send
  # callback - object responding to call(query, data), invoked per message
  def query(query, callback)
    if @session.nil? || !@session.connected
      @queue << { "query" => query, "callback" => callback }
      return
    end

    @session.query(query, callback)
  end
end
191
+
192
class Session
  # Session manager, used for managing the message channel, sending queries
  # and receiving data.
  #
  # A session talks to the server's CometD endpoint over HTTP POST. Once
  # connected it runs two threads: one issuing long-polling connect requests
  # and one draining the queue of received messages into the matching Query
  # objects.

  # We use this only for a specific test case
  attr_accessor :client_id
  attr_reader :connected

  # Initialises the session with its configuration.
  #
  # io         - owning client; its reconnect method is called when the
  #              server reports an unknown client
  # host       - base URL of the query server
  # user_id    - user identifier sent with API key authentication
  # api_key    - API key; when set it is sent with every request
  # proxy_host - HTTP proxy host, or nil
  # proxy_port - HTTP proxy port, or nil
  def initialize(io, host="https://query.import.io", user_id=nil, api_key=nil, proxy_host=nil, proxy_port=nil)
    @io = io
    @msg_id = 1
    @client_id = nil
    @url = "#{host}/query/comet/"
    @messaging_channel = "/messaging"
    @queries = {}
    @user_id = user_id
    @api_key = api_key
    @queue = Queue.new
    @connected = false
    @connecting = false
    @disconnecting = false
    @polling = false
    # These variables serve to identify this client and its version to the server
    @clientName = "import.io Ruby client"
    @clientVersion = "2.0.0"
    @cj = HTTP::CookieJar.new
    @proxy_host = proxy_host
    @proxy_port = proxy_port
    # Start with an empty list so that stop is safe to call before connect
    @threads = []
  end

  # Helper method that builds the objects needed for a POST request.
  #
  # Returns [uri, http, request]; nothing is sent yet.
  def make_request(url, data)
    uri = URI(url)
    request = Net::HTTP::Post.new(uri.request_uri)
    request.body = data
    http = Net::HTTP.new(uri.host, uri.port, @proxy_host, @proxy_port)
    http.use_ssl = uri.scheme == "https"
    [uri, http, request]
  end

  # Makes a network request and stores any cookies the server sets.
  #
  # Returns the HTTP response.
  def open(uri, http, request)
    response = http.request(request)
    cookies = response.get_fields("set-cookie")
    cookies.each { |value| @cj.parse(value, uri) } if cookies
    response
  end

  # Encodes a dictionary to x-www-form format.
  # Keys and values are converted with to_s so nil or numeric entries do not
  # raise inside CGI.escape.
  def encode(dict)
    dict.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join("&")
  end

  # Logs in with a username and password for cookie-based authentication.
  #
  # Raises RuntimeError when the server does not answer with status 200.
  def login(username, password, host="https://api.import.io")
    data = encode({ "username" => username, "password" => password })
    uri, http, req = make_request("#{host}/auth/login", data)
    r = open(uri, http, req)

    raise "Could not log in, code #{r.code}" if r.code != "200"
  end

  # Helper method that makes a generic request on the messaging channel.
  #
  # channel - CometD channel name
  # path    - suffix appended to the comet URL
  # data    - message payload; the protocol fields are added to this Hash
  # throw   - when true, failures raise RuntimeError; when false they are
  #           printed instead
  #
  # Returns the response with its body replaced by the parsed JSON, or nil
  # when the session was disconnected in the meantime or a non-200 response
  # was only logged.
  def request(channel, path="", data={}, throw=true)
    # These are CometD configuration values that are common to all requests
    data["channel"] = channel
    data["connectionType"] = "long-polling"

    # We need to increment the message ID with each request that we send
    data["id"] = @msg_id
    @msg_id += 1

    # If we have a client ID, then we need to send that (provided on handshake)
    data["clientId"] = @client_id if @client_id

    # Build the URL that we are going to request
    url = "#{@url}#{path}"

    # With API key authentication, the key is sent with each request
    if @api_key
      credentials = encode({ "_user" => @user_id, "_apikey" => @api_key })
      url = "#{url}?#{credentials}"
    end

    # Build the request object
    uri, http, req = make_request(url, JSON.dump([data]))
    req.content_type = "application/json;charset=UTF-8"
    req["Cookie"] = HTTP::Cookie.cookie_value(@cj.cookies(uri))
    req["import-io-client"] = @clientName
    req["import-io-client-version"] = @clientVersion

    # Send the request itself
    response = open(uri, http, req)

    # Don't process the response if we've disconnected in the meantime
    return if !@connected && !@connecting

    # A non-200 response means a serious issue (configuration wrong or server down)
    if response.code != "200"
      error_message = "Unable to connect to import.io, status #{response.code} for url #{url}"
      raise error_message if throw

      puts error_message
      # The body of an error response is not a message list, so stop here
      # rather than letting the JSON parser raise on it
      return
    end

    response.body = JSON.parse(response.body)

    # Iterate through each of the messages in the response content
    response.body.each do |msg|
      # If the message is not successful, decide what action to take
      if msg.has_key?("successful") && msg["successful"] != true
        error_message = "Unsuccessful request: #{msg}"

        # Failures while connecting or disconnecting are skipped
        next if @disconnecting || !@connected || @connecting

        if msg["error"] == "402::Unknown client"
          # If we get a 402 unknown client we need to reconnect
          puts "402 received, reconnecting"
          @io.reconnect
        elsif throw
          raise error_message
        else
          puts error_message
        end
      end

      # Ignore messages on a CometD channel that we have not subscribed to
      next if msg["channel"] != @messaging_channel

      # A valid message on the right channel: queue it up to be processed
      @queue.push(msg["data"])
    end

    response
  end

  # Makes a CometD handshake request to register the client on the server and
  # stores the client ID from the first message of the response.
  def handshake
    response = request(
      "/meta/handshake",
      "handshake",
      {
        "version" => "1.0",
        "minimumVersion" => "0.9",
        "supportedConnectionTypes" => ["long-polling"],
        "advice" => { "timeout" => 60000, "interval" => 0 }
      }
    )

    return if response.nil?

    # Set the Client ID from the handshake's response
    @client_id = response.body[0]["clientId"]
  end

  # Issues a CometD subscription request for this client on the server.
  def subscribe(channel)
    request("/meta/subscribe", "", { "subscription" => channel })
  end

  # Connects this client to the import.io server if not already connected and
  # starts the two background threads.
  def connect
    # Don't connect again if we're already connected
    return if @connected || @connecting

    @connecting = true
    begin
      # Do the handshake request to register the client on the server
      handshake

      # Register this client with a subscription to our chosen message channel
      subscribe(@messaging_channel)

      # Now we are subscribed, we can set the client as connected
      @connected = true

      # Ruby's HTTP requests are synchronous, so the long-polling connection
      # and the queue processing each run in their own thread to avoid
      # blocking the user's application
      @threads = []
      @threads << Thread.new(self) { |context| context.poll }
      @threads << Thread.new(self) { |context| context.poll_queue }
    ensure
      # Always clear the flag, otherwise a failed handshake or subscription
      # would make every later connect return immediately
      @connecting = false
    end
  end

  # Call this method to ask the client library to disconnect from the
  # import.io server. It is best practice to disconnect when you are finished
  # with querying, so as to clean up resources on both the client and server.
  def disconnect
    # Maintain a local value of the queries, and then erase them from the class
    open_queries = @queries.clone
    @queries = {}

    # Set the flag to notify handlers that we are disconnecting
    @disconnecting = true

    # Set the connection status flag to prevent any other requests going out
    @connected = false

    # Make the disconnect request to the server
    request("/meta/disconnect")

    # Now we are disconnected we need to remove the client ID
    @client_id = nil

    # We are done disconnecting so reset the flag
    @disconnecting = false

    # Send a "disconnected" message to all of the queries that were open
    open_queries.each do |key, query|
      query._on_message({ "type" => "DISCONNECT", "requestId" => key })
    end
  end

  # Stops all of the threads that are currently running.
  def stop
    @threads.each { |thread| thread.terminate }
  end

  # Waits, checking once a second, until there are no open queries and then
  # stops the threads. Returns straight away if the session is not connected.
  def join
    while @connected
      if @queries.length == 0
        # When there are no more queries, stop all the threads
        stop
        return
      end
      sleep 1
    end
  end

  # Called in a new thread to process the messages returned from the server.
  # Keeps going until the client library is disconnected.
  def poll_queue
    while @connected
      begin
        # Wait for the next message on the queue and process it
        process_message @queue.pop
      rescue => exception
        puts exception.backtrace
      end
    end
  end

  # Called in a new thread to open long-polling HTTP connections to the
  # CometD server so that we can receive any messages it needs to send us.
  def poll
    return if @polling

    @polling = true

    # Keep making connections until disconnected; failures are printed, not raised
    request("/meta/connect", "connect", {}, false) while @connected

    @polling = false
  end

  # Called by the queue poller to hand a received message to the query it
  # belongs to.
  #
  # data - Hash for one message; "requestId" selects the query
  def process_message(data)
    begin
      # Look up which query object the message corresponds to
      request_id = data["requestId"]
      query = @queries[request_id]

      # If we don't recognise the request ID, then do not process the message
      if query.nil?
        puts "No open query #{request_id}:"
        puts JSON.pretty_generate(data)
        return
      end

      # Call the message callback on the query object with the data
      query._on_message(data)

      # Clean up the query map if the query itself is finished
      @queries.delete(request_id) if query.finished
    rescue => exception
      puts exception.backtrace
    end
  end

  # Issues a query to the server, calling the callback whenever a relevant
  # message is received.
  #
  # query    - Hash describing the query; a "requestId" entry is written to it
  # callback - object responding to call(query, data)
  def query(query, callback)
    # Set the request ID to a random GUID so that incoming messages can be
    # matched to this query
    query["requestId"] = SecureRandom.uuid
    # Construct a new query state tracker and store it in our map of running queries
    @queries[query["requestId"]] = Query.new(callback, query)
    # Issue the query to the server
    request("/service/query", "", { "data" => query })
  end
end
metadata ADDED
@@ -0,0 +1,43 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: importio
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Import.io developers
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-18 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Connect to the import.io APIs using your Ruby application
14
+ email: dev@import.io
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/importio.rb
20
+ homepage: https://import.io/data/integrate/#ruby
21
+ licenses: []
22
+ metadata: {}
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
32
+ required_rubygems_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ requirements: []
38
+ rubyforge_project:
39
+ rubygems_version: 2.2.2
40
+ signing_key:
41
+ specification_version: 4
42
+ summary: Ruby client library for import.io
43
+ test_files: []