importio 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/importio.rb +520 -0
  3. metadata +43 -0
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: c88411bc3b3ad7a892cee030ccd84e3f103ad9b6
4
+ data.tar.gz: 8bbe03c69a4befab391c34131180008d0b98c1b9
5
+ SHA512:
6
+ metadata.gz: d54dc02a3a35b5acfd1662bb794bcacb700b28a58a93d6f24061e085b52dfa95f91d7a91167c4a795abc19c2563f35a75b14e056c3e75ca5d45dbacf926e6273
7
+ data.tar.gz: e1011c09bed2b57f46e94e4fafe859a695c349dcd14364d6dc03e47ef4faa6ee3b065da58f89f9206b201ed1f0decb6054fa5ba7e8851971f4e524b1bde9e1e0
data/lib/importio.rb ADDED
@@ -0,0 +1,520 @@
1
+ #
2
+ # import.io client library - client classes
3
+ #
4
+ # This file contains the main classes required to connect to and query import.io APIs
5
+ #
6
+ # Dependencies: Ruby 1.9, http-cookie
7
+ #
8
+ # @author: dev@import.io
9
+ # @source: https://github.com/import-io/importio-client-libs/tree/master/python
10
+ #
11
+
12
+ require "net/http"
13
+ require "uri"
14
+ require "thread"
15
+ require "http-cookie"
16
+ require "cgi"
17
+ require "json"
18
+ require "securerandom"
19
+
20
class Query
  # Tracks the progress of one query issued to the import.io platform.
  #
  # The server reports progress through typed messages; this object counts the
  # job lifecycle messages, works out when the query is over, and forwards every
  # message to the callback supplied by the caller.

  # Message types that end the query immediately: a server-side error, an
  # unauthenticated user, or a cancellation on the server
  TERMINAL_TYPES = ["ERROR", "UNAUTH", "CANCEL"]

  def initialize(callback, query)
    # callback - object responding to call(query_object, message_data)
    # query    - the query payload this tracker belongs to
    @_callback = callback
    @query = query
    @_finished = false
    @jobs_spawned = 0
    @jobs_started = 0
    @jobs_completed = 0
  end

  def _on_message(data)
    # Handles one message from the server: updates the job counters, recomputes
    # the finished flag and then hands the message to the callback.
    # Returns whatever the callback returns.
    kind = data["type"]

    case kind
    when "SPAWN"
      # A new job is being initialised on the server
      @jobs_spawned += 1
    when "INIT", "START"
      # A page of work has been started on the server
      @jobs_started += 1
    when "STOP"
      # A job has finished on the server
      @jobs_completed += 1
    end

    # All started jobs are done and nothing spawned is still waiting to start.
    # The +1 accounts for the initial job, which starts without a SPAWN message.
    all_jobs_done = @jobs_started > 0 &&
                    @jobs_started == @jobs_completed &&
                    @jobs_spawned + 1 == @jobs_started

    @_finished = TERMINAL_TYPES.include?(kind) || all_jobs_done

    # State is up to date, so listeners can now see the message
    @_callback.call(self, data)
  end

  def finished
    # true once the query has completed or has been terminated
    @_finished
  end
end
69
+
70
class Importio
  # The main import.io client. It owns at most one Session at a time, holds the
  # connection settings (host, proxy, credentials) and buffers queries that are
  # issued while no connected session is available.

  # The current Session, or nil when disconnected (used only by a specific test case)
  attr_reader :session

  def initialize(user_id=nil, api_key=nil, host="https://query.import.io")
    # user_id / api_key - credentials for API key authentication (both optional)
    # host              - base URL of the query server
    @host = host
    @proxy_host = nil
    @proxy_port = nil
    @user_id = user_id
    @api_key = api_key
    @username = nil
    @password = nil
    @login_host = nil
    @session = nil
    # Queries issued before a session is connected wait here as
    # {"query"=>..., "callback"=>...} hashes
    @queue = Queue.new
  end

  def proxy(host, port)
    # Configures the HTTP proxy used by sessions created after this call
    @proxy_host = host
    @proxy_port = port
  end

  def login(username, password, host="https://api.import.io")
    # Cookie-based authentication: remembers the credentials (so reconnect can
    # reuse them), makes sure a session exists and logs in through it
    @username = username
    @password = password
    @login_host = host

    connect if @session.nil?

    @session.login(@username, @password, @login_host)
  end

  def reconnect
    # Replaces the current session with a fresh one, logging in again when
    # username/password authentication was in use
    disconnect unless @session.nil?

    if @username.nil?
      connect
    else
      login(@username, @password, @login_host)
    end
  end

  def connect
    # Creates and connects a session unless one already exists, then issues any
    # queries that were queued while disconnected
    return unless @session.nil?

    @session = Session.new(self, @host, @user_id, @api_key, @proxy_host, @proxy_port)
    @session.connect

    # Swap in a fresh queue before replaying, so a query that still cannot be
    # sent is re-queued on the new queue rather than on the one being drained
    pending = @queue
    @queue = Queue.new

    until pending.empty?
      begin
        queued = pending.pop(true)
      rescue ThreadError
        # Non-blocking pop found the queue empty
        break
      end
      # Queue entries are hashes (see #query), so they are read by key
      query(queued["query"], queued["callback"])
    end
  end

  def disconnect
    # Asks the session to disconnect from the import.io server and drops it.
    # It is best practice to disconnect when you are finished querying, to clean
    # up resources on both the client and the server
    return if @session.nil?

    @session.disconnect
    @session = nil
  end

  def stop
    # Stops all of the threads running in the session; returns nil without a session
    return if @session.nil?

    @session.stop
  end

  def join
    # Waits for the session's outstanding queries to finish; returns nil without a session
    return if @session.nil?

    @session.join
  end

  def query(query, callback)
    # Issues the query through the session when it is connected; otherwise
    # queues it (returning nil) until the next successful connect
    unless @session && @session.connected
      @queue << {"query"=>query,"callback"=>callback}
      return
    end

    @session.query(query, callback)
  end

end
191
+
192
class Session
  # Session manager: owns the CometD message channel to the import.io server,
  # sends queries over it and dispatches the messages that come back to the
  # Query object they belong to.

  # We use this only for a specific test case
  attr_reader :client_id
  attr_writer :client_id
  # true between a successful connect and the next disconnect
  attr_reader :connected

  def initialize(io, host="https://query.import.io", user_id=nil, api_key=nil, proxy_host=nil, proxy_port=nil)
    # io                      - owning client; asked to reconnect on "402::Unknown client"
    # host                    - base URL of the query server
    # user_id / api_key       - sent with every request when api_key is set
    # proxy_host / proxy_port - optional HTTP proxy
    @io = io
    @msg_id = 1
    @client_id = nil
    @url = "#{host}/query/comet/"
    @messaging_channel = "/messaging"
    # Running queries, keyed by request ID
    @queries = Hash.new
    @user_id = user_id
    @api_key = api_key
    # Messages received from the server that are waiting to be processed
    @queue = Queue.new
    @connected = false
    @connecting = false
    @disconnecting = false
    @polling = false
    # These variables serve to identify this client and its version to the server
    @clientName = "import.io Ruby client"
    @clientVersion = "2.0.0"
    @cj = HTTP::CookieJar.new
    @proxy_host = proxy_host
    @proxy_port = proxy_port
    # Worker threads; empty until connect starts them, so stop is safe at any time
    @threads = []
  end

  def make_request(url, data)
    # Helper method that builds (without sending) a POST request.
    # Returns the parsed URI, the Net::HTTP connection object and the request
    uri = URI(url)
    request = Net::HTTP::Post.new(uri.request_uri)
    request.body = data
    http = Net::HTTP.new(uri.host, uri.port, @proxy_host, @proxy_port)
    http.use_ssl = uri.scheme == "https"
    return uri, http, request
  end

  def open(uri, http, request)
    # Sends the request, stores any cookies the server sets and returns the response
    response = http.request(request)
    cookies = response.get_fields("set-cookie")
    unless cookies.nil?
      cookies.each { |value| @cj.parse(value, uri) }
    end
    response
  end

  def encode(dict)
    # Encodes a dictionary to x-www-form format.
    # Keys and values are converted with to_s first, so numbers, symbols and nil
    # (encoded as an empty string) are accepted as well as strings
    dict.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join("&")
  end

  def login(username, password, host="https://api.import.io")
    # Cookie-based authentication: posts the credentials to the login endpoint;
    # the session cookies are captured by #open. Raises if the response is not 200
    data = encode({'username' => username, 'password'=> password})
    uri, http, req = make_request("#{host}/auth/login", data)
    r = open(uri, http, req)

    if r.code != "200"
      raise "Could not log in, code #{r.code}"
    end
  end

  def request(channel, path="", data={}, throw=true)
    # Helper method that makes a generic request on the messaging channel.
    #
    # channel - CometD channel the message is sent on
    # path    - suffix appended to the comet URL
    # data    - message fields; the CometD bookkeeping fields are added to this hash
    # throw   - when true problems raise, when false they are only printed
    #
    # Returns the response with its body replaced by the parsed list of messages,
    # or nil when the session was disconnected in the meantime or when a non-200
    # response was received with throw disabled.

    # These are CometD configuration values that are common to all requests we need to send
    data["channel"] = channel
    data["connectionType"] = "long-polling"

    # We need to increment the message ID with each request that we send
    data["id"] = @msg_id
    @msg_id += 1

    # If we have a client ID, then we need to send that (will be provided on handshake)
    data["clientId"] = @client_id unless @client_id.nil?

    # Build the URL that we are going to request
    url = "#{@url}#{path}"

    # If the user has chosen API key authentication, we need to send the API key with each request
    unless @api_key.nil?
      credentials = encode({ "_user" => @user_id, "_apikey" => @api_key })
      url = "#{url}?#{credentials}"
    end

    # Build the request object we are going to use to initialise the request
    body = JSON.dump([data])
    uri, http, req = make_request(url, body)
    req.content_type = "application/json;charset=UTF-8"
    req["Cookie"] = HTTP::Cookie.cookie_value(@cj.cookies(uri))
    req["import-io-client"] = @clientName
    req["import-io-client-version"] = @clientVersion

    # Send the request itself
    response = open(uri, http, req)

    # Don't process the response if we've disconnected in the meantime
    return if !@connected && !@connecting

    # If the server responds non-200 we have a serious issue (configuration wrong or server down)
    if response.code != "200"
      error_message = "Unable to connect to import.io, status #{response.code} for url #{url}"
      raise error_message if throw

      puts error_message
      # The body of an error response is not a CometD message list, so parsing it
      # would only raise a JSON error in the caller; stop here instead
      return
    end

    response.body = JSON.parse(response.body)

    # Iterate through each of the messages in the response content
    response.body.each do |msg|
      # If the message is not successful, i.e. an import.io server error has occurred, decide what action to take
      if msg.has_key?("successful") && msg["successful"] != true
        error_message = "Unsuccessful request: #{msg}"

        # Failures are expected while connecting or disconnecting, so skip those messages
        next if @disconnecting || !@connected || @connecting

        if msg["error"] == "402::Unknown client"
          # If we get a 402 unknown client we need to reconnect
          puts "402 received, reconnecting"
          @io.reconnect
        elsif throw
          raise error_message
        else
          puts error_message
        end
      end

      # Ignore messages that come back on a CometD channel that we have not subscribed to
      next if msg["channel"] != @messaging_channel

      # Now we have a valid message on the right channel, queue it up to be processed
      @queue.push(msg["data"])
    end

    response
  end

  def handshake
    # Makes a CometD handshake request to register the client on the server and
    # stores the client ID from the first message of the response
    options = {
      "version" => "1.0",
      "minimumVersion" => "0.9",
      "supportedConnectionTypes" => ["long-polling"],
      "advice" => {"timeout" => 60000, "interval" => 0}
    }
    response = request("/meta/handshake", "handshake", options)

    return if response.nil?

    # Set the Client ID from the handshake's response
    @client_id = response.body[0]["clientId"]
  end

  def subscribe(channel)
    # Issues a CometD subscription request for this client on the server
    request("/meta/subscribe", "", {"subscription"=>channel})
  end

  def connect
    # Connect this client to the import.io server if not already connected
    return if @connected || @connecting

    @connecting = true

    # Do the handshake request to register the client on the server
    handshake

    # Register this client with a subscription to our chosen message channel
    subscribe(@messaging_channel)

    # Now we are subscribed, we can set the client as connected
    @connected = true

    # Ruby's HTTP requests are synchronous - so that user apps can run while we are waiting for long connections
    # from the import.io server, we need to pass the long-polling connection off to a thread so it doesn't block
    # anything else
    @threads = []
    @threads << Thread.new(self) { |context| context.poll }

    # Similarly with the polling, we need to handle queued messages in a separate thread too
    @threads << Thread.new(self) { |context| context.poll_queue }

    @connecting = false
  end

  def disconnect
    # Call this method to ask the client library to disconnect from the import.io server
    # It is best practice to disconnect when you are finished with querying, so as to clean
    # up resources on both the client and server

    # Maintain a local value of the queries, and then erase them from the class
    open_queries = @queries.clone
    @queries = Hash.new

    # Set the flag to notify handlers that we are disconnecting, i.e. open connect calls will fail
    @disconnecting = true

    # Set the connection status flag in the library to prevent any other requests going out
    @connected = false

    # Make the disconnect request to the server
    request("/meta/disconnect")

    # Now we are disconnected we need to remove the client ID
    @client_id = nil

    # We are done disconnecting so reset the flag
    @disconnecting = false

    # Send a "disconnected" message to all of the queries that were still open
    open_queries.each do |key, query|
      query._on_message({"type"=>"DISCONNECT","requestId"=>key})
    end
  end

  def stop
    # This method stops all of the threads that are currently running
    # (a no-op when the session has never been connected)
    @threads.each { |thread| thread.terminate }
  end

  def join
    # Waits, checking once a second, until there are no more open queries and then
    # stops the worker threads. Returns immediately when not connected
    while @connected
      if @queries.length == 0
        # When there are no more queries, stop all the threads
        stop
        return
      end
      sleep 1
    end
  end

  def poll_queue
    # This method is called in a new thread to poll the queue of messages returned from the server
    # and process them

    # This while will mean the thread keeps going until the client library is disconnected
    while @connected
      begin
        # Blocks until a message is available, then processes it
        process_message @queue.pop
      rescue => exception
        puts exception.backtrace
      end
    end
  end

  def poll
    # This method is called in a new thread to open long-polling HTTP connections to the import.io
    # CometD server so that we can wait for any messages that the server needs to send to us

    # Only one polling loop at a time
    return if @polling

    @polling = true

    # While loop means we keep making connections until manually disconnected
    while @connected
      # Use the request helper to make the connect call to the CometD endpoint;
      # problems are printed rather than raised so the loop keeps running
      request("/meta/connect", "connect", {}, false)
    end

    @polling = false
  end

  def process_message(data)
    # This method is called by the queue poller to handle messages that are received from the import.io
    # CometD server. Any error raised while handling the message is printed, not propagated
    begin
      # First we need to look up which query object the message corresponds to, based on its request ID
      request_id = data["requestId"]
      query = @queries[request_id]

      # If we don't recognise the request ID, then do not process the message
      if query.nil?
        puts "No open query #{request_id}:"
        puts JSON.pretty_generate(data)
        return
      end

      # Call the message callback on the query object with the data
      query._on_message(data)

      # Clean up the query map if the query itself is finished
      @queries.delete(request_id) if query.finished
    rescue => exception
      puts exception.backtrace
    end
  end

  def query(query, callback)
    # This method takes an import.io query hash and issues it to the server, calling the callback
    # whenever a relevant message is received

    # Set the request ID to a random GUID
    # This allows us to track which messages correspond to which query
    query["requestId"] = SecureRandom.uuid
    # Construct a new query state tracker and store it in our map of currently running queries
    @queries[query["requestId"]] = Query.new(callback, query)
    # Issue the query to the server
    request("/service/query", "", { "data"=>query })
  end

end
metadata ADDED
@@ -0,0 +1,43 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: importio
3
+ version: !ruby/object:Gem::Version
4
+ version: 2.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Import.io developers
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2014-06-18 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Connect to the import.io APIs using your Ruby application
14
+ email: dev@import.io
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - lib/importio.rb
20
+ homepage: https://import.io/data/integrate/#ruby
21
+ licenses: []
22
+ metadata: {}
23
+ post_install_message:
24
+ rdoc_options: []
25
+ require_paths:
26
+ - lib
27
+ required_ruby_version: !ruby/object:Gem::Requirement
28
+ requirements:
29
+ - - ">="
30
+ - !ruby/object:Gem::Version
31
+ version: '0'
32
+ required_rubygems_version: !ruby/object:Gem::Requirement
33
+ requirements:
34
+ - - ">="
35
+ - !ruby/object:Gem::Version
36
+ version: '0'
37
+ requirements: []
38
+ rubyforge_project:
39
+ rubygems_version: 2.2.2
40
+ signing_key:
41
+ specification_version: 4
42
+ summary: Ruby client library for import.io
43
+ test_files: []