shopikon_import_io_connector 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: ec3d798b75de4022baa5c204727f0ac5f1c9b2d0
4
- data.tar.gz: 9fcdcc841fed877ae3ce29bb9958a886a82da7f9
3
+ metadata.gz: e0f199117ee26114e7ad364e888576b260137c41
4
+ data.tar.gz: 8bebb3fd1ecfb5014f78e7fc218b3e5b7854215d
5
5
  SHA512:
6
- metadata.gz: 56e06bd2f9f1e6ea8e9e136757b0b0b47ceb86998472597d9276dc8705aa094cfaa72d5a3f1029198f5c6ade4dd69ac060ee00aadf77b422bc3779642d770218
7
- data.tar.gz: 18d93e043f432fea5a9163a5db510d32f51120c5b76a680c0c267016e5c1b050ec35a1725c153c6524aaa00d202de5f612f309b0837c164dee5552b239e1fe73
6
+ metadata.gz: 08453c4eff290d4f6b4839f62bdd752e430ee92454646bd29ef012e37bf000504a50c27058c0b075d092b87680c380aa3854ae5f0d1490e32c911b5ee4ca27a5
7
+ data.tar.gz: 958d10249739d694fbd0b9ea79be170df112d0e5b95b2f8ab5243faaf5df91f6ddf7f97d535e61fb181a6a2d4874e85ae7a4e75b6dd7c54f4aa06ccd83440e4e
@@ -0,0 +1,20 @@
1
# Builds a pre-configured Curl::Easy handle for a single URL.
#
# The handle sends a desktop-Chrome User-Agent, keeps cookies, follows up to
# three redirects and uses a 5 second connect / 120 second total timeout.
class CurlRequestBuilder
  # User-Agent header sent with every request built by this class.
  USER_AGENT = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36".freeze

  # The configured Curl::Easy handle created by #initialize.
  attr_reader :request

  # @param options [Hash] only the :url key is read; it is passed straight to
  #   Curl::Easy.new
  def initialize(options = {})
    @request = Curl::Easy.new(options[:url]) do |curl|
      curl.headers["User-Agent"] = USER_AGENT
      curl.enable_cookies = true
      curl.follow_location = true
      curl.max_redirects = 3
      curl.encoding = ""
      curl.autoreferer = true
      curl.connect_timeout = 5
      curl.timeout = 120
      # The original assigned ssl_verify_host twice in a row (0, then false);
      # only the last write is effective, so that is the one kept here.
      # NOTE(review): the duplicate looks like a typo for `ssl_verify_peer`.
      # Peer verification is left untouched on purpose - confirm whether
      # disabling host verification is really wanted, since it weakens TLS.
      curl.ssl_verify_host = false
    end
  end
end
@@ -0,0 +1,140 @@
1
+ #
2
+ # import.io client library - client classes
3
+ #
4
+ # This file contains the main classes required to connect to and query import.io APIs
5
+ #
6
+ # Dependencies: Ruby 1.9, http-cookie
7
+ #
8
+ # @author: dev@import.io
9
+ # @source: https://github.com/import-io/importio-client-libs/tree/master/python
10
+ #
11
+
12
+ require "net/http"
13
+ require "uri"
14
+ require "thread"
15
+ require "http-cookie"
16
+ require "cgi"
17
+ require "json"
18
+ require "securerandom"
19
+
20
# The main import.io client.
#
# It owns at most one ImportIoSession, forwards queries to it, and buffers
# queries that are issued while no connected session exists so they can be
# replayed once #connect has established one.
class ImportIO
  # Exposed only for a specific test case.
  attr_reader :session

  # Initialises the client library with its configuration.
  #
  # @param user_id [String, nil] user GUID passed on to the session
  # @param api_key [String, nil] API key passed on to the session
  # @param host [String] base URL of the query server
  def initialize(user_id=nil, api_key=nil, host="https://query.import.io")
    @host = host
    @proxy_host = nil
    @proxy_port = nil
    @user_id = user_id
    @api_key = api_key
    @username = nil
    @password = nil
    @login_host = nil
    @session = nil
    # Queries issued before a connected session exists; entries are Hashes
    # of the form {"query" => ..., "callback" => ...}.
    @queue = Queue.new
  end

  # Configures an HTTP proxy; it is handed to the session created by the
  # next #connect.
  def proxy(host, port)
    @proxy_host = host
    @proxy_port = port
  end

  # Cookie-based authentication: remembers the credentials (so #reconnect
  # can log in again), connects a session if there is none yet, and logs in
  # through that session.
  def login(username, password, host="https://api.import.io")
    @username = username
    @password = password
    @login_host = host

    connect unless @session

    @session.login(@username, @password, @login_host)
  end

  # Reconnects the client by dropping any existing session and establishing
  # a new one, logging in again when credentials were supplied via #login.
  def reconnect
    disconnect if @session

    if @username
      login(@username, @password, @login_host)
    else
      connect
    end
  end

  # Connects this client to the import.io server if it has no session yet,
  # then replays every query that was queued while disconnected.
  def connect
    return if @session

    @session = ImportIoSession.new(self, @host, @user_id, @api_key, @proxy_host, @proxy_port)
    @session.connect

    # Swap in a fresh queue before replaying, so that anything #query has to
    # queue again during the replay is not picked up by this same loop.
    pending = @queue
    @queue = Queue.new

    until pending.empty?
      begin
        entry = pending.pop(true)
      rescue ThreadError
        # Non-blocking pop on a queue that was emptied in the meantime.
        break
      end
      # Entries are the Hashes pushed by #query, so they must be read with
      # Hash access (method-style access raised NoMethodError here).
      query(entry["query"], entry["callback"])
    end
  end

  # Asks the session to disconnect from the import.io server and forgets it.
  # Does nothing when there is no session.
  def disconnect
    return unless @session

    @session.disconnect
    @session = nil
  end

  # Stops the threads running in the session.
  #
  # @return whatever the session's #stop returns, or nil without a session
  def stop
    @session.stop if @session
  end

  # Waits for the session's threads to finish.
  #
  # @return whatever the session's #join returns, or nil without a session
  def join
    @session.join if @session
  end

  # Takes an import.io query and either issues it to the server or, when
  # there is no connected session, queues it until #connect runs.
  #
  # @param query [Hash] the query to issue
  # @param callback [#call] handed to the session together with the query
  def query(query, callback)
    unless @session && @session.connected
      @queue << { "query" => query, "callback" => callback }
      return
    end

    @session.query(query, callback)
  end
end
@@ -0,0 +1,61 @@
1
# Tracks the state of a single query to the import.io platform and relays
# every message received for it to the caller's callback.
class ImportIoQuery
  # Message types that mean the query has been terminated on the server: it
  # errored on the import.io end, the user was not logged in, or the query
  # was cancelled.
  TERMINAL_TYPES = ["ERROR", "UNAUTH", "CANCEL"].freeze

  # Initialises the new query object with inputs and default state.
  #
  # @param callback [#call] invoked as callback.call(query_object, data) for
  #   every message passed to #_on_message
  # @param query the query this object tracks (only stored)
  def initialize(callback, query)
    @query = query
    @jobs_spawned = 0
    @jobs_started = 0
    @jobs_completed = 0
    @_finished = false
    @_callback = callback
  end

  # Called when a new message is received for this query. Updates the job
  # counters from the message's "type", recomputes the finished state, and
  # hands the message to the callback.
  #
  # @param data [Hash] the message; only its "type" key is inspected here
  # @return the callback's return value
  def _on_message(data)
    msg_type = data["type"]

    # The counters start at 0 and are never nil, so they can be incremented
    # directly (the previous `present?` guard needed ActiveSupport, which
    # this file does not load).
    case msg_type
    when "SPAWN"
      # A new job is being initialised on the server
      @jobs_spawned += 1
    when "INIT", "START"
      # A page of work has been started on the server
      @jobs_started += 1
    when "STOP"
      # A job has finished on the server
      @jobs_completed += 1
    end

    # Finished when at least one job started, every started job completed,
    # and the started count is exactly one more than the spawned count
    # (the initial job is started without a SPAWN message of its own).
    @_finished = @jobs_started > 0 &&
                 @jobs_started == @jobs_completed &&
                 @jobs_spawned + 1 == @jobs_started

    @_finished = true if TERMINAL_TYPES.include?(msg_type)

    # State is up to date; pass the message on to the listener
    @_callback.call(self, data)
  end

  # @return [Boolean] true if the query has been completed or terminated
  def finished
    @_finished
  end
end
@@ -0,0 +1,327 @@
1
# Session manager: owns the CometD-style message channel to the import.io
# server, sends queries over it and dispatches the messages that come back
# to the matching ImportIoQuery objects.
class ImportIoSession
  # Exposed (read and write) only for a specific test case.
  attr_accessor :client_id
  # True between a successful #connect and the next #disconnect.
  attr_reader :connected

  # Initialises the session with its configuration.
  #
  # @param io the owning client; #reconnect is called on it when the server
  #   reports "402::Unknown client"
  # @param host [String] base URL of the query server
  # @param user_id [String, nil] sent as `_user` when an API key is set
  # @param api_key [String, nil] when set, sent as `_apikey` on every request
  # @param proxy_host [String, nil] HTTP proxy host handed to Net::HTTP
  # @param proxy_port [Integer, nil] HTTP proxy port handed to Net::HTTP
  def initialize(io, host="https://query.import.io", user_id=nil, api_key=nil, proxy_host=nil, proxy_port=nil)
    @io = io
    @msg_id = 1
    @client_id = nil
    @url = "#{host}/query/comet/"
    @messaging_channel = "/messaging"
    @queries = {}
    @user_id = user_id
    @api_key = api_key
    @queue = Queue.new
    @connected = false
    @connecting = false
    @disconnecting = false
    @polling = false
    # Created empty so that #stop is safe before #connect has ever run
    @threads = []
    # These variables serve to identify this client and its version to the server
    @clientName = "import.io Ruby client"
    @clientVersion = "2.0.0"
    @cj = HTTP::CookieJar.new
    @proxy_host = proxy_host
    @proxy_port = proxy_port
  end

  # Helper that builds (but does not send) a POST request.
  #
  # @param url [String] absolute URL to post to
  # @param data [String] request body
  # @return [Array] the parsed URI, the Net::HTTP connection object and the
  #   Net::HTTP::Post request
  def make_request(url, data)
    uri = URI(url)
    request = Net::HTTP::Post.new(uri.request_uri)
    request.body = data
    http = Net::HTTP.new(uri.host, uri.port, @proxy_host, @proxy_port)
    http.use_ssl = uri.scheme == "https"
    return uri, http, request
  end

  # Sends the request and stores any Set-Cookie headers of the response in
  # the session's cookie jar.
  #
  # @return the HTTP response
  def open(uri, http, request)
    response = http.request(request)
    (response.get_fields("set-cookie") || []).each do |value|
      @cj.parse(value, uri)
    end
    response
  end

  # Encodes a dictionary to x-www-form-urlencoded format.
  #
  # Keys and values are converted with #to_s first, so nil (for example an
  # API key configured without a user id) becomes an empty string instead of
  # making CGI.escape raise.
  def encode(dict)
    dict.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join("&")
  end

  # Cookie-based authentication: posts the credentials to "#{host}/auth/login".
  #
  # @raise [RuntimeError] when the server answers with anything but 200
  def login(username, password, host="https://api.import.io")
    data = encode({ 'username' => username, 'password' => password })
    uri, http, req = make_request("#{host}/auth/login", data)
    r = open(uri, http, req)

    raise "Could not log in, code #{r.code}" if r.code != "200"
  end

  # Helper that makes a generic request on the messaging channel and queues
  # every message that comes back on the subscribed channel.
  #
  # @param channel [String] channel to address
  # @param path [String] appended to the session's base URL
  # @param data [Hash] message payload; the protocol fields are added to this
  #   very Hash
  # @param throw [Boolean] raise on failures when true, otherwise print them
  # @return the response with its body replaced by the parsed JSON, or nil
  #   when the session was disconnected in the meantime or when a non-200
  #   answer was only logged
  # @raise [RuntimeError] on a non-200 answer or an unsuccessful message,
  #   when throw is true
  def request(channel, path="", data={}, throw=true)
    # Configuration values that are common to all requests we need to send
    data["channel"] = channel
    data["connectionType"] = "long-polling"

    # The message ID is incremented with each request that is sent
    data["id"] = @msg_id
    @msg_id += 1

    # The client ID is provided by the handshake; send it once we have it
    data["clientId"] = @client_id unless @client_id.nil?

    url = "#{@url}#{path}"

    # With API key authentication the key goes out with every request
    unless @api_key.nil?
      url = "#{url}?#{encode({ "_user" => @user_id, "_apikey" => @api_key })}"
    end

    uri, http, http_request = make_request(url, JSON.dump([data]))
    http_request.content_type = "application/json;charset=UTF-8"
    http_request["Cookie"] = HTTP::Cookie.cookie_value(@cj.cookies(uri))
    http_request["import-io-client"] = @clientName
    http_request["import-io-client-version"] = @clientVersion

    response = open(uri, http, http_request)

    # Don't process the response if we've disconnected in the meantime
    return unless @connected || @connecting

    # A non-200 answer means a serious issue (configuration wrong or server down)
    if response.code != "200"
      error_message = "Unable to connect to import.io, status #{response.code} for url #{url}"
      raise error_message if throw

      puts error_message
      # Do not fall through: the body of an error page is not the JSON
      # message list parsed below.
      return
    end

    response.body = JSON.parse(response.body)

    response.body.each do |msg|
      # An unsuccessful message means an import.io server error has occurred
      if msg.has_key?("successful") && msg["successful"] != true
        # Failures are only acted upon while fully connected
        next if @disconnecting || !@connected || @connecting

        error_message = "Unsuccessful request: #{msg}"
        if msg["error"] == "402::Unknown client"
          # The server no longer knows this client, so a new session is needed
          puts "402 received, reconnecting"
          @io.reconnect
        elsif throw
          raise error_message
        else
          puts error_message
        end
      end

      # Ignore messages that come back on a channel we have not subscribed to
      next if msg["channel"] != @messaging_channel

      # A valid message on the right channel: queue it up to be processed
      @queue.push(msg["data"])
    end

    response
  end

  # Makes the handshake request that registers the client on the server and
  # stores the client ID from the first message of the response.
  def handshake
    response = request("/meta/handshake", "handshake", {
      "version" => "1.0",
      "minimumVersion" => "0.9",
      "supportedConnectionTypes" => ["long-polling"],
      "advice" => { "timeout" => 60000, "interval" => 0 }
    })
    return if response.nil?

    @client_id = response.body[0]["clientId"]
  end

  # Issues a subscription request for this client on the given channel.
  def subscribe(channel)
    request("/meta/subscribe", "", { "subscription" => channel })
  end

  # Connects to the import.io server unless already connected or in the
  # middle of connecting: handshake, subscribe, then start the two worker
  # threads (#poll and #poll_queue).
  def connect
    return if @connected || @connecting

    @connecting = true
    begin
      # Do the handshake request to register the client on the server
      handshake

      # Register this client with a subscription to our chosen message channel
      subscribe(@messaging_channel)

      # Now we are subscribed, we can set the client as connected
      @connected = true

      # Ruby's HTTP requests are synchronous, so the long-polling connection
      # and the processing of queued messages each run in their own thread
      # and do not block the caller.
      @threads = []
      @threads << Thread.new(self) { |context| context.poll }
      @threads << Thread.new(self) { |context| context.poll_queue }
    ensure
      # Reset even when the handshake or subscription raised; otherwise every
      # later #connect would return early forever.
      @connecting = false
    end
    nil
  end

  # Asks the server to disconnect this client, then sends a "DISCONNECT"
  # message to every query that was still open.
  def disconnect
    # Keep a local reference to the open queries and erase them from the session
    open_queries = @queries
    @queries = {}

    # Notify handlers that we are disconnecting, and stop further processing
    @disconnecting = true
    @connected = false

    begin
      request("/meta/disconnect")
    ensure
      # The client ID is no longer valid, and we are done disconnecting
      @client_id = nil
      @disconnecting = false
    end

    open_queries.each do |key, query|
      query._on_message({ "type" => "DISCONNECT", "requestId" => key })
    end
  end

  # Terminates all worker threads started by #connect.
  def stop
    @threads.each { |thread| thread.terminate }
  end

  # Blocks while connected until no queries are open any more, checking once
  # per second, then stops the worker threads.
  def join
    while @connected
      if @queries.length == 0
        stop
        return
      end
      sleep 1
    end
  end

  # Runs in its own thread: takes messages off the queue and processes them
  # until the session is disconnected. Errors raised while processing a
  # message are printed and do not end the loop.
  def poll_queue
    while @connected
      begin
        process_message @queue.pop
      rescue => exception
        puts "#{exception.class}: #{exception.message}"
        puts exception.backtrace
      end
    end
  end

  # Runs in its own thread: keeps opening long-polling connections to the
  # server until the session is disconnected. Only one poll loop runs at a
  # time.
  def poll
    return if @polling

    @polling = true

    while @connected
      response = request("/meta/connect", "connect", {}, false)
      # A nil response while still connected means the request failed and was
      # only logged; pause briefly instead of retrying in a tight loop.
      sleep 1 if response.nil? && @connected
    end

    @polling = false
  end

  # Called by the queue poller for each message received from the server:
  # looks up the query by the message's request ID, hands it the message,
  # and forgets the query once it reports being finished.
  def process_message(data)
    request_id = data["requestId"]
    query = @queries[request_id]

    # If we don't recognise the request ID, do not process the message
    if query.nil?
      puts "No open query #{request_id}:"
      puts JSON.pretty_generate(data)
      return
    end

    query._on_message(data)

    # Clean up the query map if the query itself is finished
    @queries.delete(request_id) if query.finished
  end

  # Issues a query to the server; the callback is called whenever a relevant
  # message is received.
  #
  # @param query [Hash] the query; a random "requestId" is written into it so
  #   that incoming messages can be matched to it
  # @param callback [#call] handed to the ImportIoQuery that tracks the query
  def query(query, callback)
    request_id = SecureRandom.uuid
    query["requestId"] = request_id
    # Track the query's state in our map of currently running queries
    @queries[request_id] = ImportIoQuery.new(callback, query)
    request("/service/query", "", { "data" => query })
  end
end
@@ -1,3 +1,3 @@
1
1
  module ShopikonImportIoConnector
2
- VERSION = "0.0.2"
2
+ VERSION = "0.0.3"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: shopikon_import_io_connector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Paweł Sobolewski
@@ -65,6 +65,10 @@ files:
65
65
  - README.md
66
66
  - Rakefile
67
67
  - lib/shopikon_import_io_connector.rb
68
+ - lib/shopikon_import_io_connector/curl_request_builder.rb
69
+ - lib/shopikon_import_io_connector/import_io.rb
70
+ - lib/shopikon_import_io_connector/import_io_query.rb
71
+ - lib/shopikon_import_io_connector/import_io_session.rb
68
72
  - lib/shopikon_import_io_connector/version.rb
69
73
  - shopikon_import_io_connector.gemspec
70
74
  homepage: http://www.shopikon.com