promptcloud_data_api 0.0.1.beta1

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in promptcloud_data_api.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2012 PromptCloud Technologies(http://promptcloud.com)
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,54 @@
1
+ # PromptCloudDataAPI
2
+
3
+ This is PromptCloud's (promptcloud.com) API gem. It can be used to query indexed data from PromptCloud.
4
+ NOTE: API query requires a valid userid and password.
5
+
6
+ For any queries related to this gem, contact data-api-gem@promptcloud.com.
7
+
8
+ ## Installation
9
+ Option 1-
10
+ Add this line to your application's Gemfile:
11
+
12
+ gem 'promptcloud_data_api'
13
+
14
+ And then execute:
15
+
16
+ $ bundle
17
+
18
+ Option 2-
19
+ Directly install using:
20
+
21
+ $ gem install promptcloud_data_api
22
+
23
+ ## Usage
24
+
25
+ Access using program:
26
+
27
+ require 'promptcloud_data_api'
28
+ obj=PromptCloudApi.new
29
+ obj.download_files({:user => "<your valid user name>", :pass => "<your valid password>", :timestamp=> <timestamp>[optional], :category=> "<category>"[optional]})
30
+ #above method will put the downloaded files in ~/promptcloud/downloads
31
+ #to override promptcloudhome (~/promptcloud), provide arg- :promptcloudhome=>"complete path of other dir"
32
+ #to override download dir provide arg- :download_dir => "<download dir full path>"
33
+ #to override conf dir provide arg- :apiconf => "<api conf full path>"
34
+
35
+ Access using Command line:
36
+
37
+ get_promptcloud_data -h #will display help
38
+ get_promptcloud_data --user <username> --pass <password> [--category <category>] [--timestamp <timestamp>]
39
+ #above command will put the downloaded files in ~/promptcloud/downloads
40
+ #log file can be viewed at ~/promptcloud/log/*log
41
+ #api config file at ~/promptcloud/configs/config.yml
42
+ #to override the download directory use option --download_dir "<download dir full path>"
43
+ #to override config dir use option --apiconf "<apiconf full path>"
44
+
45
+ In the command-line tool, if the option --perform_initial_setup is provided along with other options, then the initial setup will be performed (create conf file, download dir).
46
+
47
+ ## Contributing
48
+ In order to contribute to this gem,
49
+
50
+ 1. Fork it
51
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
52
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
53
+ 4. Push to the branch (`git push origin my-new-feature`)
54
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,23 @@
1
#!/usr/bin/env ruby

# Command-line entry point for the PromptCloud data API gem.
#
# Parses the command-line arguments, builds the API client and then either
# prints the stored configuration, (re)runs the initial setup, or downloads
# data files -- once, or forever when --loop is given.

require File.join(File.dirname(File.expand_path(__FILE__)), '..', 'lib', 'promptcloud_data_api.rb')

cli_options = PromptCloudApiArgParser.parse(ARGV)
api = PromptCloudApi.new(cli_options)
backoff = PromptCloudTimer.new

if cli_options[:display_info]
  api.display_info(cli_options)
elsif cli_options[:perform_initial_setup]
  api.perform_initial_setup(cli_options)
elsif cli_options[:loop]
  # Keep polling; only sleep when the last pass found nothing new.
  loop do
    backoff.wait if api.download_files(cli_options) == false
  end
else
  api.download_files(cli_options)
end
@@ -0,0 +1,377 @@
1
require File.dirname(File.expand_path(__FILE__)) + "/promptcloud_data_api/version"

require 'digest/md5'
require 'fileutils'
require 'open-uri'
require 'optparse'
require 'rexml/document'
require 'uri'
require 'yaml'
require 'zlib'

require 'restclient'
11
+
12
+
13
+
14
# Client for the PromptCloud data API.
#
# It reads the client id and download directory from a YAML config file,
# asks the API for the list of available data files and downloads every file
# whose md5 marker is not already present in the md5 directory.
class PromptCloudApi
  # Root directory under which config, log, md5 markers and downloads live.
  # This is class-wide state: the :promptcloudhome option replaces it for all
  # instances.
  @@promptcloudhome = "#{ENV["HOME"]}/promptcloud/"

  # Time at which a non-200 answer from the main API server was first seen
  # (nil while no downtime is being tracked).
  attr_accessor :api_downtime

  # Builds a client and immediately runs perform_initial_setup.
  #
  # @param args_hash [Hash] same options as perform_initial_setup
  def initialize(args_hash = {})
    super()

    @download_dir = nil
    @client_id = nil
    perform_initial_setup(args_hash)
  end

  # Prints every key/value pair of the config file to stdout, or a message on
  # stderr when the file does not exist.
  #
  # @param options [Hash] :apiconf overrides the default config file path
  def display_info(options)
    apiconf = options[:apiconf] || File.join(@@promptcloudhome, "configs", "config.yml")

    if File.file?(apiconf)
      load_conf(apiconf).each_pair { |key, val| puts "#{key} : #{val}" }
    else
      $stderr.puts "Config file #{apiconf} doesn't exist"
    end
  end

  # Creates the working directories and the config file (prompting on STDIN
  # for the client id when the file is missing), then loads the config.
  #
  # The resolved paths are written back into +options+ (callers that reuse the
  # same hash rely on that) and are also remembered on the instance, so a
  # later download_files call with a fresh hash still finds them.
  #
  # Exits the process with status 1 when the config file has no client id.
  #
  # @param options [Hash] optional keys: :promptcloudhome, :apiconf, :log_dir,
  #   :md5_dir, :queried_timestamp_file, :download_dir
  def perform_initial_setup(options = {})
    @@promptcloudhome = options[:promptcloudhome] if options[:promptcloudhome]
    FileUtils.mkdir_p(@@promptcloudhome)

    options[:apiconf] ||= File.join(@@promptcloudhome, "configs", "config.yml")
    FileUtils.mkdir_p(File.dirname(options[:apiconf]))
    create_conf_file(options[:apiconf]) unless File.file?(options[:apiconf])

    options[:log_dir] ||= File.join(@@promptcloudhome, "log")
    FileUtils.mkdir_p(options[:log_dir])

    options[:md5_dir] ||= File.join(@@promptcloudhome, "md5sums")
    FileUtils.mkdir_p(options[:md5_dir])

    options[:queried_timestamp_file] ||= File.join(@@promptcloudhome, "last_queried_ts")

    @log_dir = options[:log_dir]
    @md5_dir = options[:md5_dir]
    @queried_timestamp_file = options[:queried_timestamp_file]

    @conf_hash = load_conf(options[:apiconf])
    @client_id = @conf_hash["client_id"]

    unless @client_id
      $stderr.puts "#{$0} : Could not find client id from config file : #{options[:apiconf]}"
      exit 1
    end

    # An explicit :download_dir option wins over the config file value.
    @download_dir = options[:download_dir] ||
                    @conf_hash["download_dir"] ||
                    File.join(@@promptcloudhome, "downloads")
    FileUtils.mkdir_p(@download_dir)
  end

  # Downloads every listed file that has not been downloaded before.
  #
  # @param options [Hash] requires :user and :pass; optional :timestamp,
  #   :category, :bcp, :loop, :log_dir, :md5_dir, :queried_timestamp_file
  # @return [Boolean] true when at least one not-yet-downloaded file was
  #   listed, false when the listing failed or contained nothing new
  # @raise [ArgumentError] when :user or :pass is missing
  def download_files(options)
    unless options[:user] && options[:pass]
      raise ArgumentError, "You didn't provide username and password, please provide these as hash:{:user=><userid>, :pass=><password>}"
    end

    # Fall back to the directories resolved during setup when the caller
    # passes a hash that did not go through perform_initial_setup.
    log_dir = options[:log_dir] || @log_dir
    md5_dir = options[:md5_dir] || @md5_dir
    FileUtils.mkdir_p(log_dir)
    FileUtils.mkdir_p(md5_dir)

    ts = ("%10.9f" % Time.now.to_f).delete(".").to_i
    fetch_log = "#{log_dir}/fetched_urls-#{ts}.log"

    new_feed_exists = false
    listing_usable = false
    # Block form so the log file is closed on every path.
    File.open(fetch_log, "w") do |fetch_log_file|
      urls_ts_map, url_md5_map = get_file_urls(options)
      if !urls_ts_map
        $stderr.puts "#{$0} : Could not obtain file urls to download."
      elsif urls_ts_map.empty?
        $stderr.puts "No new files to download"
      else
        listing_usable = true
        # Oldest batches first.
        urls_ts_map.keys.sort.each do |updated|
          urls_ts_map[updated].each do |url|
            if fetch_unless_downloaded(url, url_md5_map[url], options, md5_dir, fetch_log_file)
              new_feed_exists = true
            end
          end
        end
      end
    end
    return false unless listing_usable

    $stderr.puts "Log file : #{fetch_log}"
    $stderr.puts "Downloaded files are available at:#{@download_dir}"
    new_feed_exists
  end

  private

  # Loads the YAML config file; anything that is not a Hash (for example an
  # empty file) is treated as an empty configuration.
  def load_conf(path)
    conf = YAML.load_file(path)
    conf.is_a?(Hash) ? conf : {}
  end

  # Asks for the client id on STDIN and writes a fresh config file at +path+.
  def create_conf_file(path)
    $stderr.puts "#{$0} : Could not find config file : #{path}"
    $stderr.puts "Please input your id( for example if you use url http://api.promptcloud.com/data/info?id=demo then your user id is demo\n)"
    # to_s guards against a closed STDIN, where gets returns nil.
    client_id = STDIN.gets.to_s.strip
    yml_val = { "client_id" => client_id, "download_dir" => File.join(@@promptcloudhome, "downloads") }
    File.open(path, "w") { |file| file << yml_val.to_yaml }
  end

  # Escapes a value for use inside the query string.
  def escape_param(value)
    URI.encode_www_form_component(value.to_s)
  end

  # Builds the listing URL for the main or the bcp server. When a timestamp
  # is given it is also appended (one per line) to the queried-timestamp file.
  def get_api_url(options)
    host = options[:bcp] ? "bcp.promptcloud.com" : "api.promptcloud.com"
    promptcloud_api_query = "http://#{host}/data/info?id=#{escape_param(@client_id)}"

    if options[:timestamp]
      promptcloud_api_query += "&ts=#{escape_param(options[:timestamp])}"
      ts_file = options[:queried_timestamp_file] || @queried_timestamp_file
      File.open(ts_file, "a") { |file| file.puts options[:timestamp] }
    end

    promptcloud_api_query += "&cat=#{escape_param(options[:category])}" if options[:category]
    promptcloud_api_query
  end

  # Records when the main server started failing and switches +options+ to
  # the bcp server once the failure has lasted more than 1800 seconds.
  def handle_api_downtime(options)
    if @api_downtime
      options[:bcp] = true if Time.now - @api_downtime > 1800
    else
      @api_downtime = Time.now
    end
  end

  # Switches +options+ back to the main server and clears the downtime mark.
  def disable_bcp(options)
    return unless options[:bcp]

    options[:bcp] = nil
    @api_downtime = nil
  end

  # Queries the API and parses the XML listing.
  #
  # @return [Array] [{updated => [url, ...]}, {url => md5sum}], or [nil, nil]
  #   when the query or the parsing failed
  def get_file_urls(options)
    url_ts_map = {}
    url_md5_map = {}
    begin
      promptcloud_api_query = get_api_url(options)
      api_query_output = nil
      RestClient.get(promptcloud_api_query) do |response|
        if response.code == 200
          api_query_output = response.to_s
          disable_bcp(options) # next fetch will be from the main api server
        elsif options[:bcp]
          $stderr.puts "bcp too is down :(, please mail downtime@promptcloud.com "
          disable_bcp(options)
        else
          if options[:loop]
            $stderr.puts "Could not fetch from promptcloud api server, will try after the api server after the sleep and promptcloud bcp after 30 mins"
          else
            $stderr.puts "Main api server seems to be unreachable, you can try --bcp option"
          end
          handle_api_downtime(options)
        end
      end
      # The listing is parsed from the response already received instead of
      # being fetched a second time.
      return nil, nil unless api_query_output

      doc = REXML::Document.new(api_query_output)
      REXML::XPath.each(doc, '//entry') do |entry_node|
        updated = REXML::XPath.first(entry_node, './updated').text.strip.to_i
        url = REXML::XPath.first(entry_node, './url').text.strip
        md5sum = REXML::XPath.first(entry_node, './md5sum').text.strip
        url_md5_map[url] = md5sum
        (url_ts_map[updated] ||= []).push(url)
      end
    rescue StandardError => e
      # StandardError only: Interrupt and SystemExit must not be swallowed.
      $stderr.puts "#{$0} : Api query failed:#{e.class}, #{e.message}"
      return nil, nil
    end
    return url_ts_map, url_md5_map
  end

  # Skips +url+ when its md5 marker already matches, otherwise downloads it.
  #
  # @return [Boolean] false when skipped, true when a download was attempted
  def fetch_unless_downloaded(url, md5sum, options, md5_dir, fetch_log_file)
    md5_filename = File.basename(url).gsub(/\.gz/, ".md5sum")
    md5_filepath = File.join(md5_dir, md5_filename)
    if File.file?(md5_filepath) && File.read(md5_filepath).strip == md5sum
      $stderr.puts "Skipping file at url : #{url}, it has been downloaded earlier"
      return false
    end

    download_file(url, md5sum, options, md5_filepath, fetch_log_file)
    true
  end

  # Downloads one gzip file, verifies the md5 of its uncompressed content and
  # writes the md5 marker on success. The downloaded file is removed when the
  # checksum differs or when the download fails half-way.
  def download_file(url, md5sum, options, md5_filepath, fetch_log_file)
    outfile = File.join(@download_dir, File.basename(url))
    $stderr.puts "Fetching : #{url}"
    req = RestClient::Request.new(:method => "get", :user => options[:user], :password => options[:pass], :url => url)
    File.open(outfile, "wb") { |file| file.write req.execute }
    fetch_log_file.puts "Fetched: #{url}"

    content = Zlib::GzipReader.open(outfile) { |gz| gz.read }
    if md5sum == Digest::MD5.hexdigest(content)
      File.open(md5_filepath, "w") { |file| file.puts md5sum }
    else
      $stderr.puts "Url : #{url} was not downloaded completely, hence deleting the downloaded file"
      File.delete(outfile)
    end
  rescue StandardError => e
    $stderr.puts "#{$0} : Failed to fetch url : #{url}"
    fetch_log_file.puts "#{e.class}, #{e.message}"
    fetch_log_file.puts "Failed: #{url}"
    # Do not leave a truncated or corrupt file in the download directory.
    File.delete(outfile) if outfile && File.file?(outfile)
  end
end
255
+
256
# Sleep timer with a doubling interval: each wait sleeps for the current
# interval, then doubles it; once the interval exceeds the maximum it starts
# over at the minimum.
class PromptCloudTimer
  # @param args_hash [Hash] :min first/reset interval in seconds (default 10),
  #   :max upper bound in seconds (default 300)
  def initialize(args_hash = {})
    super()
    @min = args_hash[:min] || 10
    @max = args_hash[:max] || 300
    @sleep_interval = @min
  end

  # Sleeps for the current interval and prepares the next one.
  def wait
    $stderr.puts "Going to sleep for #{@sleep_interval} seconds"
    sleep(@sleep_interval)
    @sleep_interval *= 2
    # Use the configured bounds rather than the hard-coded defaults.
    @sleep_interval = @min if @sleep_interval > @max
  end
end
282
+
283
# Command-line argument parser for the get_promptcloud_data tool.
class PromptCloudApiArgParser
  def initialize
    super
  end

  # Checks that the parsed options describe a runnable request: either
  # --perform_initial_setup / --display_info, or both user and password.
  #
  # @param options [Hash] parsed options
  # @param mandatory [Array] accepted for interface compatibility; not
  #   consulted by this check
  # @return [Boolean]
  def self.validate(options, mandatory)
    return true if options[:perform_initial_setup] || options[:display_info]
    return true if options[:user] && options[:pass]

    $stderr.puts "#{$0} : Please provide options perform_initial_setup/display_info or provide user and password for any other query"
    false
  end

  # Prints the usage notes shown after the option summary.
  def self.usage_notes
    $stderr.puts "Example :"
  end

  # Parses +args+ (consuming the recognised options from the array) and
  # returns the resulting options hash.
  #
  # Exits the process with status -1 when an option is unknown or malformed,
  # when validation fails, or after printing help.
  #
  # @param args [Array<String>] command-line arguments, typically ARGV
  # @param defaults [Hash] initial option values; not modified
  # @param mandatory [Array] forwarded to validate
  # @return [Hash] parsed options keyed by symbol
  def self.parse(args, defaults = {}, mandatory = [])
    options = {}.merge(defaults)
    parser = OptionParser.new do |opts|
      opts.banner = "Usage: #{$0} [options] "

      opts.on("--apiconf APICONFPATH", String, "override the config file location, the file which stores information like client_id, downloadir, previous timestamp file") do |v|
        options[:apiconf] = v
      end

      opts.on("--download_dir DOWNLOADDIR", String, "to override the download dir obtained from apiconf file") do |v|
        options[:download_dir] = v
      end

      opts.on("--promptcloudhome PROMPTCLOUDHOME", String, "to override the promptcloudhome dir:~/promptcloud") do |v|
        options[:promptcloudhome] = v
      end

      opts.on("--perform_initial_setup", "Perform initial setup") do |v|
        options[:perform_initial_setup] = v
      end

      opts.on("--display_info", "Display veraiou info ") do |v|
        options[:display_info] = v
      end

      opts.on("--timestamp TIMESTAMP", Integer, "query promptcloudapi for files newer than or equal to given timestamp") do |v|
        options[:timestamp] = v
      end

      opts.on("--queried_timestamp_file queriedTIMESTAMPFILE", String, "override default queried_timestamp_file: file that stores last queried timestamp") do |v|
        options[:queried_timestamp_file] = v
      end

      opts.on("--category CATEGORY ", String, "query promptcloudapi for files of given category. eg: if files of different verticals are placed in different directory under client's parent directory, then files of specific directory can be obtained by specifying that directory name in category option") do |v|
        options[:category] = v
      end

      opts.on("--user USER", String, "Data api user id") do |v|
        options[:user] = v
      end

      opts.on("--pass PASSWORD", String, "Data api password") do |v|
        options[:pass] = v
      end

      opts.on("--loop", "download new data files and keep looking for new one. i.e it doesn't exit, if no new feed is found it will sleep. minimun sleep time is 10 secs and max sleep time is 300 secs") do |v|
        options[:loop] = v
      end

      opts.on("--noloop", "Download new data files and and exit, this is the default behaviour") do |v|
        options[:noloop] = v
      end

      opts.on("--bcp", "use bcp.promptcloud.com instead of api.promptcloud.com") do |v|
        options[:bcp] = v
      end

      opts.on_tail("-h", "--help", "Show this message") do
        puts opts
        usage_notes
        exit(-1)
      end
    end

    begin
      parser.parse!(args)
    rescue OptionParser::ParseError => e
      # Unknown or malformed options end with a readable message instead of
      # an uncaught exception and a stack trace.
      $stderr.puts "#{$0} : #{e.message}"
      $stderr.puts "#{$0} Invalid/no args, see use -h command for help"
      exit(-1)
    end

    return options if validate(options, mandatory)

    $stderr.puts "#{$0} Invalid/no args, see use -h command for help"
    exit(-1)
  end
end
@@ -0,0 +1,3 @@
1
# Namespace holding the gem's version number.
module PromptcloudDataApi
  # Released gem version; frozen so the constant cannot be mutated in place.
  VERSION = "0.0.1.beta1".freeze
end
@@ -0,0 +1,19 @@
1
# -*- encoding: utf-8 -*-
# Gem specification for promptcloud_data_api.

# Make lib/ loadable so the version constant can be read while building.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'promptcloud_data_api/version'

Gem::Specification.new do |gem|
  gem.name          = "promptcloud_data_api"
  gem.version       = PromptcloudDataApi::VERSION
  gem.authors       = ["PromptCloud"]
  gem.email         = ["rubygems@promptcloud.com"]
  gem.description   = %q{This gem can be used to download data from Promptcloud data API. You need to be PromptCloud client to get the data though :)}
  gem.summary       = %q{use it to query promptcloud indexed data}
  gem.homepage      = "http://promptcloud.com"
  # The bundled LICENSE.txt is the MIT license; declare it in the metadata.
  gem.license       = "MIT"
  # Package exactly the files tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]
  gem.add_dependency "rest-client"
end
metadata ADDED
@@ -0,0 +1,71 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: promptcloud_data_api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1.beta1
5
+ prerelease: 6
6
+ platform: ruby
7
+ authors:
8
+ - PromptCloud
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2012-10-23 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rest-client
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: '0'
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ! '>='
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: This gem can be used to download data from Promptcloud data API. You
31
+ need to be PromptCloud client to get the data data though :)
32
+ email:
33
+ - rubygems@promptcloud.com
34
+ executables:
35
+ - get_promptcloud_data
36
+ extensions: []
37
+ extra_rdoc_files: []
38
+ files:
39
+ - Gemfile
40
+ - LICENSE.txt
41
+ - README.md
42
+ - Rakefile
43
+ - bin/get_promptcloud_data
44
+ - lib/promptcloud_data_api.rb
45
+ - lib/promptcloud_data_api/version.rb
46
+ - promptcloud_data_api.gemspec
47
+ homepage: http://promptcloud.com
48
+ licenses: []
49
+ post_install_message:
50
+ rdoc_options: []
51
+ require_paths:
52
+ - lib
53
+ required_ruby_version: !ruby/object:Gem::Requirement
54
+ none: false
55
+ requirements:
56
+ - - ! '>='
57
+ - !ruby/object:Gem::Version
58
+ version: '0'
59
+ required_rubygems_version: !ruby/object:Gem::Requirement
60
+ none: false
61
+ requirements:
62
+ - - ! '>'
63
+ - !ruby/object:Gem::Version
64
+ version: 1.3.1
65
+ requirements: []
66
+ rubyforge_project:
67
+ rubygems_version: 1.8.24
68
+ signing_key:
69
+ specification_version: 3
70
+ summary: use it to query promptcloud indexed data
71
+ test_files: []