promptcloud_data_api 0.0.1.beta3 → 0.0.2.beta1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +109 -32
- data/bin/get_promptcloud_data +9 -11
- data/lib/promptcloud_data_api.rb +220 -183
- data/lib/promptcloud_data_api/version.rb +1 -1
- data/promptcloud_data_api.gemspec +1 -1
- metadata +5 -4
data/README.md
CHANGED
@@ -1,57 +1,134 @@
|
|
1
|
-
#
|
1
|
+
#PromptCloudDataAPI
|
2
2
|
|
3
|
-
This is PromptCloud's
|
3
|
+
This is [PromptCloud's](http://promptcloud.com) data API gem. It can be used to fetch the client specific data from PromptCloud data API. Available API versions are v1 and v2.
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
## Installation
|
10
|
-
Option 1-
|
11
|
-
Add this line to your application's Gemfile:
|
5
|
+
##Installation
|
6
|
+
####Option 1 -
|
7
|
+
1. Add below line to your application's Gemfile.
|
12
8
|
|
13
9
|
gem 'promptcloud_data_api'
|
14
10
|
|
15
|
-
|
11
|
+
2. Now execute below command.
|
16
12
|
|
17
13
|
$ bundle
|
18
14
|
|
19
|
-
Option 2-
|
20
|
-
Directly install using
|
15
|
+
####Option 2 -
|
16
|
+
Directly install using gem. Command to run -
|
21
17
|
|
22
18
|
$ gem install promptcloud_data_api
|
23
19
|
|
24
|
-
## Usage
|
25
|
-
|
26
|
-
|
20
|
+
## Usage: ./get_promptcloud_data [options]
|
21
|
+
|
22
|
+
-v, --api_version VERSION to get data from different api version(available versions are v1 and v2, the default version is v1)
|
23
|
+
-u, --user USER data api user id(provided by PromptCloud)
|
24
|
+
-p, --pass PASSWORD data api password(provided by PromptCloud, used for api v1)
|
25
|
+
-k, --client_auth_key AUTHKEY data api client auth key(provided by PromptCloud, used for api v2)
|
26
|
+
-i, --perform_initial_setup to perform initial setup
|
27
|
+
--display_info to display config info
|
28
|
+
--apiconf APICONFPATH to override the config file path(config file stores information like client_id, password, client_auth_key, download_dir etc)
|
29
|
+
--download_dir DOWNLOAD_DIRECTORY
|
30
|
+
to override the download directory(which contains downloaded data files)
|
31
|
+
--promptcloudhome PROMPTCLOUDHOME
|
32
|
+
to override the promptcloudhome dir(~/promptcloud)
|
33
|
+
-t, --timestamp TIMESTAMP to query promptcloud api for files newer than or equal to given timestamp
|
34
|
+
--days DAYS to download the data of last few days
|
35
|
+
--hours HOURS to download the data of last few hours
|
36
|
+
--minutes MINUTES to download the data of last few minutes
|
37
|
+
--queried_timestamp_file TIMESTAMPFILE
|
38
|
+
to override the last timestamp file(contains last queried timestamp)
|
39
|
+
--category CATEGORY to query promptcloud api for files of the given category(if files of different verticals are placed in different directory under client's parent directory, then files of specific directory can be obtained by specifying that directory name in category option)
|
40
|
+
--site SITE_NAME to query promptcloud api for files of the given site
|
41
|
+
--loop download new data files and keep looking for new one(i.e. it doesn't exit, if no new feed is found it will sleep, minimum sleep time is 10 secs and max sleep time is 300 secs)
|
42
|
+
--noloop download new data files and exit, this is the default behaviour
|
43
|
+
--bcp to download data from PromptCloud backup server(high availability server, should use if main data api server is unreachable)
|
44
|
+
-h, --help Show this message
|
45
|
+
|
46
|
+
Example :
|
47
|
+
# Initial setup(default config)
|
48
|
+
./get_promptcloud_data --perform_initial_setup --user <username> --pass <password> # API v1 requires valid userid and password
|
49
|
+
./get_promptcloud_data --api_version v2 --perform_initial_setup --user <username> --client_auth_key <auth key> # API v2 requires valid user id and authentication key
|
50
|
+
|
51
|
+
# Download data
|
52
|
+
./get_promptcloud_data # to download data of last 2 days (default)
|
53
|
+
./get_promptcloud_data --timestamp <timestamp> # to use the timestamp param
|
54
|
+
./get_promptcloud_data --site <test_site> --days 7 # to download data of the site test_site uploaded in last 7 days
|
55
|
+
./get_promptcloud_data --category blog --hours 10 # to download data of the category blog uploaded in last 10 hours
|
56
|
+
./get_promptcloud_data --minutes 20 # to download data uploaded in last 20 minutes
|
57
|
+
./get_promptcloud_data --bcp # to download data from bcp(PromptCloud backup server)
|
58
|
+
./get_promptcloud_data --loop # to download data continuously, it will automatically check our API for new data
|
59
|
+
|
60
|
+
# To use own config
|
61
|
+
./get_promptcloud_data --apiconf <apiconf file full path> # to override apiconf file
|
62
|
+
./get_promptcloud_data --download_dir <download directory full path> # to override download directory
|
63
|
+
./get_promptcloud_data --promptcloudhome <promptcloudhome full path> # to override promptcloudhome dir
|
64
|
+
|
65
|
+
|
66
|
+
####Note
|
67
|
+
|
68
|
+
* API v1 requires valid userid and password.
|
69
|
+
* API v2 requires userid and authentication key.
|
70
|
+
* PromptCloud provides userid and password/authentication key to the client.
|
71
|
+
* If option --perform_initial_setup is provided along with other options, then initial setup will be performed(create conf file, download dir).
|
72
|
+
* If we do not pass any of --timestamp, --days, --hours and --minutes, then past 2 days data will be downloaded(default setting).
|
73
|
+
|
74
|
+
For queries related to this gem please contact the folks at promptcloud or open a github issue.
|
75
|
+
|
76
|
+
#### API Help Links
|
77
|
+
API v1 - [https://api.promptcloud.com/data/info?type=help](https://api.promptcloud.com/data/info?type=help)
|
78
|
+
|
79
|
+
API v2 - [https://api.promptcloud.com/v2/data/info?type=help](https://api.promptcloud.com/v2/data/info?type=help)
|
80
|
+
|
81
|
+
#### Access using program
|
27
82
|
|
28
83
|
require 'promptcloud_data_api'
|
29
|
-
obj=PromptCloudApi.new
|
30
|
-
obj.download_files({:user => "<your valid user name>", :pass => "<your valid password>", :timestamp=> <timestamp>[optional], :category=> "<category>"[optional]})
|
31
84
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
85
|
+
For API v1 -
|
86
|
+
|
87
|
+
obj = PromptCloudApi.new({:api_version => "v1", :perform_initial_setup => true, :user => "your valid user name", :pass => "your valid password"})
|
88
|
+
|
89
|
+
For API v2 -
|
90
|
+
|
91
|
+
obj = PromptCloudApi.new({:api_version => "v2", :perform_initial_setup => true, :user => "your valid user name", :client_auth_key => "your valid auth key"})
|
92
|
+
|
93
|
+
To download data files(By default it will download the data files which are uploaded in last 2 days) -
|
94
|
+
|
95
|
+
obj.download_files
|
96
|
+
|
97
|
+
To download data files with custom settings, we have to pass an options hash. Example -
|
98
|
+
|
99
|
+
options = {}
|
100
|
+
options[:site] = "test_site"
|
101
|
+
options[:timestamp] = "timestamp"
|
102
|
+
obj.download_files(options)
|
103
|
+
|
104
|
+
Other available options are -
|
105
|
+
|
106
|
+
days
|
107
|
+
hours
|
108
|
+
minutes
|
109
|
+
category
|
110
|
+
bcp
|
111
|
+
loop
|
112
|
+
|
113
|
+
#### Access using command line
|
36
114
|
|
37
|
-
|
115
|
+
For API v1 -
|
38
116
|
|
39
|
-
get_promptcloud_data
|
40
|
-
|
117
|
+
get_promptcloud_data --perform_initial_setup --user "username" --pass "password"
|
118
|
+
|
119
|
+
get_promptcloud_data [--category "category"] [--timestamp "timestamp"]
|
41
120
|
|
42
|
-
|
43
|
-
* Log file can be viewed at ~/promptcloud/log/*log
|
44
|
-
* Api config file at ~/promptcloud/configs/config.yml
|
45
|
-
* To override the downloaded file use option --download_dir "<apidir full path>"
|
46
|
-
* To override config dir use option --apiconf "<apiconf full path>"
|
121
|
+
For API v2 -
|
47
122
|
|
48
|
-
|
123
|
+
get_promptcloud_data --api_version v2 --perform_initial_setup --user "username" --client_auth_key "auth key"
|
124
|
+
|
125
|
+
get_promptcloud_data --api_version v2 [--category "category"] [--timestamp "timestamp"] # API v2
|
49
126
|
|
50
127
|
## Contributing
|
51
|
-
In order to contribute to this gem
|
128
|
+
In order to contribute to this gem -
|
52
129
|
|
53
130
|
1. Fork it
|
54
131
|
2. Create your feature branch (`git checkout -b my-new-feature`)
|
55
132
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
56
133
|
4. Push to the branch (`git push origin my-new-feature`)
|
57
|
-
5. Create new
|
134
|
+
5. Create new pull request
|
data/bin/get_promptcloud_data
CHANGED
@@ -2,22 +2,20 @@
|
|
2
2
|
|
3
3
|
require File.dirname(File.expand_path(__FILE__)) + '/../lib/promptcloud_data_api.rb'
|
4
4
|
|
5
|
-
|
6
|
-
|
7
|
-
|
5
|
+
timer = PromptCloudTimer.new
|
6
|
+
options = PromptCloudApiArgParser.parse(ARGV)
|
7
|
+
api_obj = PromptCloudApi.new(options)
|
8
8
|
if options[:display_info]
|
9
|
-
|
10
|
-
elsif options[:perform_initial_setup]
|
11
|
-
|
9
|
+
api_obj.display_info(options)
|
10
|
+
elsif options[:perform_initial_setup]
|
11
|
+
api_obj.perform_initial_setup(options)
|
12
12
|
else
|
13
13
|
if options[:loop]
|
14
14
|
loop do
|
15
|
-
new_feed_exists=
|
16
|
-
if new_feed_exists == false
|
17
|
-
timer.wait
|
18
|
-
end
|
15
|
+
new_feed_exists = api_obj.download_files(options)
|
16
|
+
timer.wait if new_feed_exists == false
|
19
17
|
end
|
20
18
|
else
|
21
|
-
|
19
|
+
api_obj.download_files(options)
|
22
20
|
end
|
23
21
|
end
|
data/lib/promptcloud_data_api.rb
CHANGED
@@ -14,240 +14,233 @@ require 'digest/md5'
|
|
14
14
|
class PromptCloudApi
|
15
15
|
@@promptcloudhome="#{ENV["HOME"]}/promptcloud/"
|
16
16
|
attr_accessor :api_downtime
|
17
|
-
|
17
|
+
|
18
|
+
def initialize(args_hash = {})
|
18
19
|
super()
|
19
|
-
|
20
|
-
@
|
21
|
-
@
|
20
|
+
@download_dir = nil
|
21
|
+
@client_id = nil
|
22
|
+
@password = nil
|
23
|
+
@client_auth_key = nil
|
22
24
|
perform_initial_setup(args_hash)
|
23
25
|
end
|
24
26
|
|
25
|
-
def display_info(
|
26
|
-
apiconf="#{@@promptcloudhome}/configs/config.yml"
|
27
|
-
if
|
28
|
-
apiconf = options[:apiconf]
|
29
|
-
end
|
30
|
-
|
27
|
+
def display_info(args_hash)
|
28
|
+
apiconf = "#{@@promptcloudhome}/configs/config.yml"
|
29
|
+
apiconf = args_hash[:apiconf] if args_hash[:apiconf]
|
31
30
|
if File.file?(apiconf)
|
32
|
-
conf_hash=YAML::load_file(apiconf)
|
31
|
+
conf_hash = YAML::load_file(apiconf)
|
33
32
|
conf_hash.each_pair do |key, val|
|
34
33
|
puts "#{key} : #{val}"
|
35
34
|
end
|
36
35
|
else
|
37
|
-
$stderr.puts "Config file #{apiconf} doesn't exist"
|
36
|
+
$stderr.puts "Config file #{apiconf} doesn't exist, use -i to create config file"
|
38
37
|
end
|
39
38
|
end
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
unless
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
39
|
+
|
40
|
+
#optional argument args_hash={:promptcloudhome=>..., :apiconf=>...., :queried_timestamp_file=>}
|
41
|
+
def perform_initial_setup(args_hash={})
|
42
|
+
@@promptcloudhome = args_hash[:promptcloudhome] if args_hash[:promptcloudhome]
|
43
|
+
FileUtils.mkdir_p(@@promptcloudhome) unless File.directory?(@@promptcloudhome)
|
44
|
+
|
45
|
+
args_hash[:apiconf] = "#{@@promptcloudhome}/configs/config.yml" unless args_hash[:apiconf]
|
46
|
+
FileUtils.mkdir_p(File.dirname(args_hash[:apiconf])) unless File.directory?(File.dirname(args_hash[:apiconf]))
|
47
|
+
|
48
|
+
args_hash[:log_dir]="#{@@promptcloudhome}/log" unless args_hash[:log_dir]
|
49
|
+
FileUtils.mkdir_p(args_hash[:log_dir]) unless File.directory?(args_hash[:log_dir])
|
50
|
+
|
51
|
+
args_hash[:md5_dir]="#{@@promptcloudhome}/md5sums" unless args_hash[:md5_dir]
|
52
|
+
FileUtils.mkdir_p(args_hash[:md5_dir]) unless File.directory?(args_hash[:md5_dir])
|
53
|
+
|
54
|
+
args_hash[:queried_timestamp_file]="#{@@promptcloudhome}/last_queried_ts" unless args_hash[:queried_timestamp_file]
|
55
|
+
|
56
|
+
args_hash["download_dir"] = File.join(@@promptcloudhome, "downloads") unless args_hash["download_dir"]
|
57
|
+
@download_dir = args_hash["download_dir"]
|
58
|
+
FileUtils.mkdir_p(@download_dir) unless File.directory?(@download_dir)
|
59
|
+
|
60
|
+
@conf_hash = {}
|
61
|
+
if File.file?(args_hash[:apiconf])
|
62
|
+
conf_hash = YAML::load_file(args_hash[:apiconf])
|
63
|
+
@conf_hash = conf_hash if conf_hash and conf_hash.is_a?Hash
|
64
|
+
@client_id = @conf_hash["client_id"]
|
65
|
+
if args_hash[:api_version] == "v2"
|
66
|
+
@client_auth_key = @conf_hash["client_auth_key"]
|
67
|
+
else
|
68
|
+
@password = @conf_hash["password"]
|
65
69
|
end
|
66
70
|
end
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
end
|
74
|
-
|
75
|
-
unless options[:md5_dir]
|
76
|
-
options[:md5_dir]="#{@@promptcloudhome}/md5sums"
|
71
|
+
@client_id = args_hash[:user] if args_hash[:user]
|
72
|
+
@client_auth_key = args_hash[:client_auth_key] if args_hash[:client_auth_key]
|
73
|
+
@password = args_hash[:pass] if args_hash[:pass]
|
74
|
+
unless @client_id
|
75
|
+
$stdout.print "\nPlease enter the user id(for example if you use url http://api.promptcloud.com/data/info?id=demo, then your user id is demo)\n:"
|
76
|
+
@client_id = STDIN.gets.chomp.strip
|
77
77
|
end
|
78
|
-
if
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
unless @download_dir
|
96
|
-
@download_dir=File.join(@@promptcloudhome, "downloads")
|
97
|
-
end
|
98
|
-
|
99
|
-
if not File.directory?(@download_dir)
|
100
|
-
FileUtils.mkdir_p(@download_dir)
|
78
|
+
if args_hash[:api_version] == "v2"
|
79
|
+
unless @client_auth_key
|
80
|
+
$stdout.print "\nPlease enter the auth key(Provided by PromptCloud)\n:"
|
81
|
+
@client_auth_key = STDIN.gets.chomp.strip
|
82
|
+
end
|
83
|
+
else
|
84
|
+
unless @password
|
85
|
+
$stdout.print "\nPlease enter the password(Provided by PromptCloud)\n:"
|
86
|
+
@password = STDIN.gets.chomp.strip
|
87
|
+
end
|
88
|
+
end
|
89
|
+
@conf_hash["client_id"] = @client_id
|
90
|
+
@conf_hash["client_auth_key"] = @client_auth_key if args_hash[:api_version] == "v2"
|
91
|
+
@conf_hash["password"] = @password if args_hash[:api_version] == "v1"
|
92
|
+
@conf_hash["download_dir"] = @download_dir
|
93
|
+
File.open(args_hash[:apiconf], "w") do |file|
|
94
|
+
file << @conf_hash.to_yaml
|
101
95
|
end
|
102
96
|
end
|
103
97
|
|
104
|
-
def download_files(
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
if not urls_ts_map
|
114
|
-
$stderr.puts "#{$@} : Could not obtain file urls to download."
|
115
|
-
new_feed_exists
|
98
|
+
def download_files(args_hash)
|
99
|
+
new_feed_exists = false
|
100
|
+
ts = ("%10.9f" % (Time.now).to_f).to_s.gsub(/\./, "").to_i
|
101
|
+
fetch_log = "#{args_hash[:log_dir]}/fetched_urls-#{ts}.log"
|
102
|
+
fetch_log_file = File.open(fetch_log, "w")
|
103
|
+
ts_urls_map, url_md5_map = get_file_urls(args_hash)
|
104
|
+
unless ts_urls_map
|
105
|
+
$stderr.puts "Could not obtain file urls to download."
|
106
|
+
return new_feed_exists
|
116
107
|
end
|
117
|
-
if
|
108
|
+
if ts_urls_map.keys.empty?
|
118
109
|
$stderr.puts "No new files to download"
|
119
110
|
return new_feed_exists
|
120
111
|
end
|
121
|
-
|
122
|
-
sorted_ts=urls_ts_map.keys.sort
|
112
|
+
sorted_ts = ts_urls_map.keys.sort
|
123
113
|
sorted_ts.each do |ts|
|
124
|
-
urls=
|
114
|
+
urls = ts_urls_map[ts]
|
115
|
+
next if not urls
|
125
116
|
urls.each do |url|
|
126
|
-
md5sum=url_md5_map[url]
|
127
|
-
filename=File.basename(url)
|
128
|
-
md5_filename=filename.gsub(/\.gz/, ".md5sum")
|
129
|
-
md5_filepath=
|
130
|
-
if File.file?(md5_filepath) and File.open(md5_filepath).read.chomp.strip==md5sum
|
131
|
-
$stderr.puts "Skipping file
|
117
|
+
md5sum = url_md5_map[url]
|
118
|
+
filename = File.basename(url)
|
119
|
+
md5_filename = filename.gsub(/\.gz/, ".md5sum")
|
120
|
+
md5_filepath = args_hash[:md5_dir]+ "/#{md5_filename}"
|
121
|
+
if File.file?(md5_filepath) and File.open(md5_filepath).read.chomp.strip == md5sum
|
122
|
+
$stderr.puts "Skipping file #{url}, it has been downloaded earlier."
|
132
123
|
next
|
133
124
|
end
|
134
|
-
new_feed_exists=true
|
135
|
-
|
125
|
+
new_feed_exists = true
|
136
126
|
begin
|
137
|
-
$stderr.puts "Fetching
|
138
|
-
req=RestClient::Request.new({:method=>"get"
|
139
|
-
|
127
|
+
$stderr.puts "Fetching file #{url}"
|
128
|
+
req = RestClient::Request.new({:method => "get", :user => @client_id, :password => @password, :url =>url}) if args_hash[:api_version] == "v1"
|
129
|
+
req = RestClient::Request.new({:method=>"get", :url =>url}) if args_hash[:api_version] == "v2"
|
130
|
+
outfile = File.join(@download_dir, File.basename(url))
|
140
131
|
File.open(outfile, "wb") do |file|
|
141
132
|
file.write req.execute
|
142
|
-
fetch_log_file << "Fetched: #{url}"
|
143
133
|
end
|
144
|
-
content=""
|
134
|
+
content = ""
|
145
135
|
Zlib::GzipReader.open(outfile) {|gz|
|
146
136
|
content = gz.read
|
147
137
|
}
|
148
138
|
downloaded_md5 = Digest::MD5.hexdigest(content)
|
149
|
-
if md5sum==downloaded_md5
|
139
|
+
if md5sum == downloaded_md5
|
150
140
|
File.open(md5_filepath, "w"){|file| file.puts md5sum}
|
141
|
+
fetch_log_file << "Fetched: #{url}"
|
151
142
|
else
|
152
143
|
$stderr.puts "Url : #{url} was not downloaded completely, hence deleting the downloaded file"
|
144
|
+
fetch_log_file.puts "Failed: #{url}"
|
153
145
|
File.delete(outfile)
|
154
146
|
end
|
155
147
|
rescue Exception => e
|
156
|
-
$stderr.puts "
|
157
|
-
fetch_log_file.puts "#{$@} #{e.class}, #{e.message}"
|
148
|
+
$stderr.puts "Failed to fetch url: #{url}, Exception: #{e.class}, #{e.message}"
|
158
149
|
fetch_log_file.puts "Failed: #{url}"
|
159
150
|
end
|
160
151
|
end
|
161
152
|
end
|
162
|
-
|
163
153
|
fetch_log_file.close
|
164
|
-
$stderr.puts "
|
165
|
-
$stderr.puts "Downloaded files are available at
|
154
|
+
$stderr.puts "\nLog file : #{fetch_log}"
|
155
|
+
$stderr.puts "Downloaded files are available at : #{@download_dir}\n\n"
|
166
156
|
return new_feed_exists
|
167
157
|
end
|
168
158
|
|
169
159
|
private
|
170
|
-
def get_api_url(
|
171
|
-
|
172
|
-
if
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
file << options[:timestamp]
|
160
|
+
def get_api_url(args_hash)
|
161
|
+
base_url = "https://api.promptcloud.com"
|
162
|
+
base_url = "https://api.bcp.promptcloud.com" if args_hash[:bcp]
|
163
|
+
promptcloud_api_url = base_url + "/data/info?id=#{@client_id}" if args_hash[:api_version] == "v1"
|
164
|
+
promptcloud_api_url = base_url + "/v2/data/info?id=#{@client_id}&client_auth_key=#{@client_auth_key}" if args_hash[:api_version] == "v2"
|
165
|
+
if args_hash[:timestamp]
|
166
|
+
promptcloud_api_url += "&ts=#{args_hash[:timestamp]}"
|
167
|
+
File.open(args_hash[:queried_timestamp_file], "w") do |file|
|
168
|
+
file << args_hash[:timestamp]
|
180
169
|
end
|
181
170
|
end
|
182
|
-
|
183
|
-
if
|
184
|
-
|
185
|
-
|
186
|
-
|
171
|
+
promptcloud_api_url += "&days=#{args_hash[:days]}" if args_hash[:days]
|
172
|
+
promptcloud_api_url += "&hours=#{args_hash[:hours]}" if args_hash[:hours]
|
173
|
+
promptcloud_api_url += "&minutes=#{args_hash[:minutes]}" if args_hash[:minutes]
|
174
|
+
promptcloud_api_url += "&cat=#{args_hash[:category]}" if args_hash[:category]
|
175
|
+
promptcloud_api_url += "&site=#{args_hash[:site]}" if args_hash[:site]
|
176
|
+
return promptcloud_api_url
|
187
177
|
end
|
188
178
|
|
189
|
-
def handle_api_downtime(
|
179
|
+
def handle_api_downtime(args_hash)
|
190
180
|
if @api_downtime
|
191
|
-
total_downtime=Time.now - @api_downtime
|
181
|
+
total_downtime = Time.now - @api_downtime
|
192
182
|
if total_downtime > 1800
|
193
|
-
|
183
|
+
args_hash[:bcp] = true
|
194
184
|
end
|
195
185
|
else
|
196
|
-
@api_downtime=Time.now
|
186
|
+
@api_downtime = Time.now
|
197
187
|
end
|
198
188
|
end
|
199
189
|
|
200
|
-
def disable_bcp(
|
201
|
-
if
|
202
|
-
|
203
|
-
@api_downtime=nil
|
190
|
+
def disable_bcp(args_hash)
|
191
|
+
if args_hash[:bcp]
|
192
|
+
args_hash[:bcp] = nil
|
193
|
+
@api_downtime = nil
|
204
194
|
end
|
205
195
|
end
|
206
196
|
|
207
|
-
def get_file_urls(
|
208
|
-
|
209
|
-
url_md5_map={}
|
197
|
+
def get_file_urls(args_hash)
|
198
|
+
ts_urls_map = {}
|
199
|
+
url_md5_map = {}
|
210
200
|
begin
|
211
|
-
|
212
|
-
|
213
|
-
|
201
|
+
promptcloud_api_url = get_api_url(args_hash)
|
202
|
+
$stdout.puts "Getting files to download from #{promptcloud_api_url}"
|
203
|
+
api_query_output = ""
|
204
|
+
RestClient.get(promptcloud_api_url) do |response, request, result, &block|
|
214
205
|
if [301, 302, 307].include? response.code
|
215
206
|
response.follow_redirection(request, result, &block)
|
216
207
|
else
|
217
208
|
response.return!(request, result, &block)
|
218
209
|
end
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
disable_bcp(options)
|
210
|
+
if response.code != 200
|
211
|
+
if args_hash[:bcp]
|
212
|
+
$stderr.puts "Sorry, our bcp server is also down, please mail to downtime@promptcloud.com"
|
213
|
+
disable_bcp(args_hash)
|
224
214
|
else
|
225
|
-
if
|
226
|
-
$stderr.puts "Could not fetch from
|
215
|
+
if args_hash[:loop]
|
216
|
+
$stderr.puts "Could not fetch from PromptCloud data api server, will try the api server after the sleep and bcp server after 30 mins"
|
227
217
|
else
|
228
218
|
$stderr.puts "Main api server seems to be unreachable, you can try --bcp option"
|
229
219
|
end
|
230
|
-
handle_api_downtime(
|
220
|
+
handle_api_downtime(args_hash)
|
231
221
|
end
|
232
222
|
else
|
233
|
-
api_query_output=response
|
234
|
-
disable_bcp(
|
223
|
+
api_query_output = response
|
224
|
+
disable_bcp(args_hash) #next fetch will be from promtcloud api
|
235
225
|
end
|
236
226
|
end
|
237
|
-
|
238
|
-
doc
|
227
|
+
doc = REXML::Document.new(api_query_output) if api_query_output
|
228
|
+
unless doc
|
229
|
+
$stderr.puts "Could not create xml doc"
|
230
|
+
return nil,nil
|
231
|
+
end
|
239
232
|
REXML::XPath.each(doc, '//entry').each do |entry_node|
|
240
|
-
updated_node=REXML::XPath.first(entry_node, './updated')
|
241
|
-
updated=updated_node.text.chomp.strip.to_i
|
242
|
-
url_node=REXML::XPath.first(entry_node, './url')
|
243
|
-
url=url_node.text.chomp.strip
|
244
|
-
md5_node=REXML::XPath.first(entry_node, './md5sum')
|
245
|
-
md5sum=md5_node.text.chomp.strip
|
246
|
-
url_md5_map[url]=md5sum
|
247
|
-
if
|
248
|
-
|
233
|
+
updated_node = REXML::XPath.first(entry_node, './updated')
|
234
|
+
updated = updated_node.text.chomp.strip.to_i
|
235
|
+
url_node = REXML::XPath.first(entry_node, './url')
|
236
|
+
url = url_node.text.chomp.strip
|
237
|
+
md5_node = REXML::XPath.first(entry_node, './md5sum')
|
238
|
+
md5sum = md5_node.text.chomp.strip
|
239
|
+
url_md5_map[url] = md5sum
|
240
|
+
if ts_urls_map[updated]
|
241
|
+
ts_urls_map[updated].push(url)
|
249
242
|
else
|
250
|
-
|
243
|
+
ts_urls_map[updated]=[url]
|
251
244
|
end
|
252
245
|
end
|
253
246
|
#REXML::XPath.each(doc, '//url').each{|node| urls.push(node.text)}
|
@@ -255,7 +248,7 @@ class PromptCloudApi
|
|
255
248
|
$stderr.puts "#{$@} : Api query failed:#{e.class}, #{e.message}"
|
256
249
|
return nil, nil
|
257
250
|
end
|
258
|
-
return
|
251
|
+
return ts_urls_map, url_md5_map
|
259
252
|
end
|
260
253
|
end
|
261
254
|
|
@@ -287,13 +280,16 @@ class PromptCloudTimer
|
|
287
280
|
end
|
288
281
|
|
289
282
|
class PromptCloudApiArgParser
|
283
|
+
|
290
284
|
def initialize()
|
291
285
|
super
|
292
286
|
end
|
293
287
|
|
294
288
|
def self.validate(options,mandatory)
|
295
|
-
|
296
|
-
|
289
|
+
options[:api_version] = "v1" if not options[:api_version] # default version
|
290
|
+
options[:api_version] = options[:api_version].downcase
|
291
|
+
if not ["v1","v2"].include? options[:api_version]
|
292
|
+
$stderr.puts "#{options[:api_version]} is not a valid api version. Please pass v1 or v2.(v1 is the default)"
|
297
293
|
return false
|
298
294
|
end
|
299
295
|
return true
|
@@ -303,6 +299,23 @@ class PromptCloudApiArgParser
|
|
303
299
|
script_name=$0
|
304
300
|
$stderr.puts <<END
|
305
301
|
Example :
|
302
|
+
# Initial setup(default config)
|
303
|
+
ruby #{script_name} --perform_initial_setup --user <username> --pass <password> # API v1 requires valid userid and password
|
304
|
+
ruby #{script_name} --api_version v2 --perform_initial_setup --user <username> --client_auth_kay <auth key> # API v2 requires valid user id and authentication key
|
305
|
+
|
306
|
+
# Download data
|
307
|
+
ruby #{script_name} # to download data of last 2 days (default)
|
308
|
+
ruby #{script_name} --timestamp <timestamp> # to use of timestamp param
|
309
|
+
ruby #{script_name} --site <test_site> --days 7 # to download data of the site test_site uploaded in last 7 days
|
310
|
+
ruby #{script_name} --category blog --hours 10 # to download data of the category blog uploaded in last 10 hours
|
311
|
+
ruby #{script_name} --minutes 20 # to download data uploaded in last 20 minutes
|
312
|
+
ruby #{script_name} --bcp # to download data from bcp(PromptCloud backup server)
|
313
|
+
ruby #{script_name} --loop # to download data continuously, it will automatically check our API for new data
|
314
|
+
|
315
|
+
# To use own config
|
316
|
+
ruby #{script_name} --apiconf <apiconf file pull path> # to override apiconf file
|
317
|
+
ruby #{script_name} --download_dir <download directory full path> # to override download directory
|
318
|
+
ruby #{script_name} --promptcloudhome <promptcloudhome full path> # to override promptcloudhome home
|
306
319
|
END
|
307
320
|
end
|
308
321
|
|
@@ -312,56 +325,80 @@ END
|
|
312
325
|
options= options.merge(defaults)
|
313
326
|
opts=OptionParser.new do |opts|
|
314
327
|
opts.banner = "Usage: #{$0} [options] "
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
options[:apiconf] = v
|
328
|
+
|
329
|
+
opts.on("-v","--api_version VERSION",String, "to get data from different api version(available versions are v1 and v2, the defalut version is v1)") do |v|
|
330
|
+
options[:api_version] = v
|
319
331
|
end
|
320
332
|
|
321
|
-
opts.on("--
|
322
|
-
options[:
|
333
|
+
opts.on("-u","--user USER",String, "data api user id(provided by PromptCloud)") do |v|
|
334
|
+
options[:user] = v
|
323
335
|
end
|
324
336
|
|
325
|
-
opts.on("--
|
326
|
-
options[:
|
337
|
+
opts.on("-p","--pass PASSWORD",String, "data api password(provised by PromptCloud, used for api v1)") do |v|
|
338
|
+
options[:pass] = v
|
327
339
|
end
|
328
340
|
|
329
|
-
opts.on("--
|
341
|
+
opts.on("-k","--client_auth_key AUTHKEY",String, "data api client auth key(provided by PromptCloud, used for api v2)") do |v|
|
342
|
+
options[:client_auth_key] = v
|
343
|
+
end
|
344
|
+
|
345
|
+
opts.on("-i","--perform_initial_setup", "to perform initial setup") do |v|
|
330
346
|
options[:perform_initial_setup] = v
|
331
347
|
end
|
332
|
-
|
348
|
+
|
349
|
+
opts.on("--display_info", "to display config info") do |v|
|
333
350
|
options[:display_info] = v
|
334
351
|
end
|
335
352
|
|
336
|
-
opts.on("--
|
337
|
-
options[:
|
353
|
+
opts.on("--apiconf APICONFPATH",String, "to override the config file path(config file stores information like client_id, password, client_auth_key, downloadir etc)") do |v|
|
354
|
+
options[:apiconf] = v
|
338
355
|
end
|
339
356
|
|
340
|
-
opts.on("--
|
341
|
-
options[:
|
357
|
+
opts.on("--download_dir DOWNLOAD_DIRECTORY",String, "to override the download directory(which contains downloaded data files)") do |v|
|
358
|
+
options[:download_dir] = v
|
342
359
|
end
|
343
360
|
|
344
|
-
opts.on("--
|
345
|
-
options[:
|
361
|
+
opts.on("--promptcloudhome PROMPTCLOUDHOME",String, "to override the promptcloudhome dir(~/promptcloud)") do |v|
|
362
|
+
options[:promptcloudhome] = v
|
346
363
|
end
|
347
364
|
|
348
|
-
opts.on("--
|
349
|
-
options[:
|
365
|
+
opts.on("-t","--timestamp TIMESTAMP",Integer, "to query promptcloud api for files newer than or equal to given timestamp") do |v|
|
366
|
+
options[:timestamp] = v
|
350
367
|
end
|
351
|
-
|
352
|
-
opts.on("--
|
353
|
-
options[:
|
368
|
+
|
369
|
+
opts.on("--days DAYS",Integer, "to download the data of last few days") do |v|
|
370
|
+
options[:days] = v
|
371
|
+
end
|
372
|
+
|
373
|
+
opts.on("--hours DAYS",Integer, "to download the data of last few hours") do |v|
|
374
|
+
options[:hours] = v
|
375
|
+
end
|
376
|
+
|
377
|
+
opts.on("--minutes MINUTES",Integer, "to download the data last few minutes") do |v|
|
378
|
+
options[:minutes] = v
|
379
|
+
end
|
380
|
+
|
381
|
+
opts.on("--queried_timestamp_file queried TIMESTAMPFILE",String, "to override the last timestamp file(contains last queried timestamp)") do |v|
|
382
|
+
options[:queried_timestamp_file] = v
|
354
383
|
end
|
355
384
|
|
356
|
-
opts.on("--
|
385
|
+
opts.on("--category CATEGORY ",String, "to query promptcloud api for files of the given category(if files of different verticals are placed in different directory under client's parent directory, then files of specific directory can be obtained by specifying that directory name in category option)") do |v|
|
386
|
+
options[:category] = v
|
387
|
+
end
|
388
|
+
|
389
|
+
opts.on("--site SITE_NAME",String, "to query promptcloud api for files of the given site") do |v|
|
390
|
+
options[:site] = v
|
391
|
+
end
|
392
|
+
|
393
|
+
opts.on("--loop", "download new data files and keep looking for new one(i.e it doesn't exit, if no new feed is found it will sleep, minimun sleep time is 10 secs and max sleep time is 300 secs)") do |v|
|
357
394
|
options[:loop] = v
|
358
395
|
end
|
359
396
|
|
360
|
-
opts.on("--noloop", "
|
397
|
+
opts.on("--noloop", "download new data files and and exit, this is the default behaviour") do |v|
|
361
398
|
options[:noloop] = v
|
362
399
|
end
|
363
400
|
|
364
|
-
opts.on("--bcp", "use
|
401
|
+
opts.on("--bcp", "to download data from PromptCloud backup server(high availability server, should use if main data api server is unreachable)") do |v|
|
365
402
|
options[:bcp] = v
|
366
403
|
end
|
367
404
|
|
@@ -8,7 +8,7 @@ Gem::Specification.new do |gem|
|
|
8
8
|
gem.version = PromptcloudDataApi::VERSION
|
9
9
|
gem.authors = ["PromptCloud"]
|
10
10
|
gem.email = ["promptcloud-data-api@promptcloud.com"]
|
11
|
-
gem.description = %q{This gem can be used to download data from Promptcloud data API. You need to be PromptCloud client to get the data
|
11
|
+
gem.description = %q{This gem can be used to download data from Promptcloud data API. It works for both API versions(v1 and v2). You need to be PromptCloud client to get the data though.:)}
|
12
12
|
gem.summary = %q{use it to query promptcloud indexed data}
|
13
13
|
gem.homepage = "http://promptcloud.com"
|
14
14
|
gem.files = `git ls-files`.split($/)
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: promptcloud_data_api
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.2.beta1
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-
|
12
|
+
date: 2015-05-08 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rest-client
|
@@ -27,8 +27,9 @@ dependencies:
|
|
27
27
|
- - ! '>='
|
28
28
|
- !ruby/object:Gem::Version
|
29
29
|
version: '0'
|
30
|
-
description: This gem can be used to download data from Promptcloud data API.
|
31
|
-
need to be PromptCloud client to get the data
|
30
|
+
description: This gem can be used to download data from Promptcloud data API. It works
|
31
|
+
for both API versions(v1 and v2). You need to be PromptCloud client to get the data
|
32
|
+
though.:)
|
32
33
|
email:
|
33
34
|
- promptcloud-data-api@promptcloud.com
|
34
35
|
executables:
|