buzzdata 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +9 -0
- data/Gemfile +4 -0
- data/LICENCE +20 -0
- data/README.md +441 -0
- data/Rakefile +7 -0
- data/buzzdata.gemspec +23 -0
- data/lib/buzzdata.rb +155 -0
- data/lib/buzzdata/error.rb +9 -0
- data/lib/buzzdata/rest_helpers.rb +37 -0
- data/lib/buzzdata/upload.rb +38 -0
- data/lib/buzzdata/version.rb +3 -0
- data/samples/config/buzzdata.yml.sample +1 -0
- data/samples/dataset_overview.rb +22 -0
- data/samples/datasets/kittens_born.csv +23 -0
- data/samples/download_data.rb +11 -0
- data/samples/upload_data.rb +31 -0
- data/spec/buzzdata_spec.rb +42 -0
- data/spec/fixtures/custom.yml +1 -0
- data/spec/fixtures/invalid_yaml.yml +2 -0
- data/spec/fixtures/missing_api_key.yml +1 -0
- data/spec/fixtures/not_a_hash.yml +1 -0
- data/spec/spec.opts +5 -0
- data/spec/spec_helper.rb +3 -0
- metadata +106 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/LICENCE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2011 BuzzData, Inc.
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,441 @@
|
|
1
|
+
# BuzzData Ruby Client Library
|
2
|
+
|
3
|
+
## Getting Started
|
4
|
+
|
5
|
+
Create an instance of the Buzzdata client:
|
6
|
+
|
7
|
+
>> buzzdata = Buzzdata.new('YOUR_API_KEY')
|
8
|
+
|
9
|
+
To make it even simpler, if you create a file, `config/buzzdata.yml` with your `api_key` in it, you can omit the key parameter:
|
10
|
+
|
11
|
+
>> buzzdata = Buzzdata.new
|
12
|
+
|
13
|
+
|
14
|
+
## Downloading Data
|
15
|
+
|
16
|
+
To download data from a dataset, just do this:
|
17
|
+
|
18
|
+
>> buzzdata.download_data 'eviltrout/b-list-celebrities'
|
19
|
+
|
20
|
+
## Dataset Information
|
21
|
+
|
22
|
+
Using `dataset_overview` you can get an overview of a Dataset's information. It returns a hash of attribute names and their values. See the *API Documentation* below for a list of the returned attributes.
|
23
|
+
|
24
|
+
>> ds = buzzdata.dataset_overview 'eviltrout/b-list-celebrities'
|
25
|
+
>> puts ds['name'] # outputs B-List Celebrities
|
26
|
+
|
27
|
+
|
28
|
+
## Listing Datasets
|
29
|
+
|
30
|
+
You can view a user's datasets by calling `datasets_list`. You'll get back an array with information on their datasets.
|
31
|
+
|
32
|
+
>> datasets = buzzdata.datasets_list 'eviltrout'
|
33
|
+
>> datasets.each {|ds| puts ds['id'] }
|
34
|
+
|
35
|
+
|
36
|
+
## Upload History
|
37
|
+
|
38
|
+
You can retrieve a list of uploaded versions of a dataset by calling `dataset_history`:
|
39
|
+
|
40
|
+
>> buzzdata.dataset_history('eviltrout/b-list-celebrities').each {|v| puts "version #{v['version']}!" }
|
41
|
+
|
42
|
+
|
43
|
+
## Creating a Dataset
|
44
|
+
|
45
|
+
You can use the `create_dataset` method to create a new dataset. All fields are required:
|
46
|
+
|
47
|
+
>> ds = buzzdata.create_dataset(:username => 'eviltrout',
|
48
|
+
:name => "My Awesome Dataset!",
|
49
|
+
:public => false,
|
50
|
+
:readme => "This is my awesome dataset",
|
51
|
+
:license => 'cc0',
|
52
|
+
:topics => ['testing-buzzdata'])
|
53
|
+
|
54
|
+
>> puts ds['id'] # outputs eviltrout/my-awesome-dataset
|
55
|
+
|
56
|
+
## Uploading Data
|
57
|
+
|
58
|
+
If your account has the ability to upload data to a dataset, you can do so like this:
|
59
|
+
|
60
|
+
>> upload = buzzdata.start_upload('eviltrout/b-list-celebrities', File.new('datasets/celebrities.csv'))
|
61
|
+
|
62
|
+
Uploads take some time to be processed. You can poll how the processing is going using `in_progress?`
|
63
|
+
|
64
|
+
>> upload.in_progress? # true - upload is going on
|
65
|
+
|
66
|
+
# (wait for some time to pass..)
|
67
|
+
|
68
|
+
>> upload.in_progress? # false - upload is done!
|
69
|
+
|
70
|
+
For a more thorough example of this, look at the sample in *samples/upload_data.rb*
|
71
|
+
|
72
|
+
|
73
|
+
## Publish a dataset
|
74
|
+
|
75
|
+
>> buzzdata.publish_dataset('eviltrout/b-list-celebrities')
|
76
|
+
|
77
|
+
|
78
|
+
## Clone another user's dataset
|
79
|
+
|
80
|
+
>> buzzdata.clone_dataset('pete/pete-forde-s-genome')
|
81
|
+
|
82
|
+
|
83
|
+
## Delete a dataset
|
84
|
+
|
85
|
+
>> buzzdata.delete_dataset('eviltrout/tasteless-dataset')
|
86
|
+
|
87
|
+
|
88
|
+
## Get a user's information
|
89
|
+
|
90
|
+
>> user = buzzdata.user_info('eviltrout')
|
91
|
+
>> puts user['name'] # Robin Ward
|
92
|
+
|
93
|
+
|
94
|
+
## Search BuzzData
|
95
|
+
|
96
|
+
>> buzzdata.search("pets").each do |r|
|
97
|
+
puts r['label'] # Outputs each search result label
|
98
|
+
end
|
99
|
+
|
100
|
+
|
101
|
+
## Get a list of usable Licenses
|
102
|
+
|
103
|
+
>> buzzdata.licenses
|
104
|
+
|
105
|
+
|
106
|
+
## Get a list of usable Topics
|
107
|
+
|
108
|
+
>> buzzdata.topics
|
109
|
+
|
110
|
+
|
111
|
+
# BuzzData API
|
112
|
+
|
113
|
+
The BuzzData API returns results in JSON.
|
114
|
+
|
115
|
+
You must attach your API key as either a query parameter or post variable in the form of `api_key=YOUR_KEY`.
|
116
|
+
|
117
|
+
For example, to test if your API key works, try this url:
|
118
|
+
|
119
|
+
https://buzzdata.com/api/test?api_key=YOUR_KEY
|
120
|
+
|
121
|
+
You should get back a JSON object with your username in it, confirming that the key is yours and has authenticated properly.
|
122
|
+
|
123
|
+
## Rate Limits
|
124
|
+
|
125
|
+
The API currently limits the requests you can make against it hourly. If you need more requests than that, please contact us and we'll let you know.
|
126
|
+
|
127
|
+
We have provided two response headers with each request to the API with Rate Limiting Information. They are returned from every API call.
|
128
|
+
|
129
|
+
X-RateLimit-Limit: 5000
|
130
|
+
X-RateLimit-Remaining: 4998
|
131
|
+
|
132
|
+
`X-RateLimit-Limit` is your current limit per hour. `X-RateLimit-Remaining` is how many requests you have left.
|
133
|
+
|
134
|
+
# Datasets
|
135
|
+
|
136
|
+
## Dataset Details (Overview)
|
137
|
+
|
138
|
+
To retrieve information about a dataset, simply make a GET:
|
139
|
+
|
140
|
+
**GET https://buzzdata.com/api/:username/:dataset**
|
141
|
+
|
142
|
+
**GET Parameters:**
|
143
|
+
|
144
|
+
* `api_key` = your API Key (optional - but necessary for viewing private or unpublished datasets.)
|
145
|
+
|
146
|
+
**Returns JSON:**
|
147
|
+
|
148
|
+
{"dataset":
|
149
|
+
{"id":"eviltrout/b-list-celebrities",
|
150
|
+
"username":"eviltrout",
|
151
|
+
"shortname":"b-list-celebrities",
|
152
|
+
"name":"B-List Celebrities",
|
153
|
+
"readme":"Here's a list of B-List Celebrities that I've curated.",
|
154
|
+
"public":true,
|
155
|
+
"license":"cc0",
|
156
|
+
"published":true,
|
157
|
+
"url":"http://buzzdata.com/eviltrout/b-list-celebrities",
|
158
|
+
"avatar":"/images/avatars/b9/e987d17045c649da4de2a580e8109d655e6a12?1312292315",
|
159
|
+
"followers_count":10,
|
160
|
+
"clones_count":2,
|
161
|
+
"articles_count":5,
|
162
|
+
"visualizations_count":10,
|
163
|
+
"attachments_count":2,
|
164
|
+
"created_at":"2011-07-12T14:31:21-04:00",
|
165
|
+
"data_updated_at":"2011-07-12T14:41:52-04:00"}}
|
166
|
+
|
167
|
+
|
168
|
+
## Listing Datasets
|
169
|
+
|
170
|
+
You can view a list of any user's datasets.
|
171
|
+
|
172
|
+
**GET https://buzzdata.com/api/:username/datasets/list**
|
173
|
+
|
174
|
+
**GET Parameters:**
|
175
|
+
|
176
|
+
* `api_key` = your API Key (optional - but necessary for viewing private or unpublished datasets.)
|
177
|
+
|
178
|
+
**Returns JSON:**
|
179
|
+
|
180
|
+
[
|
181
|
+
{"id":"eviltrout/b-list-celebrities",
|
182
|
+
"url":"http://buzzdata.com/eviltrout/b-list-celebrities",
|
183
|
+
"name":"B-List Celebrities",
|
184
|
+
"public":true,
|
185
|
+
"published":true,
|
186
|
+
"readme":"Here's a list of B-List Celebrities that I've curated."},
|
187
|
+
{"id":"eviltrout/pets",
|
188
|
+
"url":"http://buzzdata.com/eviltrout/pets",
|
189
|
+
"name":"Pets",
|
190
|
+
"public":true,
|
191
|
+
"published":true,
|
192
|
+
"readme":"A list of pets by owner."}
|
193
|
+
...
|
194
|
+
]
|
195
|
+
|
196
|
+
|
197
|
+
## Dataset History
|
198
|
+
|
199
|
+
To retrieve a list of uploads and versions to a dataset:
|
200
|
+
|
201
|
+
**GET https://buzzdata.com/api/:username/:dataset/history**
|
202
|
+
|
203
|
+
**GET Parameters:**
|
204
|
+
|
205
|
+
* `api_key` = your API Key (optional - but necessary for viewing private or unpublished datasets.)
|
206
|
+
|
207
|
+
**Returns JSON:**
|
208
|
+
|
209
|
+
[
|
210
|
+
{"version":1,"created_at":"2011-07-12T14:41:52-04:00","username":"eviltrout"},
|
211
|
+
{"version":2,"created_at":"2011-07-13T13:00:21-04:00","username":"eviltrout"}
|
212
|
+
]
|
213
|
+
|
214
|
+
|
215
|
+
## Downloading Data
|
216
|
+
|
217
|
+
Before you can download data, you need to create a `download_request`. If successful you will be given a
|
218
|
+
url to download the data from.
|
219
|
+
|
220
|
+
**POST https://buzzdata.com/api/:username/:dataset/download_request**
|
221
|
+
|
222
|
+
* `:username` is your username: ex: 'eviltrout'
|
223
|
+
* `:dataset` is the short name (url name) of the dataset you are downloading from. For example: 'b-list-celebrities'
|
224
|
+
|
225
|
+
**POST Parameters:**
|
226
|
+
|
227
|
+
* `api_key` = your API key
|
228
|
+
* `version` = the version of the dataset you wish to download
|
229
|
+
|
230
|
+
**Returns JSON:**
|
231
|
+
|
232
|
+
{download_request: {path:'PATH_TO_DOWNLOAD_YOUR_DATA'}}
|
233
|
+
|
234
|
+
You can then perform a GET to download the data from the path you are given.
|
235
|
+
|
236
|
+
|
237
|
+
## Creating a Dataset
|
238
|
+
|
239
|
+
Before you can upload data to a dataset, you need to create a dataset object in our system with meta-data about the dataset.
|
240
|
+
|
241
|
+
**POST https://buzzdata.com/api/:username/datasets**
|
242
|
+
|
243
|
+
* `:username` is your username: ex: 'eviltrout'
|
244
|
+
|
245
|
+
**POST Parameters:**
|
246
|
+
|
247
|
+
* `api_key` = your API Key
|
248
|
+
* `dataset[name]` = the name of the dataset
|
249
|
+
* `dataset[public]` = (true/false) whether the dataset is public or private
|
250
|
+
* `dataset[readme]` = the content for "About this Dataset"
|
251
|
+
* `dataset[license]` = the license the dataset is being offered with. See *Licenses* below
|
252
|
+
* `dataset[topics]` = the ids of the topics associated with this dataset. See *Topics* below
|
253
|
+
|
254
|
+
**Returns JSON:**
|
255
|
+
|
256
|
+
It returns the same output from the *Dataset Details (Overview)* above of the completed dataset, or an error message if the dataset couldn't be created.
|
257
|
+
|
258
|
+
|
259
|
+
## Upload Requests
|
260
|
+
|
261
|
+
To upload data to the system, you need to create an `upload_request`. An upload request tells you the HTTP end point your upload should be going to, as well as an `upload_code` you should pass along with your upload to verify it.
|
262
|
+
|
263
|
+
**POST https://buzzdata.com/api/:username/:dataset/upload_request**
|
264
|
+
|
265
|
+
* `:username` = your username: ex: 'eviltrout'
|
266
|
+
* `:dataset` = the short name (url name) of the dataset you are uploading to. For example: 'b-list-celebrities'
|
267
|
+
|
268
|
+
|
269
|
+
**POST Parameters:**
|
270
|
+
|
271
|
+
* `api_key` = your API key
|
272
|
+
|
273
|
+
**Returns JSON:**
|
274
|
+
|
275
|
+
{upload_request: {upload_code: 'SOME_CODE_HERE', url: 'URL_TO_UPLOAD_TO'} }
|
276
|
+
|
277
|
+
* `upload_code` = a unique token that authenticates your upload
|
278
|
+
* `url` = the endpoint for where the file should be uploaded
|
279
|
+
|
280
|
+
|
281
|
+
## Performing an Upload
|
282
|
+
|
283
|
+
After creating an `upload_request`, you can then POST your data file to be ingested. To do this, send a POST request to the `url` you received in your `upload_request` JSON.
|
284
|
+
|
285
|
+
*note: Make sure your POST is a multipart, otherwise the file upload will not work.*
|
286
|
+
|
287
|
+
**POST Parameters:**
|
288
|
+
|
289
|
+
* `api_key` = your API Key
|
290
|
+
* `upload_code` = the `upload_code` returned in the `upload_request`
|
291
|
+
* `file` = the file data you are uploading to be ingested
|
292
|
+
|
293
|
+
**Returns JSON:**
|
294
|
+
|
295
|
+
[{"name"=>"kittens_born.csv", "size"=>187, "job_status_token"=>"a24b2155-e2ec-48d4-8bc0-f77e3758966f"}]
|
296
|
+
|
297
|
+
* `name` = the filename of the upload.
|
298
|
+
* `size` = the size of the upload in bytes
|
299
|
+
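* `job_status_token` = an important token that you need in order to check the status of your upload (see *Checking your Upload Status* below)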
|
300
|
+
|
301
|
+
|
302
|
+
## Checking your Upload Status
|
303
|
+
|
304
|
+
After a file has been uploaded, you can check out the upload's status by making a GET to:
|
305
|
+
|
306
|
+
**GET https://buzzdata.com/api/:username/:dataset/upload_request/status**
|
307
|
+
|
308
|
+
* `:username` = your username: ex: 'eviltrout'
|
309
|
+
* `:dataset` = the short name (url name) of the dataset you are uploading to. For example: 'b-list-celebrities'
|
310
|
+
|
311
|
+
**GET Parameters:**
|
312
|
+
|
313
|
+
* `api_key` = your API Key
|
314
|
+
* `job_status_token` = The job status token you received when you performed your upload.
|
315
|
+
|
316
|
+
**Returns JSON:**
|
317
|
+
|
318
|
+
{"message"=>"Ingest Job Created", "status_code"=>"created"}
|
319
|
+
|
320
|
+
* `message` is a textual description of the current status, or an error message in the event of an error
|
321
|
+
* `status_code` is the status of the current job. The job has finished when it is `complete` or `error`.
|
322
|
+
|
323
|
+
Important! You should wait a little while between polls to the job status. We recommend sleeping for one second in most cases.
|
324
|
+
|
325
|
+
Note: If you receive a status of 'Unknown' it means the file has not begun processing yet. If you continue to poll it will move to 'created'
|
326
|
+
|
327
|
+
|
328
|
+
## Publishing a Dataset
|
329
|
+
|
330
|
+
Once a dataset has an upload associated with it, you can publish it:
|
331
|
+
|
332
|
+
**POST https://buzzdata.com/api/:username/:dataset/publish**
|
333
|
+
|
334
|
+
**POST Parameters:**
|
335
|
+
|
336
|
+
* `api_key` = your API Key
|
337
|
+
|
338
|
+
**Returns JSON:**
|
339
|
+
|
340
|
+
It returns the same output from the *Dataset Details (Overview)* above of the completed dataset, or an error message if the dataset couldn't be published.
|
341
|
+
|
342
|
+
|
343
|
+
## Cloning a Dataset
|
344
|
+
|
345
|
+
You can clone another's dataset by making a post. Note the username in this case is the user whose dataset you want to clone, not your own:
|
346
|
+
|
347
|
+
**POST https://buzzdata.com/api/:username/:dataset/clone**
|
348
|
+
|
349
|
+
**POST Parameters:**
|
350
|
+
|
351
|
+
* `api_key` = your API Key
|
352
|
+
|
353
|
+
**Returns JSON:**
|
354
|
+
|
355
|
+
It returns the same output from the *Dataset Details (Overview)* above of the completed dataset, or an error message if the dataset couldn't be cloned.
|
356
|
+
|
357
|
+
|
358
|
+
## Delete a Dataset
|
359
|
+
|
360
|
+
To delete a dataset, make a DELETE call:
|
361
|
+
|
362
|
+
**DELETE https://buzzdata.com/api/:username/:dataset**
|
363
|
+
|
364
|
+
**POST Parameters:**
|
365
|
+
|
366
|
+
* `api_key` = your API Key
|
367
|
+
|
368
|
+
**Returns JSON:**
|
369
|
+
|
370
|
+
{"id"=>"eviltrout/tasteless-dataset", "deleted"=>true}
|
371
|
+
|
372
|
+
# Users
|
373
|
+
|
374
|
+
To retrieve information about a particular BuzzData user, perform the following GET:
|
375
|
+
|
376
|
+
**GET https://buzzdata.com/api/:username**
|
377
|
+
|
378
|
+
**Returns JSON:**
|
379
|
+
|
380
|
+
{"user":
|
381
|
+
{"id":"eviltrout",
|
382
|
+
"name":"Robin Ward",
|
383
|
+
"description":"The Evilest Trout of them all and BuzzData Developer",
|
384
|
+
"location":"Toronto, Canada",
|
385
|
+
"url":"http://buzzdata.com/eviltrout",
|
386
|
+
"avatar":"/images/avatars/b9/e987d17045c649da4de2a580e8109d655e6a12?1312292315",
|
387
|
+
"followers_count": 12}
|
388
|
+
}
|
389
|
+
|
390
|
+
|
391
|
+
# Searching BuzzData
|
392
|
+
|
393
|
+
To search BuzzData, make a GET call:
|
394
|
+
|
395
|
+
**GET https://buzzdata.com/api/search**
|
396
|
+
|
397
|
+
**GET Parameters:**
|
398
|
+
|
399
|
+
* `query` = the string you'd like to search for
|
400
|
+
|
401
|
+
**Returns JSON:**
|
402
|
+
|
403
|
+
[
|
404
|
+
{"label":"Pets","value":"Pets","id":"pets","url":"/eviltrout/pets","cloned":false,"type":"Dataset"},
|
405
|
+
{"label":"Business","value":"Business","id":"business","url":"/topics/business","type":"Topic"},
|
406
|
+
{"label":"Momoko Price","value":"Momoko Price","id":"momoko","url":"/momoko","type":"User","icon":"http://buzzdata.s3.amazonaws.com/avatars/fe/fe361ff01695aa4741840f4f8851a6da9e2ef64c"},
|
407
|
+
...
|
408
|
+
]
|
409
|
+
|
410
|
+
Note that while in the sample output above there is one of each Dataset, Topic and Users, a search can return many more. The type of result is based on the `type` attribute.
|
411
|
+
|
412
|
+
|
413
|
+
# Topics
|
414
|
+
|
415
|
+
When creating a dataset, it is necessary to supply at least one topic. To retrieve a list of topics and their ids:
|
416
|
+
|
417
|
+
**GET https://buzzdata.com/api/topics**
|
418
|
+
|
419
|
+
**Returns JSON:**
|
420
|
+
|
421
|
+
[
|
422
|
+
{"id":"agriculture","name":"Agriculture"},
|
423
|
+
{"id":"animals","name":"Animals"},
|
424
|
+
{"id":"anthropology","name":"Anthropology"},
|
425
|
+
...
|
426
|
+
]
|
427
|
+
|
428
|
+
# Licenses
|
429
|
+
|
430
|
+
When creating a dataset, it is necessary to supply a valid license for the data. You can query the available licenses by:
|
431
|
+
|
432
|
+
**GET https://buzzdata.com/api/licenses**
|
433
|
+
|
434
|
+
**Returns JSON:**
|
435
|
+
|
436
|
+
[
|
437
|
+
{"id":"cc0"},
|
438
|
+
{"id":"pdm"},
|
439
|
+
{"id":"cc_by"},
|
440
|
+
...
|
441
|
+
]
|
data/Rakefile
ADDED
data/buzzdata.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
# Gem specification for the buzzdata gem.

# Put this gem's lib directory on the load path so the version file resolves.
$LOAD_PATH.push File.expand_path("../lib", __FILE__)
require "buzzdata/version"

Gem::Specification.new do |spec|
  # Identity
  spec.name     = "buzzdata"
  spec.version  = Buzzdata::VERSION
  spec.platform = Gem::Platform::RUBY
  spec.summary  = 'Ruby client for the BuzzData API'

  # Contact details
  spec.authors  = ["BuzzData"]
  spec.email    = ["support@buzzdata.com"]
  spec.homepage = "http://buzzdata.com/"

  spec.rubyforge_project = "buzzdata"

  # The packaged file lists are taken from what git tracks.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  bin_paths          = `git ls-files -- bin/*`.split("\n")
  spec.executables   = bin_paths.map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  spec.add_runtime_dependency('rest-client', '~> 1.6.7')
  spec.add_development_dependency('rspec', '~> 2.6.0')
end
|
data/lib/buzzdata.rb
ADDED
@@ -0,0 +1,155 @@
|
|
1
|
+
# Dependencies
|
2
|
+
require 'rest-client'
|
3
|
+
require 'json'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
# Our code
|
7
|
+
require_relative 'buzzdata/error'
|
8
|
+
require_relative 'buzzdata/rest_helpers'
|
9
|
+
require_relative 'buzzdata/upload'
|
10
|
+
|
11
|
+
class Buzzdata
|
12
|
+
YAML_ERRORS = [ArgumentError]
|
13
|
+
if defined?(Psych) && defined?(Psych::SyntaxError)
|
14
|
+
YAML_ERRORS << Psych::SyntaxError
|
15
|
+
end
|
16
|
+
|
17
|
+
include RestHelpers
|
18
|
+
|
19
|
+
# Creates a client. The API key comes from the +api_key+ argument or, when
# that is nil, from a YAML configuration file.
#
# api_key - the API key; nil means "read it from the configuration file"
# opts    - options hash; opts[:config_file] replaces the default
#           'config/buzzdata.yml' path
#
# Raises Buzzdata::Error when the configuration file is invalid YAML, is not
# a Hash, has no 'api_key' entry, is unreadable, or (for an explicitly given
# file) is missing, and when no API key could be determined at all.
def initialize(api_key=nil, opts={})
  @api_key = api_key
  return unless @api_key.nil?

  requested_file = opts[:config_file]
  config_file = File.expand_path(requested_file || 'config/buzzdata.yml')

  # A missing default file is skipped here; a file the user asked for is
  # always loaded so that problems with it surface as errors.
  if requested_file || File.exist?(config_file)
    begin
      config = YAML.load_file(config_file)

      unless Hash === config
        raise Buzzdata::Error, "Configuration file improperly formatted (not a Hash: #{config_file})"
      end
      unless config['api_key']
        raise Buzzdata::Error, "API key missing from configuration file (#{config_file})"
      end

      @api_key = config['api_key']
      @base_url = config['base_url']
    rescue *YAML_ERRORS
      raise Buzzdata::Error, "Configuration file improperly formatted (invalid YAML: #{config_file})"
    rescue Errno::EACCES
      raise Buzzdata::Error, "Configuration file unreadable (Permission denied: #{config_file})"
    rescue Errno::ENOENT
      raise Buzzdata::Error, "Configuration file missing (No such file or directory: #{config_file})"
    end
  end

  raise Buzzdata::Error.new('No API key provided') if @api_key.nil?
end
|
53
|
+
|
54
|
+
# POSTs to the dataset's upload_request endpoint and returns the value stored
# under 'upload_request' in the parsed JSON response.
def new_upload_request(dataset)
  post_json(url_for("#{dataset}/upload_request"))['upload_request']
end
|
58
|
+
|
59
|
+
# Uploads +file+ to +dataset+ and returns a Buzzdata::Upload built from the
# server's response.
#
# dataset - the dataset identifier used to build the API path
# file    - the file object to send
def start_upload(dataset, file)
  upload_params = new_upload_request(dataset)

  # The target url is taken out of the request hash; the remaining entries
  # are posted together with the file.
  endpoint = upload_params.delete('url')
  upload_params['file'] = file

  upload_response = post_json(endpoint, upload_params)
  Buzzdata::Upload.new(self, dataset, upload_response)
end
|
68
|
+
|
69
|
+
# GETs the upload status of +dataset+ for the given job status token and
# returns the parsed JSON response.
def upload_status(dataset, job_status_token)
  status_url = url_for("#{dataset}/upload_request/status")
  get_json(status_url, :job_status_token => job_status_token)
end
|
72
|
+
|
73
|
+
# POSTs a download request for +dataset+ and returns the 'url' entry of the
# 'download_request' object in the parsed JSON response.
# NOTE(review): the API description in the README shows this key as `path`;
# confirm which key the server actually sends.
def download_path(dataset)
  response = post_json(url_for("#{dataset}/download_request"))
  response['download_request']['url']
end
|
77
|
+
|
78
|
+
# GETs the details of +dataset+ and returns the value under the 'dataset'
# key of the parsed JSON response.
def dataset_overview(dataset)
  get_json(url_for(dataset))['dataset']
end
|
82
|
+
|
83
|
+
# GETs the history endpoint of +dataset+ and returns the parsed JSON response.
def dataset_history(dataset)
  history_url = url_for("#{dataset}/history")
  get_json(history_url)
end
|
86
|
+
|
87
|
+
# POSTs to the publish endpoint of +dataset+ and returns the value under the
# 'dataset' key of the parsed JSON response.
def publish_dataset(dataset)
  post_json(url_for("#{dataset}/publish"))['dataset']
end
|
91
|
+
|
92
|
+
# POSTs to the clone endpoint of +dataset+ and returns the value under the
# 'dataset' key of the parsed JSON response.
def clone_dataset(dataset)
  post_json(url_for("#{dataset}/clone"))['dataset']
end
|
96
|
+
|
97
|
+
# Sends a DELETE for +dataset+ and returns the parsed JSON response.
def delete_dataset(dataset)
  dataset_url = url_for(dataset.to_s)
  delete_json(dataset_url)
end
|
100
|
+
|
101
|
+
# GETs the licenses endpoint and returns the parsed JSON response.
def licenses
  licenses_url = url_for("licenses")
  get_json(licenses_url)
end
|
104
|
+
|
105
|
+
# GETs the topics endpoint and returns the parsed JSON response.
def topics
  topics_url = url_for("topics")
  get_json(topics_url)
end
|
108
|
+
|
109
|
+
# GETs the search endpoint with +query+ passed as the :query parameter and
# returns the parsed JSON response.
def search(query)
  search_url = url_for("search")
  get_json(search_url, :query => query)
end
|
112
|
+
|
113
|
+
# GETs the dataset list endpoint of +username+ and returns the parsed JSON
# response.
def datasets_list(username)
  list_url = url_for("#{username}/datasets/list")
  get_json(list_url)
end
|
116
|
+
|
117
|
+
# Resolves the download location of +dataset+ via download_path and returns
# whatever raw_get yields for that location.
def download_data(dataset)
  location = download_path(dataset)
  raw_get(location)
end
|
120
|
+
|
121
|
+
# GETs the endpoint of +username+ and returns the value under the 'user' key
# of the parsed JSON response.
def user_info(username)
  get_json(url_for(username))['user']
end
|
125
|
+
|
126
|
+
# Creates a dataset and returns the value under the 'dataset' key of the
# parsed JSON response.
#
# attributes - Hash with symbol keys. :username selects the owner and is not
#              sent as a dataset field; :name, :readme, :license and :topics
#              must be present and non-empty. All remaining entries are
#              posted under :dataset.
#
# Raises Buzzdata::Error when +attributes+ is nil or a required entry is
# missing or empty.
def create_dataset(attributes)
  # Validate attributes
  raise Buzzdata::Error, "Missing attributes" if attributes.nil?
  raise Buzzdata::Error, "Username is required" if param_blank?(attributes, :username)
  raise Buzzdata::Error, "Dataset name is required" if param_blank?(attributes, :name)
  raise Buzzdata::Error, "Dataset readme is required" if param_blank?(attributes, :readme)
  raise Buzzdata::Error, "Dataset license is required" if param_blank?(attributes, :license)
  raise Buzzdata::Error, "Dataset topics are required" if param_blank?(attributes, :topics)

  # Work on a copy so the caller's hash keeps its :username entry
  dataset_attributes = attributes.dup
  username = dataset_attributes.delete(:username)

  result = post_json(url_for("#{username}/datasets"), :dataset => dataset_attributes)
  result['dataset']
end
|
141
|
+
|
142
|
+
private
|
143
|
+
|
144
|
+
# Tells whether obj[param] is missing or empty.
#
# obj   - the container to look in (may be nil)
# param - the key to look up
#
# Returns true when +obj+ is nil, when obj[param] is nil, or when obj[param]
# responds to empty? and is empty. Values without an empty? method (for
# example false or a number) count as present.
def param_blank?(obj, param)
  return true if obj.nil?

  value = obj[param]
  value.nil? || (value.respond_to?(:empty?) && value.empty?)
end
|
147
|
+
|
148
|
+
# Builds the full URL for an API +path+ by prefixing it with @base_url, or
# with "https://buzzdata.com/api/" when @base_url is not set.
def url_for(path)
  "#{@base_url || "https://buzzdata.com/api/"}#{path}"
end
|
152
|
+
|
153
|
+
end
|
154
|
+
|
155
|
+
|
@@ -0,0 +1,37 @@
|
|
1
|
+
# HTTP helpers mixed into the API client. Every request made through the
# verb helpers carries the including object's @api_key.
module RestHelpers

  # Converts an error response into a Buzzdata::Error.
  #
  # response - the HTTP response object, or nil
  #
  # Always raises Buzzdata::Error: without a message when +response+ is nil,
  # with the 'message' entry when the body is a JSON object, and with the raw
  # body when the body is not JSON or is not a JSON object.
  def handle_error(response)
    raise Buzzdata::Error.new if response.nil?

    begin
      parsed_error = JSON.parse(response.body)
    rescue JSON::ParserError
      # Error bodies are not guaranteed to be JSON; report them verbatim
      # instead of leaking a parser exception to the caller.
      raise Buzzdata::Error.new(response.body)
    end

    raise Buzzdata::Error.new(response.body) unless parsed_error.is_a?(Hash)
    raise Buzzdata::Error.new(parsed_error['message'])
  end

  # Define methods for our HTTP verbs
  [:post, :put, :get, :delete].each do |method|

    # Sends the request through RestClient with the API key added to
    # +params+. Responses with code 403, 404 or 500 go to handle_error; any
    # other response is passed to response.return!.
    define_method(method) do |url, params={}|
      # Merge into a copy so the caller's hash is left untouched
      request_params = params.merge('api_key' => @api_key)

      # NOTE(review): rest-client documents the second argument of get and
      # delete as request headers (query parameters go under :params).
      # Confirm that these values reach the server as intended.
      RestClient.send(method, url, request_params) do |response, request, result, &block|
        case response.code
        when 403, 404, 500
          handle_error(response)
        else
          response.return!(request, result, &block)
        end
      end
    end

    # Define methods for our verbs with json handling: performs the request
    # and returns the response body parsed as JSON.
    define_method("#{method}_json") do |path, params={}|
      response = send(method, path, params)
      JSON.parse(response.body)
    end

  end

  # Plain GET of +url+ through RestClient, without adding the API key.
  def raw_get(url)
    RestClient.get(url)
  end

end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
class Buzzdata
  # Tracks one upload and queries the API for its processing status.
  class Upload

    attr_reader :filename, :size, :job_status_token, :dataset

    # buzzdata_api    - object answering upload_status(dataset, token)
    # dataset         - the dataset the file was uploaded to
    # upload_response - the parsed upload response; its element at index 0
    #                   supplies 'name', 'size' and 'job_status_token'
    def initialize(buzzdata_api, dataset, upload_response)
      @api = buzzdata_api
      @dataset = dataset
      @filename, @size, @job_status_token =
        upload_response[0]['name'], upload_response[0]['size'], upload_response[0]['job_status_token']
    end

    # True until the current status code is 'complete' or 'error'.
    def in_progress?
      finished = is_complete?(current_status)
      !finished
    end

    # True when the current status code is "complete".
    def success?
      status = current_status
      (status['status_code'] == "complete")
    end

    # The 'message' entry of the current status.
    def status_message
      status = current_status
      status['message']
    end

    private

    # A status counts as complete when it is present and its status code is
    # 'complete' or 'error'.
    def is_complete?(status)
      !status.nil? && %w[complete error].include?(status['status_code'])
    end

    # Returns the remembered status once it is complete; before that every
    # call asks the API again and remembers the answer.
    def current_status
      unless is_complete?(@current_status)
        @current_status = @api.upload_status(@dataset, @job_status_token)
      end
      @current_status
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
api_key: <YOUR_API_KEY>
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require '../lib/buzzdata'

# Exactly one argument is expected: the dataset to describe.
unless ARGV.size == 1
  puts "Usage: ./dataset_overview.rb dataset"
  puts "Example: ./dataset_overview.rb eviltrout/kittens-born-by-month"
  exit(0)
end

dataset_id = ARGV[0]
client = Buzzdata.new

# Retrieve a Dataset's Details
details = client.dataset_overview(dataset_id)

puts "Dataset Details:"
details.each { |attribute, value| puts "#{attribute}: #{value}" }
puts

# List every uploaded version of the dataset
puts "Version History:"
client.dataset_history(dataset_id).each do |entry|
  puts "=> #{entry['version']} - Uploaded On: #{entry['created_at']} by #{entry['username']}"
end
|
@@ -0,0 +1,11 @@
|
|
1
|
+
require '../lib/buzzdata'

# Exactly one argument is expected: the dataset to download.
unless ARGV.size == 1
  puts "Usage: ./download_data.rb dataset"
  puts "Example: ./download_data.rb eviltrout/kittens-born-by-month"
  exit(0)
end

# Download a Dataset and print its contents
client = Buzzdata.new
dataset_id = ARGV[0]
puts client.download_data(dataset_id)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
#!/usr/bin/env ruby

require '../lib/buzzdata'

# Two arguments are expected: the target dataset and the file to upload.
if ARGV.size != 2
  puts "Usage: ./upload_data.rb dataset filename"
  puts "Example: ./upload_data.rb eviltrout/kittens-born-by-month kittens_born.csv"
  exit(0)
end

buzzdata = Buzzdata.new

dataset_name, filename = *ARGV

# Upload a file to a dataset. The block form of File.open closes the file
# handle as soon as the upload request has been sent.
print "Uploading #{filename}..."
upload = File.open(filename) { |file| buzzdata.start_upload(dataset_name, file) }
puts "Done!"

# Wait while it's being processed
print "Waiting for processing to finish..."
while upload.in_progress?
  print "."
  sleep(1) # Let's not poll too frequently
end

if upload.success?
  puts "Done!"
else
  puts "ERROR! #{upload.status_message}"
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/spec_helper')

# Specs for how Buzzdata#initialize obtains the API key from a YAML
# configuration file.
class Buzzdata
  describe Buzzdata do
    describe '#initialize' do
      # Absolute path of a file inside spec/fixtures
      def fixture_path(fixture)
        File.expand_path File.dirname(__FILE__) + '/fixtures/' + fixture
      end

      it 'should use custom configuration file' do
        client = Buzzdata.new nil, :config_file => fixture_path('custom.yml')
        client.instance_variable_get('@api_key').should == 'dummy'
      end

      it "should not raise an error if the configuration file is default and missing" do
        expect{Buzzdata.new nil}.not_to raise_error(Buzzdata::Error, /No such file or directory/)
      end

      it "should raise an error if the configuration file is custom and missing" do
        expect{Buzzdata.new nil, :config_file => fixture_path('non_existent.yml')}.to raise_error(Buzzdata::Error, /No such file or directory/)
      end

      it "should raise an error if the configuration file is unreadable" do
        # Create the fixture on first use with all permission bits cleared;
        # the block form closes the handle once chmod has run.
        unless File.exist? fixture_path('unreadable.yml')
          File.open(fixture_path('unreadable.yml'), 'w') { |file| file.chmod(0000) }
        end
        expect{Buzzdata.new nil, :config_file => fixture_path('unreadable.yml')}.to raise_error(Buzzdata::Error, /Permission denied/)
      end

      it "should raise an error if the configuration file is invalid YAML" do
        expect{Buzzdata.new nil, :config_file => fixture_path('invalid_yaml.yml')}.to raise_error(Buzzdata::Error, /invalid YAML/)
      end

      it "should raise an error if the configuration file is not a Hash" do
        expect{Buzzdata.new nil, :config_file => fixture_path('not_a_hash.yml')}.to raise_error(Buzzdata::Error, /not a Hash/)
      end

      it "should raise an error if the API key is missing from the configuration file" do
        expect{Buzzdata.new nil, :config_file => fixture_path('missing_api_key.yml')}.to raise_error(Buzzdata::Error, /API key missing/)
      end
    end
  end
end
|
@@ -0,0 +1 @@
|
|
1
|
+
api_key: dummy
|
@@ -0,0 +1 @@
|
|
1
|
+
api_key:
|
@@ -0,0 +1 @@
|
|
1
|
+
true
|
data/spec/spec.opts
ADDED
data/spec/spec_helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,106 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: buzzdata
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.0.1
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- BuzzData
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-11-22 00:00:00 -05:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rest-client
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 1.6.7
|
25
|
+
type: :runtime
|
26
|
+
version_requirements: *id001
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rspec
|
29
|
+
prerelease: false
|
30
|
+
requirement: &id002 !ruby/object:Gem::Requirement
|
31
|
+
none: false
|
32
|
+
requirements:
|
33
|
+
- - ~>
|
34
|
+
- !ruby/object:Gem::Version
|
35
|
+
version: 2.6.0
|
36
|
+
type: :development
|
37
|
+
version_requirements: *id002
|
38
|
+
description:
|
39
|
+
email:
|
40
|
+
- support@buzzdata.com
|
41
|
+
executables: []
|
42
|
+
|
43
|
+
extensions: []
|
44
|
+
|
45
|
+
extra_rdoc_files: []
|
46
|
+
|
47
|
+
files:
|
48
|
+
- .gitignore
|
49
|
+
- Gemfile
|
50
|
+
- LICENCE
|
51
|
+
- README.md
|
52
|
+
- Rakefile
|
53
|
+
- buzzdata.gemspec
|
54
|
+
- lib/buzzdata.rb
|
55
|
+
- lib/buzzdata/error.rb
|
56
|
+
- lib/buzzdata/rest_helpers.rb
|
57
|
+
- lib/buzzdata/upload.rb
|
58
|
+
- lib/buzzdata/version.rb
|
59
|
+
- samples/config/buzzdata.yml.sample
|
60
|
+
- samples/dataset_overview.rb
|
61
|
+
- samples/datasets/kittens_born.csv
|
62
|
+
- samples/download_data.rb
|
63
|
+
- samples/upload_data.rb
|
64
|
+
- spec/buzzdata_spec.rb
|
65
|
+
- spec/fixtures/custom.yml
|
66
|
+
- spec/fixtures/invalid_yaml.yml
|
67
|
+
- spec/fixtures/missing_api_key.yml
|
68
|
+
- spec/fixtures/not_a_hash.yml
|
69
|
+
- spec/spec.opts
|
70
|
+
- spec/spec_helper.rb
|
71
|
+
has_rdoc: true
|
72
|
+
homepage: http://buzzdata.com/
|
73
|
+
licenses: []
|
74
|
+
|
75
|
+
post_install_message:
|
76
|
+
rdoc_options: []
|
77
|
+
|
78
|
+
require_paths:
|
79
|
+
- lib
|
80
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ">="
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: "0"
|
86
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
87
|
+
none: false
|
88
|
+
requirements:
|
89
|
+
- - ">="
|
90
|
+
- !ruby/object:Gem::Version
|
91
|
+
version: "0"
|
92
|
+
requirements: []
|
93
|
+
|
94
|
+
rubyforge_project: buzzdata
|
95
|
+
rubygems_version: 1.6.2
|
96
|
+
signing_key:
|
97
|
+
specification_version: 3
|
98
|
+
summary: Ruby client for the BuzzData API
|
99
|
+
test_files:
|
100
|
+
- spec/buzzdata_spec.rb
|
101
|
+
- spec/fixtures/custom.yml
|
102
|
+
- spec/fixtures/invalid_yaml.yml
|
103
|
+
- spec/fixtures/missing_api_key.yml
|
104
|
+
- spec/fixtures/not_a_hash.yml
|
105
|
+
- spec/spec.opts
|
106
|
+
- spec/spec_helper.rb
|