veritable 0.1.0.79 → 0.1.0.80

Sign up to get free protection for your applications and to get access to all the features.
data/lib/veritable/api.rb CHANGED
@@ -5,23 +5,82 @@ require 'veritable/resource'
5
5
  require 'veritable/util'
6
6
 
7
7
  module Veritable
8
+
9
+ # Represents the resources available to a user of the Veritable API.
10
+ #
11
+ # Users should not initialize directly; use Veritable.connect as the entry point.
12
+ #
13
+ # ==== Methods
14
+ # * +root+ -- gets the root of the API
15
+ # * +limits+ -- gets the user-specific API limits
16
+ # * +tables+ -- gets a Veritable::Cursor over the collection of available tables
17
+ # * +table+ -- gets an individual data table by its unique id
18
+ # * +create_table+ -- creates a new data table
19
+ # * +delete_table+ -- deletes a data table by its unique id
20
+ # * +has_table?+ -- checks whether a table with the given id is available
21
+ #
22
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
8
23
  class API
9
24
  include VeritableResource
10
25
 
26
+ # Gets the root of the api
27
+ #
28
+ # ==== Returns
29
+ # A Hash with the keys <tt>"status"</tt> (should be equal to <tt>"SUCCESS"</tt>) and <tt>"entropy"</tt> (a random Float).
30
+ #
31
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
11
32
  def root; get(""); end
12
33
 
34
+ # Gets the user-specific API limits
35
+ #
36
+ # ==== Returns
37
+ # A Hash with the keys <tt>"max_categories"</tt>, <tt>"max_row_batch_count"</tt>, <tt>"max_string_length"</tt>, <tt>"predictions_max_cols"</tt>, <tt>"predictions_max_count"</tt>, <tt>"schema_max_cols"</tt>, <tt>"table_max_cols_per_row"</tt>, <tt>"table_max_rows"</tt>, and <tt>"table_max_running_analyses"</tt>, representing the user's current API limits.
38
+ #
39
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
13
40
  def limits; get("user/limits"); end
14
41
 
42
+ # Gets a cursor for the table collection
43
+ #
44
+ # ==== Arguments
45
+ # * +opts+ A Hash optionally containing the keys
46
+ # - <tt>"start"</tt> -- the table id from which the cursor should begin returning results. Defaults to +nil+, in which case the cursor will return results starting with the lexicographically first table id.
47
+ # - <tt>"limit"</tt> -- the total number of results to return (must be a Fixnum). Defaults to +nil+, in which case the number of results returned will not be limited.
48
+ #
49
+ # ==== Returns
50
+ # A Veritable::Cursor. The cursor will return Veritable::Table objects representing the available data tables, in lexicographic order of their unique ids.
51
+ #
52
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
15
53
  def tables(opts={'start' => nil, 'limit' => nil})
16
54
  Cursor.new({'collection' => "tables",
17
55
  'start' => opts['start'],
18
56
  'limit' => opts['limit']}.update(@opts)) {|x| Table.new(@opts, x)}
19
57
  end
20
58
 
21
- def table(table_id)
22
- Table.new(@opts, get("tables/#{table_id}"))
23
- end
24
-
59
+ # Gets an individual table by its unique id
60
+ #
61
+ # ==== Arguments
62
+ # * +table_id+ -- the unique id of the table
63
+ #
64
+ # ==== Returns
65
+ # A Veritable::Table
66
+ #
67
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
68
+ def table(table_id); Table.new(@opts, get("tables/#{table_id}")); end
69
+
70
+ # Creates a new table
71
+ #
72
+ # ==== Arguments
73
+ # * +table_id+ -- the unique String id of the new table. Must contain only alphanumeric characters, underscores, and dashes. Note that underscores and dashes are not permitted as the first character of a +table_id+. Default is +nil+, in which case a new id will be automatically generated.
74
+ # * +description+ -- a String describing the table. Default is <tt>''</tt>.
75
+ # * +force+ -- if true, will overwrite any existing table with the same id. Default is +false+.
76
+ #
77
+ # ==== Raises
78
+ # A Veritable::VeritableError if +force+ is not true and there is an existing table with the same id.
79
+ #
80
+ # ==== Returns
81
+ # A Veritable::Table
82
+ #
83
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
25
84
  def create_table(table_id=nil, description='', force=false)
26
85
  if table_id.nil?
27
86
  autogen = true
@@ -45,11 +104,26 @@ module Veritable
45
104
  Table.new(@opts, doc)
46
105
  end
47
106
 
48
- def delete_table(table_id); delete("tables/#{table_id}"); end
49
-
50
- def inspect; to_s; end
51
- def to_s; "#<Veritable::API url='#{api_base_url}'>"; end
52
-
107
+ # Deletes an existing table
108
+ #
109
+ # ==== Arguments
110
+ # +table_id+ --- the unique id of the table to delete
111
+ #
112
+ # ==== Returns
113
+ # +nil+ on success. Succeeds silently if no table with the specified id is found.
114
+ #
115
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
116
+ def delete_table(table_id); delete("tables/#{table_id}"); nil; end
117
+
118
+ # Checks if a table with the given unique id exists
119
+ #
120
+ # ==== Arguments
121
+ # +table_id+ --- the unique id of the table to check
122
+ #
123
+ # ==== Returns
124
+ # +true+ or +false+, as appropriate.
125
+ #
126
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
53
127
  def has_table?(table_id)
54
128
  begin
55
129
  table table_id
@@ -59,37 +133,128 @@ module Veritable
59
133
  true
60
134
  end
61
135
  end
136
+
137
+ # Returns a string representation of the API resource
138
+ def inspect; to_s; end
139
+
140
+ # Returns a string representation of the API resource
141
+ def to_s; "#<Veritable::API url='#{api_base_url}'>"; end
142
+
62
143
  end
63
144
 
145
+ # Represents the resources associated with a single table
146
+ #
147
+ # ==== Attributes
148
+ # * +_id+ -- the unique String id of the table
149
+ # * +description+ -- the String description of the table
150
+ #
151
+ # ==== Methods
152
+ # * +delete+ -- deletes the associated table resource
153
+ # * +row+ -- gets a row of the table by its unique id
154
+ # * +rows+ -- gets a Veritable::Cursor over the collection of rows in the table
155
+ # * +upload_row+ -- uploads a new row to the table
156
+ # * +batch_upload_rows+ -- batch uploads multiple rows to the table
157
+ # * +delete_row+ -- deletes a row from the table by its unique id
158
+ # * +batch_delete_rows+ -- batch deletes multiple rows from the table
159
+ # * +analyses+ -- gets a Veritable::Cursor over the collection of analyses of the table
160
+ # * +analysis+ -- gets an analysis of the table by its unique id
161
+ # * +create_analysis+ -- creates a new analysis of the table
162
+ # * +delete_analysis+ -- deletes an analysis of the table by its unique id
163
+ # * +has_analysis?+ -- checks whether an analysis with the given id is available
164
+ #
165
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
64
166
  class Table
65
167
  include VeritableResource
66
168
 
67
169
  alias :rest_delete :delete
68
- def delete
69
- rest_delete(link('self'))
70
- end
71
170
 
171
+ # Deletes the table
172
+ #
173
+ # ==== Returns
174
+ # +nil+ on success. Succeeds silently if the resource has already been deleted.
175
+ #
176
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
177
+ def delete; rest_delete(link('self')); end
178
+
179
+ # Gets a row by its unique id
180
+ #
181
+ # ==== Arguments
182
+ # +row_id+ --- the unique id of the row to retrieve
183
+ #
184
+ # ==== Returns
185
+ # A Hash representing the row, whose keys are column ids as Strings and whose values are data cells.
186
+ #
187
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
72
188
  def row(row_id); get("#{link('rows')}/#{row_id}"); end
73
189
 
190
+ # Gets a cursor for the row collection
191
+ #
192
+ # ==== Arguments
193
+ # * +opts+ A Hash optionally containing the keys
194
+ # - <tt>"start"</tt> -- the row id from which the cursor should begin returning results. Defaults to +nil+, in which case the cursor will return results starting with the lexicographically first row id.
195
+ # - <tt>"limit"</tt> -- the total number of results to return (must be a Fixnum). Defaults to +nil+, in which case the number of results returned will not be limited.
196
+ #
197
+ # ==== Returns
198
+ # A Veritable::Cursor. The cursor will return Hashes representing the rows, in lexicographic order of their unique ids.
199
+ #
200
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
74
201
  def rows(opts={'start' => nil, 'limit' => nil})
75
202
  Cursor.new({'collection' => link('rows'),
76
203
  'start' => opts['start'],
77
204
  'limit' => opts['limit']}.update(@opts))
78
205
  end
79
206
 
207
+ # Uploads a new row to the table
208
+ #
209
+ # ==== Arguments
210
+ # * +row+ -- a Hash representing the data in the row, whose keys are column ids as Strings. Must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table.
211
+ #
212
+ # ==== Raises
213
+ # A Veritable::VeritableError if the row Hash is missing the <tt>"_id"</tt> field or is improperly formed.
214
+ #
215
+ # ==== Returns
216
+ # +nil+ on success.
217
+ #
218
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
80
219
  def upload_row(row)
81
220
  Util.check_row row
82
221
  put("#{link('rows')}/#{row['_id']}", row)
222
+ nil
83
223
  end
84
224
 
85
- def batch_upload_rows(rows, per_page=100)
86
- batch_modify_rows('put', rows, per_page)
87
- end
88
-
89
- def delete_row(row_id)
90
- rest_delete("#{link('rows')}/#{row_id}")
91
- end
92
-
225
+ # Batch uploads multiple rows to the table
226
+ #
227
+ # ==== Arguments
228
+ # * +rows+ -- an Array of Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table.
229
+ # * +per_page+ -- optionally controls the number of rows to upload in each batch. Defaults to +100+.
230
+ #
231
+ # ==== Returns
232
+ # +nil+ on success.
233
+ #
234
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
235
+ def batch_upload_rows(rows, per_page=100); batch_modify_rows('put', rows, per_page); end
236
+
237
+ # Deletes a row from the table
238
+ #
239
+ # ==== Arguments
240
+ # * +row_id+ -- the unique String id of the row to delete
241
+ #
242
+ # ==== Returns
243
+ # +nil+ on success. Succeeds silently if the row does not exist in the table.
244
+ #
245
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
246
+ def delete_row(row_id); rest_delete("#{link('rows')}/#{row_id}"); nil; end
247
+
248
+ # Batch deletes a list of rows from the table
249
+ #
250
+ # ==== Arguments
251
+ # * +rows+ -- an Array of Hashes, each of which represents a row of the table. Each row must contain the key <tt>"_id"</tt>, whose value must be a String containing only alphanumeric characters, underscores, and hyphens, and must be unique in the table. Any other keys will be ignored.
252
+ # * +per_page+ -- optionally controls the number of rows to delete in each batch. Defaults to +100+.
253
+ #
254
+ # ==== Returns
255
+ # +nil+ on success.
256
+ #
257
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
93
258
  def batch_delete_rows(rows, per_page=100)
94
259
  begin
95
260
  batch_modify_rows('delete', rows, per_page)
@@ -100,20 +265,63 @@ module Veritable
100
265
  end
101
266
  end
102
267
 
103
- def analysis(analysis_id)
104
- Analysis.new(@opts, get("#{link('analyses')}/#{analysis_id}"))
105
- end
106
-
268
+ # Gets an analysis by its unique id
269
+ #
270
+ # ==== Arguments
271
+ # * +analysis_id+ -- the unique id of the analysis to retrieve
272
+ #
273
+ # ==== Returns
274
+ # A new Veritable::Analysis
275
+ #
276
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
277
+ def analysis(analysis_id); Analysis.new(@opts, get("#{link('analyses')}/#{analysis_id}")); end
278
+
279
+ # Gets a cursor for the analysis collection
280
+ #
281
+ # ==== Arguments
282
+ # * +opts+ A Hash optionally containing the keys
283
+ # - <tt>"start"</tt> -- the analysis id from which the cursor should begin returning results. Defaults to +nil+, in which case the cursor will return results starting with the lexicographically first analysis id.
284
+ # - <tt>"limit"</tt> -- the total number of results to return (must be a Fixnum). Defaults to +nil+, in which case the number of results returned will not be limited.
285
+ #
286
+ # ==== Returns
287
+ # A Veritable::Cursor. The cursor will return Veritable::Analysis objects, in lexicographic order of their unique ids.
288
+ #
289
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
107
290
  def analyses(opts={'start' => nil, 'limit' => nil})
108
291
  Cursor.new({'collection' => link('analyses'),
109
292
  'start' => opts['start'],
110
293
  'limit' => opts['limit']}.update(@opts)) {|x| Analysis.new(@opts, x)}
111
294
  end
112
295
 
113
- def delete_analysis(analysis_id)
114
- rest_delete("#{link('analyses')}/#{analysis_id}")
115
- end
116
-
296
+ # Deletes an analysis by its unique id
297
+ #
298
+ # ==== Arguments
299
+ # * +analysis_id+ -- the unique String id of the analysis to delete
300
+ #
301
+ # ==== Returns
302
+ # +nil+ on success. Succeeds silently if the analysis does not exist.
303
+ #
304
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
305
+ def delete_analysis(analysis_id); rest_delete("#{link('analyses')}/#{analysis_id}"); nil; end
306
+
307
+ # Creates a new analysis
308
+ #
309
+ # ==== Arguments
310
+ # * +schema+ -- a schema describing the analysis to perform. Must be a Veritable::Schema object or a Hash of the form:
311
+ # {'col_1': {type: 'datatype'}, 'col_2': {type: 'datatype'}, ...}
312
+ # where the specified datatype for each column is one of <tt>['real', 'boolean', 'categorical', 'count']</tt> and is valid for the column.
313
+ # * +analysis_id+ -- the unique String id of the new analysis. Must contain only alphanumeric characters, underscores, and dashes. Note that underscores and dashes are not permitted as the first character of an +analysis_id+. Default is +nil+, in which case a new id will be automatically generated.
314
+ # * +description+ -- a String describing the analysis. Default is <tt>''</tt>.
315
+ # * +force+ -- if true, will overwrite any existing analysis with the same id. Default is +false+.
316
+ # * +analysis_type+ -- defaults to, and must be equal to, <tt>"veritable"</tt>.
317
+ #
318
+ # ==== Raises
319
+ # A Veritable::VeritableError if +force+ is not true and there is an existing analysis with the same id.
320
+ #
321
+ # ==== Returns
322
+ # A Veritable::Analysis
323
+ #
324
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
117
325
  def create_analysis(schema, analysis_id=nil, description="", force=false, analysis_type="veritable")
118
326
  if analysis_type != "veritable"
119
327
  if analysis_type.respond_to? :to_s
@@ -145,12 +353,15 @@ module Veritable
145
353
  Analysis.new(@opts, doc)
146
354
  end
147
355
 
148
- def inspect; to_s; end
149
- def to_s; "#<Veritable::Table _id='#{_id}'>"; end
150
-
151
- def _id; @doc['_id']; end
152
- def description; @doc['description']; end
153
-
356
+ # Checks if an analysis with the given unique id exists
357
+ #
358
+ # ==== Arguments
359
+ # * +analysis_id+ --- the unique id of the analysis to check
360
+ #
361
+ # ==== Returns
362
+ # +true+ or +false+, as appropriate.
363
+ #
364
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
154
365
  def has_analysis?(analysis_id)
155
366
  begin
156
367
  analysis analysis_id
@@ -161,8 +372,23 @@ module Veritable
161
372
  end
162
373
  end
163
374
 
375
+ # Returns a string representation of the table resource
376
+ def inspect; to_s; end
377
+
378
+ # Returns a string representation of the table resource
379
+ def to_s; "#<Veritable::Table _id='#{_id}'>"; end
380
+
381
+ # The unique String id of the table resource
382
+ def _id; @doc['_id']; end
383
+
384
+ # The String description of the table resource
385
+ def description; @doc['description']; end
386
+
164
387
  private
165
388
 
389
+ # Abstracts the logic for batch uploading and batch deleting rows
390
+ #
391
+ # Private method -- do not call directly. Instead, call batch_upload_rows or batch_delete_rows as appropriate.
166
392
  def batch_modify_rows(action, rows, per_page=100)
167
393
  if not per_page.is_a? Fixnum or not per_page > 0
168
394
  raise VeritableError.new("Batch upload or delete must have integer page size greater than 0.")
@@ -192,16 +418,69 @@ module Veritable
192
418
  end
193
419
  end
194
420
 
421
+ # Represents the resources associated with a single analysis
422
+ #
423
+ # ==== Attributes
424
+ # * +_id+ -- the unique String id of the analysis
425
+ # * +description+ -- the String description of the analysis
426
+ # * +created_at+ -- a String timestamp recording the time the analysis was created
427
+ # * +finished_at+ -- a String timestamp recording the time the analysis completed
428
+ # * +state+ -- the state of the analysis, one of <tt>["running", "succeeded", "failed"]</tt>
429
+ # * +running?+ -- +true+ if +state+ is <tt>"running"</tt>
430
+ # * +succeeded?+ -- +true+ if +state+ is <tt>"succeeded"</tt>
431
+ # * +failed?+ -- +true+ if +state+ is <tt>"failed"</tt>
432
+ # * +error+ -- a Hash containing details of the error that occurred, if +state+ is <tt>"failed"</tt>, otherwise +nil+
433
+ # * +progress+ -- a Hash containing details of the analysis progress, if +state+ is <tt>"running"</tt>, otherwise +nil+
434
+ # * +schema+ -- a Veritable::Schema describing the columns included in the analysis
435
+ #
436
+ # ==== Methods
437
+ # * +update+ -- refreshes the local representation of the API resource
438
+ # * +delete+ -- deletes the associated API resource
439
+ # * +wait+ -- blocks until the analysis succeeds or fails
440
+ # * +predict+ -- makes new predictions based on the analysis
441
+ # * +related_to+ -- calculates column relatedness based on the analysis
442
+ #
443
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
195
444
  class Analysis
196
445
  include VeritableResource
197
446
 
198
- def update; @doc = get(link('self')); end
447
+ # Refreshes the local representation of the analysis
448
+ #
449
+ # ==== Returns
450
+ # +nil+ on success
451
+ #
452
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
453
+ def update; @doc = get(link('self')); nil; end
199
454
 
455
+ # Alias the connection's delete method as rest_delete
200
456
  alias :rest_delete :delete
457
+
458
+ # Deletes the associated analysis resource
459
+ #
460
+ # ==== Returns
461
+ # +nil+ on success. Succeeds silently if the analysis has already been deleted.
462
+ #
463
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
201
464
  def delete; rest_delete(link('self')); end
202
465
 
466
+ # The schema describing the analysis
467
+ #
468
+ # ==== Returns
469
+ # A new Veritable::Schema object describing the columns contained in the analysis.
470
+ #
471
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
203
472
  def schema; Schema.new(get(link('schema'))); end
204
473
 
474
+ # Blocks until the analysis succeeds or fails
475
+ #
476
+ # ==== Arguments
477
+ # * +max_time+ -- the maximum time to wait, in seconds. Default is +nil+, in which case the method will wait indefinitely.
478
+ # * +poll+ -- the number of seconds to wait between polling the API server. Default is +2+.
479
+ #
480
+ # ==== Returns
481
+ # +nil+ on success.
482
+ #
483
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
205
484
  def wait(max_time=nil, poll=2)
206
485
  elapsed = 0
207
486
  while running?
@@ -216,6 +495,16 @@ module Veritable
216
495
  end
217
496
  end
218
497
 
498
+ # Makes predictions based on the analysis
499
+ #
500
+ # ==== Arguments
501
+ # * +row+ -- a Hash representing the row whose missing values are to be predicted. Keys must be valid String ids of columns contained in the underlying table, and values must be either fixed (conditioning) values of an appropriate type for each column, or +nil+ for values to be predicted.
502
+ # * +count+ -- optionally specify the number of samples from the predictive distribution to return. Defaults to +100+.
503
+ #
504
+ # ==== Returns
505
+ # A Veritable::Prediction object
506
+ #
507
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
219
508
  def predict(row, count=100)
220
509
  update if running?
221
510
  if succeeded?
@@ -242,6 +531,17 @@ module Veritable
242
531
  end
243
532
  end
244
533
 
534
+ # Scores how related columns are to a column of interest
535
+ #
536
+ # ==== Arguments
537
+ # * +column_id+ -- the id of the column of interest
538
+ # * +start+ -- the column id from which to start the cursor. Columns with related scores greater than or equal to the score of column +start+ will be returned by the cursor. Default is +nil+, in which case all columns in the table will be returned by the cursor.
539
+ # * +limit+ -- optionally limits the number of columns returned by the cursor. Default is +nil+, in which case the number of columns returned will not be limited.
540
+ #
541
+ # ==== Returns
542
+ # A Veritable::Cursor. The cursor will return column ids, in order of their relatedness to the column of interest.
543
+ #
544
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
245
545
  def related_to(column_id, opts={'start' => nil, 'limit' => nil})
246
546
  update if running?
247
547
  if succeeded?
@@ -258,21 +558,68 @@ module Veritable
258
558
  end
259
559
  end
260
560
 
561
+ # Returns a string representation of the analysis resource
261
562
  def inspect; to_s; end
563
+
564
+ # Returns a string representation of the analysis resource
262
565
  def to_s; "#<Veritable::Analysis _id='#{_id}'>"; end
263
566
 
567
+ # The unique String id of the analysis
264
568
  def _id; @doc['_id']; end
569
+
570
+ # String timestamp recording the time the analysis was created
265
571
  def created_at; @doc['created_at']; end
572
+
573
+ # String timestamp recording the time the analysis completed
266
574
  def finished_at; @doc['finished_at']; end
575
+
576
+ # The state of the analysis
577
+ #
578
+ # One of <tt>["running", "succeeded", "failed"]</tt>
267
579
  def state; @doc['state']; end
580
+
581
+ # +true+ if +state+ is <tt>"running"</tt>, otherwise +false+
268
582
  def running?; state == 'running'; end
583
+
584
+ # +true+ if +state+ is <tt>"succeeded"</tt>, otherwise +false+
269
585
  def succeeded?; state == 'succeeded'; end
586
+
587
+ # +true+ if +state+ is <tt>"failed"</tt>, otherwise +false+
270
588
  def failed?; state == 'failed'; end
589
+
590
+ # A Hash containing details of the error if +state+ is <tt>"failed"</tt>, otherwise +nil+
271
591
  def error; state == 'failed' ? @doc['error'] : nil; end
592
+
593
+ # A Hash containing details of the analysis progress if +state+ is <tt>"running"</tt>, otherwise +nil+
272
594
  def progress; state == 'running' ? @doc['progress'] : nil; end
595
+
596
+ # The String description of the analysis
597
+ def description; @doc['description']; end
273
598
  end
274
599
 
600
+ # Represents a schema for a Veritable analysis
601
+ #
602
+ # A Veritable::Schema is a Hash with some additional convenience methods. Schema objects can be used interchangeably with Hashes of the same structure throughout veritable-ruby.
603
+ #
604
+ # ==== Methods
605
+ # * +type+ -- gets the datatype for a given column
606
+ # * +validate+ -- checks that the schema is well-formed
607
+ #
608
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
275
609
  class Schema < Hash
610
+
611
+ # Initializes a new Schema from a Hash
612
+ #
613
+ # ==== Arguments
614
+ # * +data+ -- the data for the schema as a Hash with the form:
615
+ # {'col_1': {type: 'datatype'}, 'col_2': {type: 'datatype'}, ...}
616
+ # where the datatype must be one of <tt>["real", "categorical", "count", "boolean"]</tt>
617
+ # * +subset+ -- a Hash or Array whose keys will be used to limit the columns present in the Schema created from the input +data+
618
+ #
619
+ # ==== Returns
620
+ # A new Veritable::Schema
621
+ #
622
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
276
623
  def initialize(data, subset=nil)
277
624
  begin
278
625
  data.each {|k, v|
@@ -295,10 +642,25 @@ module Veritable
295
642
  end
296
643
  end
297
644
 
298
- def type(column)
299
- self[column]['type']
300
- end
301
-
645
+ # Convenience accessor for the type of a Schema column
646
+ #
647
+ # Running <tt>schema.type(column)</tt> is sugar for <tt>schema[column]['type']</tt>
648
+ #
649
+ # ==== Arguments
650
+ # +column+ -- the id of the column whose type we are retrieving
651
+ #
652
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
653
+ def type(column); self[column]['type']; end
654
+
655
+ # Validates the schema, checking that it is well-formed
656
+ #
657
+ # ==== Raises
658
+ # A Veritable::VeritableError if any column ids or types are invalid.
659
+ #
660
+ # ==== Returns
661
+ # +nil+ on success
662
+ #
663
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
302
664
  def validate
303
665
  self.each {|k, v|
304
666
  if not k.is_a? String
@@ -322,15 +684,60 @@ module Veritable
322
684
  raise VeritableError.new("Validate schema -- Invalid schema specification. Column #{k}, type #{v['type']} is not valid. Type must be one of #{DATATYPES}")
323
685
  end
324
686
  }
687
+ nil
325
688
  end
326
689
  end
327
690
 
691
+ # Represents the result of a Veritable prediction
692
+ #
693
+ # A Veritable::Prediction is a Hash whose keys are the columns in the prediction request, and whose values are standard point estimates for predicted columns. For fixed (conditioning) columns, the value is the fixed value. For predicted values, the point estimate varies by datatype:
694
+ # * real -- mean
695
+ # * count -- mean rounded to the nearest integer
696
+ # * categorical -- mode
697
+ # * boolean -- mode
698
+ # The object also gives access to the original predictions request, the predicted distribution on missing values, the schema of the analysis used to make predictions, and standard measures of uncertainty for the predicted values.
699
+ #
700
+ # ==== Attributes
701
+ # * +request+ -- a Hash containing the original predictions request. Keys are column names; conditioning values are present, predicted values are +nil+.
702
+ # * +distribution+ -- the underlying predicted distribution as an Array of Hashes, each of which represents a single sample from the predictive distribution.
703
+ # * +schema+ -- the schema for the columns in the predictions request
704
+ # * +uncertainty+ -- a Hash containing measures of uncertainty for each predicted value.
705
+ #
706
+ # ==== Methods
707
+ # * +prob_within+ -- calculates the probability a column's value lies within a range
708
+ # * +credible_values+ -- calculates a credible range for the value of a column
709
+ #
710
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
328
711
  class Prediction < Hash
712
+ # The original predictions request, as a Hash
329
713
  attr_reader :request
714
+
715
+ # The underlying predicted distribution, as an Array of Hashes
716
+ #
717
+ # Each Hash represents a single draw from the predictive distribution, and should be regarded as equiprobable with the others.
718
+ #
719
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
330
720
  attr_reader :distribution
721
+
722
+ # The schema for the columns in the predictions request
331
723
  attr_reader :schema
724
+
725
+ # A Hash of standard uncertainty measures
726
+ #
727
+ # Keys are the columns in the prediction request and values are uncertainty measures associated with each point estimate. A higher value indicates greater uncertainty. These measures vary by datatype:
728
+ # * real -- length of 90% credible interval
729
+ # * count -- length of 90% credible interval
730
+ # * categorical -- total probability of all non-modal values
731
+ # * boolean -- probability of the non-modal value
732
+ #
733
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
332
734
  attr_reader :uncertainty
333
735
 
736
+ # Initializes a Veritable::Prediction
737
+ #
738
+ # Users should not call directly. Instead, call Veritable::Analysis#predict.
739
+ #
740
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
334
741
  def initialize(request, distribution, schema)
335
742
  @request = request
336
743
  @distribution = distribution
@@ -348,6 +755,18 @@ module Veritable
348
755
  }
349
756
  end
350
757
 
758
+ # Calculates the probability a column's value lies within a range.
759
+ #
760
+ # Based on the underlying predicted distribution, calculates the marginal probability that the predicted value for the given column lies within the specified range.
761
+ #
762
+ # ==== Arguments
763
+ # * +column+ -- the column for which to calculate probabilities
764
+ # * +range+ -- a representation of the range for which to calculate probabilities. For real and count columns, this is an Array of <tt>[start, end]</tt> representing a closed interval. For boolean and categorical columns, this is an Array of discrete values.
765
+ #
766
+ # ==== Returns
767
+ # A probability as a Float
768
+ #
769
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
351
770
  def prob_within(column, range)
352
771
  col_type = schema.type column
353
772
  Veritable::Util.check_datatype(col_type, "Probability within -- ")
@@ -375,6 +794,18 @@ module Veritable
375
794
  end
376
795
  end
377
796
 
797
+ # Calculates a credible range for the value of a column.
798
+ #
799
+ # Based on the underlying predicted distribution, calculates a range within which the predicted value for the column lies with the specified probability.
800
+ #
801
+ # ==== Arguments
802
+ # * +column+ -- the column for which to calculate the range
803
+ # * +p+ -- The desired degree of probability. Default is +nil+, in which case will default to 0.5 for boolean and categorical columns, and to 0.9 for count and real columns.
804
+ #
805
+ # ==== Returns
806
+ # For boolean and categorical columns, a Hash whose keys are categorical values in the calculated range and whose values are probabilities; for real and count columns, an Array of the <tt>[min, max]</tt> values for the calculated range.
807
+ #
808
+ # See also: https://dev.priorknowledge.com/docs/client/ruby
378
809
  def credible_values(column, p=nil)
379
810
  col_type = schema.type column
380
811
  Veritable::Util.check_datatype(col_type, "Credible values -- ")
@@ -395,16 +826,21 @@ module Veritable
395
826
  end
396
827
  end
397
828
 
829
+ # Returns a string representation of the prediction results
398
830
  def inspect; to_s; end
831
+
832
+ # Returns a string representation of the prediction results
399
833
  def to_s; "<Veritable::Prediction #{super}>"; end
400
834
 
401
835
  private
402
836
 
837
+ # Private method: sorts the values for a column
403
838
  def sorted_values(column)
404
839
  values = (distribution.collect {|row| row[column]}).reject {|x| x.nil?}
405
840
  values.sort
406
841
  end
407
842
 
843
+ # Private method: calculates counts for a column
408
844
  def counts(column)
409
845
  cts = Hash.new
410
846
  distribution.each {|row|
@@ -419,6 +855,7 @@ module Veritable
419
855
  cts
420
856
  end
421
857
 
858
+ # Private method: calculates frequencies for a column
422
859
  def freqs(cts)
423
860
  total = cts.values.inject(0) {|memo, obj| memo + obj}
424
861
  freqs = Hash.new()
@@ -428,6 +865,7 @@ module Veritable
428
865
  freqs
429
866
  end
430
867
 
868
+ # Private method: calculates point estimates for a column
431
869
  def point_estimate(column)
432
870
  col_type = schema.type column
433
871
  Veritable::Util.check_datatype(col_type, "Point estimate -- ")
@@ -442,6 +880,7 @@ module Veritable
442
880
  end
443
881
  end
444
882
 
883
+ # Private method: calculates uncertainties for a column
445
884
  def calculate_uncertainty(column)
446
885
  values = distribution.collect {|row| row[column]}
447
886
  col_type = schema.type column