deriva 1.7.9__py3-none-any.whl → 1.7.11__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,859 +0,0 @@
1
- # Tests for the datapath module.
2
- #
3
- # Environment variables:
4
- # DERIVA_PY_TEST_HOSTNAME: hostname of the test server
5
- # DERIVA_PY_TEST_CREDENTIAL: user credential; if not set, the tests attempt to get a credential for the given hostname
6
- # DERIVA_PY_TEST_VERBOSE: set for verbose logging output to stdout
7
-
8
- from copy import deepcopy
9
- import logging
10
- from operator import itemgetter
11
- import os
12
- import unittest
13
- import sys
14
- from deriva.core import DerivaServer, get_credential, ermrest_model as _em, __version__
15
- from deriva.core.datapath import DataPathException, Min, Max, Sum, Avg, Cnt, CntD, Array, ArrayD, Bin, \
16
- simple_denormalization_with_whole_entities
17
-
18
# pandas is optional; HAS_PANDAS records whether the import succeeded.
try:
    from pandas import DataFrame
except ImportError:
    HAS_PANDAS = False
else:
    HAS_PANDAS = True

# Sizes of the generated data set.
TEST_EXP_MAX = 100
TEST_EXPTYPE_MAX = 10
TEST_PROJ_MAX = 1

# Values written into the generated rows.
TEST_EXP_NAME_FORMAT = "experiment-{}"
TEST_PROJ_INVESTIGATOR = "Smith"
TEST_PROJ_NUM = 1

# Awkward names used as column names and attribute aliases by the tests.
SPECIAL_CHARACTERS = '`~!@#$%^&*()_+-={}|[]\\;:"\',./<>?'
INVALID_IDENTIFIER = '9 %$ '
INVALID_IDENTIFIER_FIXED = '_9____'
RESERVED_IDENTIFIER = 'column_definitions'
CONFLICTING_IDENTIFIER = RESERVED_IDENTIFIER + '1'
CONFLICTING_IDENTIFIER_FIXED = RESERVED_IDENTIFIER + '2'

# Schema names.
SNAME_ISA = 'ISA'
SNAME_VOCAB = 'Vocab'

# Table names.
TNAME_PROJECT = 'Project'
TNAME_EXPERIMENT = 'Experiment'
TNAME_EXPERIMENT_TYPE = 'Experiment_Type'
TNAME_EXPERIMENT_COPY = 'Experiment_Copy'

# Test host comes from the environment; the test class is skipped without it.
hostname = os.getenv("DERIVA_PY_TEST_HOSTNAME")

logger = logging.getLogger(__name__)
if os.getenv("DERIVA_PY_TEST_VERBOSE"):
    # verbose mode: emit debug records through a stream handler
    logger.setLevel(logging.DEBUG)
    logger.addHandler(logging.StreamHandler())
48
-
49
-
50
def define_test_schema(catalog):
    """Create the schemas and tables the datapath tests run against.

    Creates the vocabulary schema with its experiment-type vocabulary table,
    then the ISA schema with a project table and two identically shaped
    experiment tables. Each experiment table has a foreign key on 'Type' to the
    vocabulary table's 'ID' and a composite foreign key to the project table.

    :param catalog: catalog object providing ``getCatalogModel()``
    """
    types = _em.builtin_types
    model = catalog.getCatalogModel()

    vocab_schema = model.create_schema(_em.Schema.define(SNAME_VOCAB))
    vocab_schema.create_table(_em.Table.define_vocabulary(TNAME_EXPERIMENT_TYPE, "TEST:{RID}"))
    isa_schema = model.create_schema(_em.Schema.define(SNAME_ISA))

    def build_columns(specs):
        """Turn (name, type) pairs into column definitions."""
        return [_em.Column.define(cname, ctype) for (cname, ctype) in specs]

    # project table: includes columns with invalid, reserved and conflicting names
    project_columns = build_columns([
        ('Investigator', types.text),
        ('Num', types.int4),
        (INVALID_IDENTIFIER, types.int4),
        (RESERVED_IDENTIFIER, types.text),
        (RESERVED_IDENTIFIER + '1', types.text),
    ])
    project_keys = [_em.Key.define(['Investigator', 'Num'])]
    isa_schema.create_table(
        _em.Table.define(TNAME_PROJECT, column_defs=project_columns, key_defs=project_keys)
    )

    def exp_table_def(exp_table_name):
        """Return the definition of an experiment table with the given name."""
        experiment_columns = build_columns([
            ('Name', types.text),
            ('Amount', types.int4),
            ('Time', types.timestamptz),
            ('Type', types.text),
            ('Project Investigator', types.text),
            ('Project_Num', types.int4),
            ('Empty', types.int4),
        ])
        experiment_keys = [_em.Key.define(['Name'])]
        experiment_fkeys = [
            _em.ForeignKey.define(['Type'], SNAME_VOCAB, TNAME_EXPERIMENT_TYPE, ['ID']),
            _em.ForeignKey.define(
                ['Project Investigator', 'Project_Num'],
                SNAME_ISA, TNAME_PROJECT,
                ['Investigator', 'Num']
            ),
        ]
        return _em.Table.define(
            exp_table_name,
            column_defs=experiment_columns,
            key_defs=experiment_keys,
            fkey_defs=experiment_fkeys
        )

    # the populated experiment table first, then the scratch copy
    for table_name in (TNAME_EXPERIMENT, TNAME_EXPERIMENT_COPY):
        isa_schema.create_table(exp_table_def(table_name))
102
-
103
- # create experiment tables
104
- isa.create_table(exp_table_def(TNAME_EXPERIMENT))
105
- isa.create_table(exp_table_def(TNAME_EXPERIMENT_COPY))
106
-
107
-
108
def _generate_experiment_entities(types, count):
    """Build the content of ``count`` experiment rows.

    Row ``i`` takes its 'Type' from ``types[i % TEST_EXPTYPE_MAX]['ID']``, uses
    ``i`` as its 'Amount' and a January 2018 day derived from ``i`` as its 'Time'.

    :param types: type entities to be referenced from the generated rows
    :param count: number of rows to generate
    :return: a list of dict objects (experiment entities)
    """
    entities = []
    for index in range(count):
        day_of_month = 1 + (index % 31)
        type_entity = types[index % TEST_EXPTYPE_MAX]
        entities.append({
            "Name": TEST_EXP_NAME_FORMAT.format(index),
            "Amount": index,
            "Time": "2018-01-{}T01:00:00.0".format(day_of_month),
            "Type": type_entity['ID'],
            "Project Investigator": TEST_PROJ_INVESTIGATOR,
            "Project_Num": TEST_PROJ_NUM,
            "Empty": None
        })
    return entities
127
-
128
-
129
def populate_test_catalog(catalog):
    """Insert the project row, the experiment types and the experiments.

    :param catalog: catalog object providing ``getPathBuilder()``
    """
    builder = catalog.getPathBuilder()

    logger.debug("Inserting project...")
    project_table = builder.schemas[SNAME_ISA].tables[TNAME_PROJECT]
    logger.debug("Inserting investigators...")
    project_row = {"Investigator": TEST_PROJ_INVESTIGATOR, "Num": TEST_PROJ_NUM}
    project_table.insert([project_row])

    logger.debug("Inserting experiment types...")
    type_table = builder.schemas[SNAME_VOCAB].tables[TNAME_EXPERIMENT_TYPE]
    type_rows = []
    for name in range(TEST_EXPTYPE_MAX):
        type_rows.append({"Name": "{}".format(name), "Description": "NA"})
    # 'ID' and 'URI' are passed as defaults rather than supplied in the rows
    inserted_types = type_table.insert(type_rows, defaults=['ID', 'URI'])

    logger.debug("Inserting experiments...")
    experiment_table = builder.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT]
    experiment_rows = _generate_experiment_entities(inserted_types, TEST_EXP_MAX)
    experiment_table.insert(experiment_rows)
146
-
147
-
148
- @unittest.skipUnless(hostname, "Test host not specified")
149
- class DatapathTests (unittest.TestCase):
150
- catalog = None
151
-
152
@classmethod
def setUpClass(cls):
    """Create a catalog on the test host, then define and populate its schema.

    If defining or populating fails, the new catalog is deleted and the
    exception is re-raised.
    """
    logger.debug("setupUpClass begin")
    credential = os.getenv("DERIVA_PY_TEST_CREDENTIAL")
    if not credential:
        # no credential in the environment: look one up for the host
        credential = get_credential(hostname)
    server = DerivaServer('https', hostname, credentials=credential)
    cls.catalog = server.create_ermrest_catalog()
    try:
        for prepare in (define_test_schema, populate_test_catalog):
            prepare(cls.catalog)
    except Exception:
        # do not leave a half-built catalog behind
        cls.catalog.delete_ermrest_catalog(really=True)
        raise
    logger.debug("setupUpClass done")
166
-
167
@classmethod
def tearDownClass(cls):
    """Delete the catalog created in setUpClass."""
    logger.debug("tearDownClass begin")
    catalog = cls.catalog
    catalog.delete_ermrest_catalog(really=True)
    logger.debug("tearDownClass done")
172
-
173
def setUp(self):
    """Bind the path builder, table wrappers, type rows and catalog model for a test."""
    self.paths = self.catalog.getPathBuilder()
    schemas = self.paths.schemas
    self.project = schemas[SNAME_ISA].tables[TNAME_PROJECT]
    self.experiment = schemas[SNAME_ISA].tables[TNAME_EXPERIMENT]
    self.experiment_type = schemas[SNAME_VOCAB].tables[TNAME_EXPERIMENT_TYPE]
    self.experiment_copy = schemas[SNAME_ISA].tables[TNAME_EXPERIMENT_COPY]
    # materialize the experiment type rows once per test
    type_entities = self.experiment_type.entities()
    self.types = list(type_entities)
    self.model = self.catalog.getCatalogModel()
181
-
182
def tearDown(self):
    """Delete every row of the scratch copy table after each test."""
    try:
        copy_path = self.experiment_copy.path
        copy_path.delete()
    except DataPathException:
        # an empty table answers the delete with a 404; that is fine here
        pass
188
-
189
def test_catalog_dir_base(self):
    """dir() of the path builder includes 'schemas'."""
    listing = dir(self.paths)
    self.assertIn('schemas', listing)
191
-
192
def test_schema_dir_base(self):
    """dir() of the ISA schema wrapper is a proper superset of its base attribute names."""
    expected = {'_name', 'tables', 'describe'}
    available = set(dir(self.paths.schemas[SNAME_ISA]))
    self.assertLess(expected, available)
194
-
195
def test_datapath_dir_base(self):
    """dir() of an experiment data path is a proper superset of the data path method names."""
    expected = {
        'aggregates', 'groupby', 'attributes', 'context', 'delete',
        'entities', 'filter', 'link', 'table_instances', 'uri',
    }
    experiment_path = self.paths.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT].path
    self.assertLess(expected, set(dir(experiment_path)))
198
-
199
def test_table_dir_base(self):
    """dir() of the experiment table wrapper is a proper superset of the table method names."""
    expected = {
        'aggregates', 'alias', 'groupby', 'attributes', 'describe', 'entities',
        'filter', 'insert', 'link', 'path', 'update', 'uri',
    }
    experiment_table = self.paths.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT]
    self.assertLess(expected, set(dir(experiment_table)))
202
-
203
def test_catalog_dir_with_schemas(self):
    """dir() of the path builder includes both test schema names."""
    schema_names = {SNAME_ISA, SNAME_VOCAB}
    listing = set(dir(self.paths))
    self.assertLess(schema_names, listing)
205
-
206
def test_schema_dir_with_tables(self):
    """dir() of the ISA schema, reached by attribute access, includes the experiment table."""
    listing = dir(self.paths.ISA)
    self.assertIn(TNAME_EXPERIMENT, listing)
208
-
209
def test_table_dir_with_columns(self):
    """dir() of the experiment table, reached by attribute access, includes its column names."""
    column_names = {'Name', 'Amount', 'Time', 'Type'}
    listing = set(dir(self.paths.ISA.Experiment))
    self.assertLess(column_names, listing)
211
-
212
def test_dir_path(self):
    """dir() of the experiment data path includes the experiment table name."""
    listing = dir(self.paths.ISA.Experiment.path)
    self.assertIn(TNAME_EXPERIMENT, listing)
214
-
215
def test_dir_invalid_identifier(self):
    """The fixed form of the invalid column name is listed by dir() and resolves to an attribute."""
    fixed_name = INVALID_IDENTIFIER_FIXED
    self.assertIn(fixed_name, dir(self.project))
    column = getattr(self.project, fixed_name)
    self.assertIsNotNone(column)
218
-
219
def test_dir_conflicting_identifier(self):
    """Both the conflicting name and its fixed form resolve; the fixed form is listed by dir()."""
    self.assertIn(CONFLICTING_IDENTIFIER_FIXED, dir(self.project))
    for identifier in (CONFLICTING_IDENTIFIER, CONFLICTING_IDENTIFIER_FIXED):
        self.assertIsNotNone(getattr(self.project, identifier))
223
-
224
def test_describe_schema(self):
    """Describing a schema emits a DeprecationWarning."""
    with self.assertWarns(DeprecationWarning):
        schema = self.paths.schemas[SNAME_ISA]
        schema.describe()
227
-
228
def test_describe_table(self):
    """Describing a table emits a DeprecationWarning."""
    with self.assertWarns(DeprecationWarning):
        table = self.paths.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT]
        table.describe()
231
-
232
def test_describe_column(self):
    """Describing a column emits a DeprecationWarning."""
    with self.assertWarns(DeprecationWarning):
        table = self.paths.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT]
        column = table.column_definitions['Name']
        column.describe()
235
-
236
def test_unfiltered_fetch(self):
    """An unfiltered entity query returns every experiment."""
    fetched = self.experiment.entities()
    row_count = len(fetched)
    self.assertEqual(row_count, TEST_EXP_MAX)
239
-
240
def test_fetch_with_headers(self):
    """Fetching with an explicit User-Agent header still returns every experiment."""
    user_agent = __name__ + '/' + __version__
    entity_set = self.experiment.entities()
    fetched = entity_set.fetch(headers={'User-Agent': user_agent})
    self.assertEqual(len(fetched), TEST_EXP_MAX)
244
-
245
def test_fetch_with_limit(self):
    """Fetching with a limit leaves exactly that many rows in the result set."""
    results = self.experiment.entities()
    # floor division keeps the limit an int; '/' would produce the float 20.0
    limit = TEST_EXP_MAX // 5
    results.fetch(limit=limit)
    self.assertEqual(len(results), limit)
250
-
251
def test_fetch_with_sort(self):
    """Sorting entities by 'Amount' puts the zero-amount row first."""
    amount_column = self.experiment.column_definitions['Amount']
    fetched = self.experiment.entities()
    fetched.sort(amount_column)
    first_row = fetched[0]
    self.assertEqual(first_row['Amount'], 0)
255
-
256
def test_fetch_attributes_with_sort(self):
    """Sorting projected RID/Amount attributes by 'Amount' puts the zero-amount row first."""
    table = self.experiment
    projected = table.attributes(table.RID, table.Amount)
    projected.sort(table.Amount)
    first_row = projected[0]
    self.assertEqual(first_row['Amount'], 0)
260
-
261
def test_fetch_all_attributes_with_sort(self):
    """Projecting the whole table and sorting by 'Amount' puts the zero-amount row first."""
    table = self.experiment
    projected = table.attributes(table)
    projected.sort(table.Amount)
    first_row = projected[0]
    self.assertEqual(first_row['Amount'], 0)
265
-
266
def test_fetch_all_attributes_with_sort_desc(self):
    """Projecting the whole table and sorting by 'Amount' descending puts the largest amount first."""
    table = self.experiment
    projected = table.attributes(table)
    projected.sort(table.Amount.desc)
    first_row = projected[0]
    self.assertEqual(first_row['Amount'], TEST_EXP_MAX-1)
270
-
271
def test_fetch_from_path_attributes_with_sort_on_talias(self):
    """Sort RID/Amount attributes taken from the path's table instance by 'Amount' ascending."""
    data_path = self.experiment.path
    projected = data_path.Experiment.attributes(
        data_path.Experiment.RID,
        data_path.Experiment.Amount
    )
    projected.sort(data_path.Experiment.Amount)
    first_row = projected[0]
    self.assertEqual(first_row['Amount'], 0)
276
-
277
def test_fetch_from_path_attributes_with_sort_on_talias_desc(self):
    """Sort RID/Amount attributes taken from the path's table instance by 'Amount' descending."""
    data_path = self.experiment.path
    projected = data_path.Experiment.attributes(
        data_path.Experiment.RID,
        data_path.Experiment.Amount
    )
    projected.sort(data_path.Experiment.Amount.desc)
    first_row = projected[0]
    self.assertEqual(first_row['Amount'], TEST_EXP_MAX-1)
282
-
283
def test_fetch_from_path_all_attributes_with_sort_on_talias(self):
    """Project every column of the path's table instance and sort by 'Amount' ascending."""
    data_path = self.experiment.path
    every_column = data_path.Experiment.column_definitions.values()
    projected = data_path.Experiment.attributes(*every_column)
    projected.sort(data_path.Experiment.Amount)
    first_row = projected[0]
    self.assertEqual(first_row['Amount'], 0)
288
-
289
def test_fetch_from_path_all_attributes_with_sort_on_alias_desc(self):
    """Project every column of the path's table instance and sort by 'Amount' descending."""
    data_path = self.experiment.path
    every_column = data_path.Experiment.column_definitions.values()
    projected = data_path.Experiment.attributes(*every_column)
    projected.sort(data_path.Experiment.Amount.desc)
    first_row = projected[0]
    self.assertEqual(first_row['Amount'], TEST_EXP_MAX-1)
294
-
295
def test_fetch_all_cols_with_talias(self):
    """Projecting an aliased table instance prefixes the result keys with the alias."""
    aliased_table = self.paths.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT].alias('X')
    data_path = aliased_table.path
    projected = data_path.attributes(data_path.X)
    first_row = projected.fetch(limit=1)[0]
    for expected_key in ('X:RID', 'X:Name'):
        self.assertIn(expected_key, first_row)
301
-
302
def test_fetch_with_talias(self):
    """Columns projected from an aliased table keep their own name or their column alias."""
    aliased_table = self.paths.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT].alias('X')
    data_path = aliased_table.path
    projected = data_path.attributes(
        data_path.X.RID,
        data_path.X.Name.alias('typeName')
    )
    first_row = projected.fetch(limit=1)[0]
    for expected_key in ('RID', 'typeName'):
        self.assertIn(expected_key, first_row)
308
-
309
def test_attribute_projection(self):
    """Projecting 'Name' and 'Amount' yields rows containing both keys."""
    name_column = self.experiment.column_definitions['Name']
    amount_column = self.experiment.column_definitions['Amount']
    projected = self.experiment.attributes(name_column, amount_column)
    first_row = projected.fetch(limit=1)[0]
    for expected_key in ('Name', 'Amount'):
        self.assertIn(expected_key, first_row)
317
-
318
def test_attribute_err_table_attr(self):
    """Passing a table's '_name' or '_schema' attribute to attributes() raises TypeError."""
    for attr_name in ('_name', '_schema'):
        with self.assertRaises(TypeError):
            not_a_column = getattr(self.experiment, attr_name)
            self.experiment.attributes(not_a_column)
323
-
324
def test_update_err_no_targets(self):
    """Updating with entities that carry only 'RID' raises ValueError."""
    rid_only = {'RID': 1234}
    with self.assertRaises(ValueError):
        self.experiment.update([rid_only])
328
-
329
def test_aggregate_w_invalid_attributes(self):
    """An aggregate function passed without an alias raises TypeError."""
    with self.assertRaises(TypeError):
        unaliased = Min(self.experiment.column_definitions['Amount'])
        self.experiment.aggregates(unaliased)
332
-
333
def test_aggregate_w_invalid_renames(self):
    """Passing a plain column plus an unaliased aggregate to aggregates() raises TypeError."""
    with self.assertRaises(TypeError):
        plain_column = self.experiment.column_definitions['Name']
        unaliased = Min(self.experiment.column_definitions['Amount'])
        self.experiment.aggregates(plain_column, unaliased)
339
-
340
def test_aggregate_fns(self):
    """Each aggregate function over 'Amount' returns the value expected for amounts 0..N-1."""
    amounts = list(range(TEST_EXP_MAX))
    total = sum(range(TEST_EXP_MAX))
    # (result alias, aggregate function, expected value)
    cases = (
        ('min_amount', Min, 0),
        ('max_amount', Max, TEST_EXP_MAX-1),
        ('sum_amount', Sum, total),
        ('avg_amount', Avg, total/TEST_EXP_MAX),
        ('cnt_amount', Cnt, TEST_EXP_MAX),
        ('cnt_d_amount', CntD, TEST_EXP_MAX),
        ('array_amount', Array, amounts),
        ('array_d_amount', ArrayD, amounts),
    )
    for alias, aggregate_fn, expected in cases:
        with self.subTest(name=alias):
            amount_column = self.experiment.column_definitions['Amount']
            aggregate = aggregate_fn(amount_column).alias(alias)
            row = self.experiment.aggregates(aggregate).fetch()[0]
            self.assertIn(alias, row)
            self.assertEqual(row[alias], expected)
358
-
359
def test_aggregate_w_2_fns(self):
    """Two aliased aggregates in one query both appear in the single result row."""
    lowest = Min(self.experiment.column_definitions['Amount']).alias('min_amount')
    highest = Max(self.experiment.column_definitions['Amount']).alias('max_amount')
    row = self.experiment.aggregates(lowest, highest).fetch()[0]
    for alias, expected in (('min_amount', 0), ('max_amount', TEST_EXP_MAX-1)):
        self.assertIn(alias, row)
        self.assertEqual(row[alias], expected)
369
-
370
def test_aggregate_fns_array_star(self):
    """Array/ArrayD over a whole table or table instance return one element per experiment."""
    experiment_path = self.experiment.path
    # (case name, aggregate function, object to query, object to aggregate over)
    cases = [
        ('array_table_star', Array, self.experiment, self.experiment),
        ('array_alias_star', Array, experiment_path, experiment_path.Experiment),
        ('arrayd_table_star', ArrayD, self.experiment, self.experiment),
        ('arrayd_alias_star', ArrayD, experiment_path, experiment_path.Experiment),
    ]
    for case_name, aggregate_fn, source, instance in cases:
        # the query is built before the subTest, as in the original ordering
        query = source.aggregates(aggregate_fn(instance).alias('arr'))
        with self.subTest(name=case_name):
            row = query.fetch()[0]
            self.assertIn('arr', row)
            elements = row['arr']
            self.assertEqual(len(elements), TEST_EXP_MAX)
            self.assertIn('Time', elements[0])
385
-
386
def test_aggregate_fns_cnt_star(self):
    """Cnt over a whole table or table instance counts every experiment."""
    experiment_path = self.experiment.path
    # (case name, aggregate function, object to query, object to count)
    cases = [
        ('cnt_table_star', Cnt, self.experiment, self.experiment),
        ('cnt_alias_star', Cnt, experiment_path, experiment_path.Experiment),
    ]
    for case_name, aggregate_fn, source, instance in cases:
        # the query is built before the subTest, as in the original ordering
        query = source.aggregates(aggregate_fn(instance).alias('cnt'))
        with self.subTest(name=case_name):
            row = query.fetch()[0]
            self.assertIn('cnt', row)
            self.assertEqual(row['cnt'], TEST_EXP_MAX)
398
-
399
def test_attributegroup_fns(self):
    """Run the shared attribute-group subtests for several group key combinations."""
    columns = self.experiment.column_definitions
    group_key_cases = [
        ('one group key', [columns['Type']]),
        ('two group keys', [columns['Project_Num'], columns['Type']]),
        ('aliased group key', [columns['Type'].alias('The Type')]),
    ]
    for case_name, group_key in group_key_cases:
        with self.subTest(name=case_name):
            self._do_attributegroup_fn_subtests(group_key)
408
-
409
def _do_attributegroup_fn_subtests(self, group_key):
    """Check each aggregate function over 'Amount' for the first group of ``group_key``.

    :param group_key: list of group key columns, used both to group and to sort
    """
    # expected amounts are 0, TEST_EXPTYPE_MAX, 2*TEST_EXPTYPE_MAX, ... below TEST_EXP_MAX
    group_amounts = list(range(0, TEST_EXP_MAX, TEST_EXPTYPE_MAX))
    group_total = sum(range(0, TEST_EXP_MAX, TEST_EXPTYPE_MAX))
    # (result alias, aggregate function, expected value in the first group)
    cases = (
        ('min_amount', Min, 0),
        ('max_amount', Max, TEST_EXP_MAX-TEST_EXPTYPE_MAX),
        ('sum_amount', Sum, group_total),
        ('avg_amount', Avg, group_total/TEST_EXPTYPE_MAX),
        ('cnt_amount', Cnt, TEST_EXPTYPE_MAX),
        ('cnt_d_amount', CntD, TEST_EXPTYPE_MAX),
        ('array_amount', Array, group_amounts),
        ('array_d_amount', ArrayD, group_amounts),
    )
    for alias, aggregate_fn, expected in cases:
        with self.subTest(name=alias):
            aggregate = aggregate_fn(self.experiment.column_definitions['Amount']).alias(alias)
            grouped = self.experiment.groupby(*group_key).attributes(aggregate)
            results = grouped.sort(*group_key)

            first_row = results[0]
            self.assertEqual(len(results), TEST_EXPTYPE_MAX)
            self.assertTrue(all(key._name in first_row for key in group_key))
            self.assertIn(alias, first_row)
            self.assertEqual(first_row[alias], expected)
431
-
432
def test_attributegroup_w_bin(self):
    """Run the shared binning subtests with every combination of given and omitted min/max."""
    bound_cases = (
        ('min/max given', 0, TEST_EXP_MAX),
        ('min/max not given', None, None),
        ('min only given', 0, None),
        ('max only given', None, TEST_EXP_MAX),
    )
    for case_name, lower, upper in bound_cases:
        with self.subTest(name=case_name):
            self._do_bin_subtests(lower, upper)
442
-
443
def _do_bin_subtests(self, minval, maxval):
    """Check aggregates grouped by project number and a bin over 'Amount'.

    :param minval: lower bound passed to ``Bin``, or None
    :param maxval: upper bound passed to ``Bin``, or None
    """
    new_name, bin_name = 'TheProj', 'ABin'
    nbins = int(TEST_EXP_MAX/20)
    columns = self.experiment.column_definitions
    group_key = [
        columns['Project_Num'].alias(new_name),
        Bin(columns['Amount'], nbins, minval=minval, maxval=maxval).alias(bin_name)
    ]

    # each check receives the aggregate value and the bin; bin[1] and bin[2]
    # are used as the bin's lower and upper bounds
    def all_within(values, bounds):
        return all(bounds[1] <= item <= bounds[2] for item in values)

    def full_bin_count(value, bounds):
        return value == TEST_EXP_MAX/nbins

    checks = [
        ('min_amount', Min, lambda value, bounds: value >= bounds[1]),
        ('max_amount', Max, lambda value, bounds: value <= bounds[2]),
        ('sum_amount', Sum, lambda value, bounds: value >= bounds[1] + bounds[2]),
        ('avg_amount', Avg, lambda value, bounds: bounds[1] <= value <= bounds[2]),
        ('cnt_amount', Cnt, full_bin_count),
        ('cnt_d_amount', CntD, full_bin_count),
        ('array_amount', Array, all_within),
        ('array_d_amount', ArrayD, all_within),
    ]
    for alias, aggregate_fn, check in checks:
        with self.subTest(name=alias):
            aggregate = aggregate_fn(self.experiment.column_definitions['Amount']).alias(alias)
            rows = self.experiment.groupby(*group_key).attributes(aggregate).fetch()

            self.assertTrue(all(key._name in rows[0] for key in group_key))
            self.assertIn(alias, rows[0])
            for row in rows:
                bounds = row[bin_name]
                # skip the last 2 bins when maxval was resolved; those bins are not aligned like the others
                if maxval or not (bounds[0] >= nbins):
                    self.assertTrue(check(row[alias], bounds))
474
-
475
def test_attributegroup_w_bin_sort(self):
    """Aggregates grouped by a bin come back ordered by the bin sort key, ascending or descending."""
    bin_name = 'bin'
    nbins = int(TEST_EXP_MAX/20)
    amount_bin = Bin(self.experiment.column_definitions['Amount'], nbins, 0, TEST_EXP_MAX).alias(bin_name)
    amount_bin_desc = amount_bin.desc

    def non_decreasing(key, first, second):
        return first[key] <= second[key]

    def non_increasing(key, first, second):
        return first[key] >= second[key]

    aggregates = (('min_amount', Min), ('max_amount', Max), ('sum_amount', Sum), ('avg_amount', Avg))
    orderings = ((amount_bin, non_decreasing), (amount_bin_desc, non_increasing))
    # all four aggregates ascending first, then the same four descending
    cases = [
        (alias, aggregate_fn, sort_key, in_order)
        for sort_key, in_order in orderings
        for alias, aggregate_fn in aggregates
    ]
    for alias, aggregate_fn, sort_key, in_order in cases:
        with self.subTest(name=alias):
            aggregate = aggregate_fn(self.experiment.column_definitions['Amount']).alias(alias)
            grouped = self.experiment.groupby(amount_bin).attributes(aggregate)
            rows = grouped.sort(sort_key).fetch()

            self.assertIn(amount_bin._name, rows[0])
            self.assertIn(alias, rows[0])
            self.assertTrue(in_order(alias, rows[0], rows[1]))
500
-
501
def test_attributegroup_w_bin_resolution(self):
    """Binning the 'Empty' column succeeds only when both min and max are given."""
    binkey = self.experiment.column_definitions['Empty']
    binname = 'bin'

    def run_query(alias, minval, maxval):
        """Group by a 10-bin Bin over the 'Empty' column and fetch its average."""
        empty_bin = Bin(binkey, 10, minval, maxval).alias(binname)
        return self.experiment.groupby(empty_bin).attributes(Avg(binkey).alias(alias)).fetch()

    # (case name, minval, maxval, whether the query is expected to succeed)
    cases = (
        ('min_max_valid', 0, 0, True),
        ('max_invalid', 0, None, False),
        ('min_invalid', None, 0, False),
        ('both_invalid', None, None, False),
    )
    for case_name, minval, maxval, should_succeed in cases:
        with self.subTest(name=case_name):
            if not should_succeed:
                with self.assertRaises(ValueError):
                    run_query(case_name, minval, maxval)
            else:
                rows = run_query(case_name, minval, maxval)
                self.assertIn(binname, rows[0])
                self.assertIn(case_name, rows[0])
523
-
524
def test_link_implicit(self):
    """Linking Experiment to Experiment_Type with no 'on' clause returns every type."""
    linked_path = self.experiment.link(self.experiment_type)
    linked_rows = linked_path.entities()
    self.assertEqual(TEST_EXPTYPE_MAX, len(linked_rows))
527
-
528
def test_link_explicit_simple_key(self):
    """Linking with an explicit single-column equality returns every type."""
    join_condition = (self.experiment.Type == self.experiment_type.ID)
    linked_path = self.experiment.link(self.experiment_type, on=join_condition)
    linked_rows = linked_path.entities()
    self.assertEqual(TEST_EXPTYPE_MAX, len(linked_rows))
534
-
535
def test_link_explicit_composite_key(self):
    """Linking with a conjunction of two column equalities returns every project."""
    investigator_matches = (self.experiment.Project_Investigator == self.project.Investigator)
    number_matches = (self.experiment.Project_Num == self.project.Num)
    linked_path = self.experiment.link(self.project, on=(investigator_matches & number_matches))
    linked_rows = linked_path.entities()
    self.assertEqual(TEST_PROJ_MAX, len(linked_rows))
545
-
546
def test_link_outbound_fkey(self):
    """Link from Experiment along each of its foreign keys, passed as model objects.

    Each case pairs a foreign key of the Experiment table with the table
    wrapper for the table that key references and the expected row count.
    """
    fkey_by_pk_table_name = {
        fkey.pk_table.name: fkey
        for fkey in self.model.schemas[SNAME_ISA].tables[TNAME_EXPERIMENT].foreign_keys
    }

    tests = [
        ('fkey-link-' + TNAME_PROJECT, fkey_by_pk_table_name[TNAME_PROJECT],
         self.project, TEST_PROJ_MAX),
        # the key into the vocabulary table is paired with the vocabulary table
        # wrapper (the original passed the project table here)
        ('fkey-link-' + TNAME_EXPERIMENT_TYPE, fkey_by_pk_table_name[TNAME_EXPERIMENT_TYPE],
         self.experiment_type, TEST_EXPTYPE_MAX)
    ]

    for name, fkey, table, expected_results_len in tests:
        with self.subTest(name=name):
            results = self.experiment.link(table, on=fkey).entities()
            self.assertEqual(expected_results_len, len(results))
561
-
562
def test_link_inbound_fkey(self):
    """Link from Experiment_Type along a foreign key that references it.

    The case pairs the inbound foreign key found on the Experiment table with
    the Experiment table wrapper and expects one row per experiment.
    """
    fkey_by_fk_table_name = {
        fkey.table.name: fkey
        for fkey in self.model.schemas[SNAME_VOCAB].tables[TNAME_EXPERIMENT_TYPE].referenced_by
    }

    tests = [
        # the key is looked up by the Experiment table's name, so the link
        # target is that table (the original passed the project table here)
        ('fkey-link-' + TNAME_EXPERIMENT, fkey_by_fk_table_name[TNAME_EXPERIMENT],
         self.experiment, TEST_EXP_MAX)
    ]

    for name, fkey, table, expected_results_len in tests:
        with self.subTest(name=name):
            results = self.experiment_type.link(table, on=fkey).entities()
            self.assertEqual(expected_results_len, len(results))
576
-
577
def test_filter_equality(self):
    """Filtering 'Name' for equality with one generated name returns one row."""
    wanted_name = TEST_EXP_NAME_FORMAT.format(1)
    predicate = self.experiment.column_definitions['Name'] == wanted_name
    matches = self.experiment.filter(predicate).entities()
    self.assertEqual(len(matches), 1)
582
-
583
def test_filter_inequality(self):
    """Filtering for 'Amount' below 10 returns ten rows."""
    predicate = self.experiment.column_definitions['Amount'] < 10
    matches = self.experiment.filter(predicate).entities()
    self.assertEqual(len(matches), 10)
588
-
589
def test_filter_ciregexp(self):
    """A case-insensitive regexp filter on the tail of the first generated name returns one row."""
    # the pattern is the first generated name with its first 10 characters dropped
    pattern = TEST_EXP_NAME_FORMAT.format(0)[10:]
    predicate = self.experiment.column_definitions['Name'].ciregexp(pattern)
    matches = self.experiment.filter(predicate).entities()
    self.assertEqual(len(matches), 1)
594
-
595
def test_filter_negation(self):
    """Negating the regexp filter returns every row except the one it matched."""
    pattern = TEST_EXP_NAME_FORMAT.format(0)[10:]
    matching = self.experiment.column_definitions['Name'].ciregexp(pattern)
    others = self.experiment.filter(~ (matching)).entities()
    self.assertEqual(len(others), TEST_EXP_MAX - 1)
600
-
601
def test_filter_conjunction(self):
    """A regexp filter combined with an equality filter using '&' returns one row."""
    pattern = TEST_EXP_NAME_FORMAT.format(0)[10:]
    name_matches = self.experiment.column_definitions['Name'].ciregexp(pattern)
    amount_is_zero = (self.experiment.column_definitions['Amount'] == 0)
    matches = self.experiment.filter(name_matches & amount_is_zero).entities()
    self.assertEqual(len(matches), 1)
607
-
608
def test_attribute_deprecated_rename(self):
    """Renaming a projected column through a keyword argument raises TypeError."""
    with self.assertRaises(TypeError):
        name_column = self.experiment.column_definitions['Name']
        amount_column = self.experiment.column_definitions['Amount']
        self.experiment.attributes(name_column, howmuch=amount_column)
614
-
615
def test_attribute_rename(self):
    """Columns renamed with alias() appear in the result under the alias."""
    name_column = self.experiment.column_definitions['Name']
    amount_renamed = self.experiment.column_definitions['Amount'].alias('How many of them ?')
    project_renamed = self.experiment.column_definitions['Project_Num'].alias('Project #')
    projected = self.experiment.attributes(name_column, amount_renamed, project_renamed)
    first_row = projected.fetch(limit=1)[0]
    for expected_key in ('Name', 'How many of them ?', 'Project #'):
        self.assertIn(expected_key, first_row)
625
-
626
def test_attribute_rename_special_chars(self):
    """Aliases containing ':' or URL-unsafe characters survive the round trip."""
    # first test with only the `:` character present which would trigger a lexical error from ermrest
    table_name = self.experiment._name
    column_name = self.experiment.column_definitions['Name']._name
    colon_alias = table_name + ':' + column_name
    projected = self.experiment.attributes(self.experiment.column_definitions['Name'].alias(colon_alias))
    first_row = projected.fetch(limit=1)[0]
    self.assertIn(colon_alias, first_row)

    # second test with url unsafe characters present which would trigger a bad request from the web server
    unsafe_alias = SPECIAL_CHARACTERS
    projected = self.experiment.attributes(self.experiment.column_definitions['Name'].alias(unsafe_alias))
    first_row = projected.fetch(limit=1)[0]
    self.assertIn(unsafe_alias, first_row)
638
-
639
def test_context(self):
    """Entities taken from the Experiment instance of a linked path cover every experiment."""
    linked_path = self.experiment.link(self.experiment_type)
    experiment_rows = linked_path.Experiment.entities()
    self.assertEqual(len(experiment_rows), TEST_EXP_MAX)
643
-
644
def test_path_table_instances(self):
    """The Experiment instance is reachable through the path's table_instances mapping."""
    linked_path = self.experiment.link(self.experiment_type)
    experiment_instance = linked_path.table_instances[TNAME_EXPERIMENT]
    experiment_rows = experiment_instance.entities()
    self.assertEqual(len(experiment_rows), TEST_EXP_MAX)
648
-
649
def test_path_project(self):
    """Project a whole table instance plus plain and aliased columns from a linked table."""
    linked_path = self.experiment.link(self.experiment_type)
    type_columns = linked_path.Experiment_Type.column_definitions
    projected = linked_path.Experiment.attributes(
        linked_path.Experiment,
        type_columns['URI'],
        type_columns['Name'].alias('exptype')
    )
    first_row = projected.fetch(limit=1)[0]
    for expected_key in ('Experiment:Name', 'Experiment:Time', 'URI', 'exptype'):
        self.assertIn(expected_key, first_row)
661
-
662
@unittest.skipUnless(HAS_PANDAS, "pandas library not available")
def test_dataframe(self):
    """A DataFrame built from the experiment entities has one row per experiment."""
    frame = DataFrame(self.experiment.entities())
    self.assertEqual(len(frame), TEST_EXP_MAX)
667
-
668
def test_insert_double_fetch(self):
    """Fetching an insert result twice gives equal rows both times."""
    new_rows = _generate_experiment_entities(self.types, 2)
    insert_result = self.experiment_copy.insert(new_rows)
    first_fetch = insert_result.fetch()
    second_fetch = insert_result.fetch()
    self.assertEqual(first_fetch, second_fetch)
674
-
675
def test_insert_empty_entities(self):
    """Inserting None or an empty list yields an empty result."""
    for nothing in (None, []):
        insert_result = self.experiment_copy.insert(nothing)
        self.assertEqual(len(insert_result), 0)
680
-
681
def test_insert_entities_not_iterable(self):
    """Inserting a non-iterable value raises TypeError."""
    not_iterable = 1
    with self.assertRaises(TypeError):
        self.experiment_type.insert(not_iterable)
684
-
685
def test_insert_entities0_not_dict(self):
    """Inserting an iterable whose first element is not a dict raises TypeError."""
    for bad_entities in ([1], 'this is not a dict'):
        with self.assertRaises(TypeError):
            self.experiment_type.insert(bad_entities)
690
-
691
def test_insert(self):
    """Inserting ten generated rows yields ten result rows."""
    new_rows = _generate_experiment_entities(self.types, 10)
    insert_result = self.experiment_copy.insert(new_rows)
    self.assertEqual(len(insert_result), 10)
694
-
695
def test_insert_on_conflict_raise(self):
    """Re-inserting an existing row without on_conflict_skip raises DataPathException."""
    both_rows = _generate_experiment_entities(self.types, 2)
    only_first = both_rows[0:1]
    insert_result = self.experiment_copy.insert(only_first)
    self.assertEqual(len(insert_result), 1)
    # the first row now exists, so inserting both rows conflicts
    with self.assertRaises(DataPathException):
        self.experiment_copy.insert(both_rows)
702
-
703
def test_insert_on_conflict_skip(self):
    """With on_conflict_skip, re-inserting an existing row returns only the new row."""
    both_rows = _generate_experiment_entities(self.types, 2)
    only_first = both_rows[0:1]
    insert_result = self.experiment_copy.insert(only_first)
    self.assertEqual(len(insert_result), 1)
    # the first row now exists; one result row is expected from the second insert
    insert_result = self.experiment_copy.insert(both_rows, on_conflict_skip=True)
    self.assertEqual(len(insert_result), 1)
710
-
711
def test_update(self):
    """Updating one inserted row changes its 'Name' and keeps its 'RID'."""
    new_rows = _generate_experiment_entities(self.types, 10)
    inserted = self.experiment_copy.insert(new_rows)
    self.assertEqual(len(inserted), 10)
    # now change something in the first result
    changed_row = dict(**inserted[0])
    changed_row['Name'] = '**CHANGED**'
    updated = self.experiment_copy.update([changed_row])
    self.assertEqual(len(updated), 1)
    before, after = inserted[0], updated[0]
    self.assertEqual(before['RID'], after['RID'])
    self.assertNotEqual(before['Name'], after['Name'])
721
-
722
def test_update_empty_entities(self):
    """Updating with None or an empty list yields an empty result."""
    for nothing in (None, []):
        update_result = self.experiment_copy.update(nothing)
        self.assertEqual(len(update_result), 0)
727
-
728
def test_update_entities_not_iterable(self):
    """Updating with a non-iterable value raises TypeError."""
    not_iterable = 1
    with self.assertRaises(TypeError):
        self.experiment_type.update(not_iterable)
731
-
732
def test_update_entities0_not_dict(self):
    """Updating with an iterable whose first element is not a dict raises TypeError."""
    for bad_entities in ([1], 'this is not a dict'):
        with self.assertRaises(TypeError):
            self.experiment_type.update(bad_entities)
737
-
738
def test_delete_whole_path(self):
    """Deleting through the table's path removes every inserted row."""
    copy_table = self.experiment_copy
    copy_table.insert(_generate_experiment_entities(self.types, 10))
    self.assertEqual(len(copy_table.entities()), 10)
    copy_table.path.delete()
    self.assertEqual(len(copy_table.entities()), 0)
743
-
744
def test_delete_filtered_path(self):
    """Deleting through a filtered path removes the row matched by the filter."""
    copy_table = self.experiment_copy
    copy_table.insert(_generate_experiment_entities(self.types, 10))
    name_is_one = copy_table.column_definitions['Name'] == TEST_EXP_NAME_FORMAT.format(1)
    matches_before = copy_table.filter(name_is_one).entities()
    self.assertEqual(len(matches_before), 1)
    copy_table.filter(name_is_one).delete()
    matches_after = copy_table.filter(name_is_one).entities()
    self.assertEqual(len(matches_after), 0)
750
-
751
def test_delete_whole_table(self):
    """Calling delete() on the table wrapper removes every inserted row."""
    copy_table = self.experiment_copy
    copy_table.insert(_generate_experiment_entities(self.types, 10))
    self.assertEqual(len(copy_table.entities()), 10)
    copy_table.delete()
    self.assertEqual(len(copy_table.entities()), 0)
756
-
757
def test_nondefaults(self):
    """Copy experiments with RID/RCB/RCT as nondefaults and add_system_defaults off; the values carry over."""
    preserved_columns = {'RID', 'RCB', 'RCT'}
    source_rows = self.experiment.entities()
    self.assertEqual(len(source_rows), TEST_EXP_MAX)
    copied_rows = self.experiment_copy.insert(
        source_rows,
        nondefaults=preserved_columns,
        add_system_defaults=False
    )
    self.assertEqual(len(source_rows), len(copied_rows), 'entities not copied completely')
    pick_preserved = itemgetter(*preserved_columns)
    for index in range(TEST_EXP_MAX):
        expected_values = pick_preserved(source_rows[index])
        copied_values = pick_preserved(copied_rows[index])
        self.assertEqual(expected_values, copied_values, 'copied values do not match')
766
-
767
- def test_nondefaults_w_add_sys_defaults(self):
768
- nondefaults = {'RID', 'RCB', 'RCT'}
769
- results = self.experiment.entities()
770
- self.assertEqual(len(results), TEST_EXP_MAX)
771
- entities_copy = self.experiment_copy.insert(results, nondefaults=nondefaults)
772
- self.assertEqual(len(results), len(entities_copy), 'entities not copied completely')
773
- ig = itemgetter(*nondefaults)
774
- for i in range(TEST_EXP_MAX):
775
- self.assertEqual(ig(results[i]), ig(entities_copy[i]), 'copied values do not match')
776
-
777
- def test_deepcopy_of_paths(self):
778
- paths = [
779
- self.experiment.path,
780
- self.experiment.link(self.experiment_type),
781
- self.experiment.link(self.experiment_type, on=(self.experiment.Type == self.experiment_type.ID)),
782
- self.experiment.link(
783
- self.project,
784
- on=(
785
- (self.experiment.Project_Investigator == self.project.Investigator) &
786
- (self.experiment.Project_Num == self.project.Num)
787
- )
788
- ),
789
- self.project.filter(self.project.Num < 1000).link(self.experiment).link(self.experiment_type),
790
- self.experiment.alias('Exp').link(self.experiment_type.alias('ExpType')),
791
- self.experiment.filter(self.experiment.column_definitions['Name'] == TEST_EXP_NAME_FORMAT.format(1)),
792
- self.experiment.filter(self.experiment.column_definitions['Amount'] < 10),
793
- self.experiment.filter(
794
- self.experiment.column_definitions['Name'].ciregexp(TEST_EXP_NAME_FORMAT.format(0)[10:])
795
- ),
796
- self.experiment.filter(
797
- ~ (self.experiment.column_definitions['Name'].ciregexp(TEST_EXP_NAME_FORMAT.format(0)[10:]))
798
- ),
799
- self.experiment.filter(
800
- self.experiment.column_definitions['Name'].ciregexp(TEST_EXP_NAME_FORMAT.format(0)[10:])
801
- & (self.experiment.column_definitions['Amount'] == 0)
802
- )
803
- ]
804
- for path in paths:
805
- with self.subTest(name=path.uri):
806
- cp = deepcopy(path)
807
- self.assertNotEqual(path, cp)
808
- self.assertEqual(path.uri, cp.uri)
809
-
810
- def test_merge_paths(self):
811
- path1 = self.experiment.filter(self.experiment.Amount >= 0)
812
- path2 = self.experiment.link(self.experiment_type).filter(self.experiment_type.ID >= '0')
813
- path3 = self.experiment.link(self.project).filter(self.project.Num >= 0)
814
- original_uri = path1.uri
815
-
816
- # merge paths 1..3
817
- path1.merge(path2).merge(path3)
818
- self.assertNotEqual(path1.uri, original_uri, "Merged path's URI should have changed from its original URI")
819
- self.assertEqual(path1.context._name, path3.context._name, "Context of merged paths should equal far right-hand path's context")
820
- self.assertGreater(len(path1.Experiment.entities()), 0, "Should have returned results")
821
-
822
- def test_compose_paths(self):
823
- path1 = self.experiment.filter(self.experiment.Amount >= 0)
824
- path2 = self.experiment.link(self.experiment_type).filter(self.experiment_type.ID >= '0')
825
- path3 = self.experiment.link(self.project).filter(self.project.Num >= 0)
826
- original_uri = path1.uri
827
-
828
- # compose paths 1..3
829
- path = self.paths.compose(path1, path2, path3)
830
- self.assertNotEqual(path, path1, "Compose should have copied the first path rather than mutate it")
831
- self.assertNotEqual(path.uri, path1.uri, "Composed path URI should not match the first path URI")
832
- self.assertEqual(path1.uri, original_uri, "First path was changed")
833
- self.assertNotEqual(path.uri, original_uri, "Merged path's URI should have changed from its original URI")
834
- self.assertEqual(path.context._name, path3.context._name, "Context of composed paths should equal far right-hand path's context")
835
- self.assertGreater(len(path.Experiment.entities()), 0, "Should have returned results")
836
-
837
- def test_simple_denormalization(self):
838
- entities = self.experiment.entities()
839
- results = self.experiment.denormalize()
840
- self.assertEqual(len(entities), len(results))
841
- self.assertNotEqual(entities[0].keys(), results[0].keys())
842
- self.assertIn('Type', results[0])
843
- self.assertTrue(entities[0]['Type'].startswith('TEST:'))
844
- self.assertTrue(results[0]['Type'])
845
- self.assertFalse(results[0]['Type'].startswith('TEST:'))
846
-
847
- def test_simple_denormalization_w_entities(self):
848
- entities = self.experiment.entities()
849
- results = self.experiment.denormalize(heuristic=simple_denormalization_with_whole_entities)
850
- self.assertEqual(len(entities), len(results))
851
- self.assertLess(len(entities[0].keys()), len(results[0].keys()))
852
- self.assertIn('Experiment_Project Investigator_Project_Num_fkey', results[0])
853
- self.assertIsInstance(results[0]['Experiment_Project Investigator_Project_Num_fkey'], list)
854
- self.assertIsInstance(results[0]['Experiment_Project Investigator_Project_Num_fkey'][0], dict)
855
- self.assertIn('RID', results[0]['Experiment_Project Investigator_Project_Num_fkey'][0])
856
-
857
-
858
- if __name__ == '__main__':
859
- unittest.main()