deriva 1.7.1__tar.gz → 1.7.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184) hide show
  1. {deriva-1.7.1/deriva.egg-info → deriva-1.7.4}/PKG-INFO +1 -1
  2. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/annotation_config.py +2 -2
  3. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/rollback_annotation.py +1 -1
  4. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/__init__.py +1 -1
  5. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/datapath.py +203 -21
  6. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/ermrest_catalog.py +103 -23
  7. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/ermrest_model.py +955 -59
  8. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/hatrac_store.py +9 -20
  9. deriva-1.7.4/deriva/core/mmo.py +379 -0
  10. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/utils/globus_auth_utils.py +3 -1
  11. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/postprocess/transfer_post_processor.py +2 -2
  12. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/query/base_query_processor.py +2 -1
  13. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/deriva_upload.py +5 -2
  14. {deriva-1.7.1 → deriva-1.7.4/deriva.egg-info}/PKG-INFO +1 -1
  15. {deriva-1.7.1 → deriva-1.7.4}/deriva.egg-info/SOURCES.txt +10 -1
  16. deriva-1.7.4/tests/deriva/core/mmo/__init__.py +0 -0
  17. deriva-1.7.4/tests/deriva/core/mmo/base.py +300 -0
  18. deriva-1.7.4/tests/deriva/core/mmo/test_mmo_drop.py +252 -0
  19. deriva-1.7.4/tests/deriva/core/mmo/test_mmo_find.py +90 -0
  20. deriva-1.7.4/tests/deriva/core/mmo/test_mmo_prune.py +196 -0
  21. deriva-1.7.4/tests/deriva/core/mmo/test_mmo_rename.py +222 -0
  22. deriva-1.7.4/tests/deriva/core/mmo/test_mmo_replace.py +180 -0
  23. {deriva-1.7.1 → deriva-1.7.4}/tests/deriva/core/test_datapath.py +52 -26
  24. deriva-1.7.4/tests/deriva/core/test_ermrest_model.py +782 -0
  25. {deriva-1.7.1 → deriva-1.7.4}/.gitignore +0 -0
  26. {deriva-1.7.1 → deriva-1.7.4}/CHANGELOG.md +0 -0
  27. {deriva-1.7.1 → deriva-1.7.4}/LICENSE +0 -0
  28. {deriva-1.7.1 → deriva-1.7.4}/README.md +0 -0
  29. {deriva-1.7.1 → deriva-1.7.4}/deriva/__init__.py +0 -0
  30. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/__init__.py +0 -0
  31. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/acl_config.py +0 -0
  32. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/annotation_validate.py +0 -0
  33. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/base_config.py +0 -0
  34. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/dump_catalog_annotations.py +0 -0
  35. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/examples/group_owner_policy.json +0 -0
  36. {deriva-1.7.1 → deriva-1.7.4}/deriva/config/examples/self_serve_policy.json +0 -0
  37. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/annotation.py +0 -0
  38. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/base_cli.py +0 -0
  39. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/catalog_cli.py +0 -0
  40. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/deriva_binding.py +0 -0
  41. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/deriva_server.py +0 -0
  42. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/hatrac_cli.py +0 -0
  43. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/polling_ermrest_catalog.py +0 -0
  44. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/app_links.schema.json +0 -0
  45. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/asset.schema.json +0 -0
  46. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/bulk_upload.schema.json +0 -0
  47. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/chaise_config.schema.json +0 -0
  48. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/citation.schema.json +0 -0
  49. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/column_display.schema.json +0 -0
  50. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/display.schema.json +0 -0
  51. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/export.schema.json +0 -0
  52. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/export_2019.schema.json +0 -0
  53. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/foreign_key.schema.json +0 -0
  54. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/generated.schema.json +0 -0
  55. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/immutable.schema.json +0 -0
  56. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/indexing_preferences.schema.json +0 -0
  57. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/key_display.schema.json +0 -0
  58. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/non_deletable.schema.json +0 -0
  59. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/required.schema.json +0 -0
  60. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/source_definitions.schema.json +0 -0
  61. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/table_alternatives.schema.json +0 -0
  62. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/table_display.schema.json +0 -0
  63. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/visible_columns.schema.json +0 -0
  64. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/schemas/visible_foreign_keys.schema.json +0 -0
  65. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/tests/__init__.py +0 -0
  66. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/utils/__init__.py +0 -0
  67. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/utils/core_utils.py +0 -0
  68. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/utils/hash_utils.py +0 -0
  69. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/utils/mime_utils.py +0 -0
  70. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/utils/version_utils.py +0 -0
  71. {deriva-1.7.1 → deriva-1.7.4}/deriva/core/utils/webauthn_utils.py +0 -0
  72. {deriva-1.7.1 → deriva-1.7.4}/deriva/seo/README.md +0 -0
  73. {deriva-1.7.1 → deriva-1.7.4}/deriva/seo/__init__.py +0 -0
  74. {deriva-1.7.1 → deriva-1.7.4}/deriva/seo/sitemap_builder.py +0 -0
  75. {deriva-1.7.1 → deriva-1.7.4}/deriva/seo/sitemap_cli.py +0 -0
  76. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/__init__.py +0 -0
  77. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/backup/__init__.py +0 -0
  78. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/backup/__main__.py +0 -0
  79. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/backup/deriva_backup.py +0 -0
  80. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/backup/deriva_backup_cli.py +0 -0
  81. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/__init__.py +0 -0
  82. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/__main__.py +0 -0
  83. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/deriva_download.py +0 -0
  84. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/deriva_download_cli.py +0 -0
  85. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/__init__.py +0 -0
  86. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/base_processor.py +0 -0
  87. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/postprocess/__init__.py +0 -0
  88. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/postprocess/identifier_post_processor.py +0 -0
  89. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/postprocess/url_post_processor.py +0 -0
  90. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/query/__init__.py +0 -0
  91. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/query/bag_fetch_query_processor.py +0 -0
  92. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/query/file_download_query_processor.py +0 -0
  93. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/transform/__init__.py +0 -0
  94. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/transform/base_transform_processor.py +0 -0
  95. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/transform/column_transform_processor.py +0 -0
  96. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/transform/fasta_transform_processor.py +0 -0
  97. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/transform/format_transform_processor.py +0 -0
  98. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/transform/geo_transform_processor.py +0 -0
  99. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/processors/transform/string_transform_processor.py +0 -0
  100. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/__init__.py +0 -0
  101. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test1.json +0 -0
  102. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test10.json +0 -0
  103. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test11.json +0 -0
  104. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test12.json +0 -0
  105. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test13.json +0 -0
  106. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test14.json +0 -0
  107. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test15.json +0 -0
  108. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test16.json +0 -0
  109. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test19.json +0 -0
  110. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test2.json +0 -0
  111. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test20.json +0 -0
  112. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test3.json +0 -0
  113. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test4.json +0 -0
  114. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test5.json +0 -0
  115. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test6.json +0 -0
  116. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test7.json +0 -0
  117. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test8.json +0 -0
  118. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/download/tests/test9.json +0 -0
  119. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/restore/__init__.py +0 -0
  120. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/restore/__main__.py +0 -0
  121. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/restore/deriva_restore.py +0 -0
  122. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/restore/deriva_restore_cli.py +0 -0
  123. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/__init__.py +0 -0
  124. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/__main__.py +0 -0
  125. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/deriva_upload_cli.py +0 -0
  126. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/processors/__init__.py +0 -0
  127. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/processors/archive_processor.py +0 -0
  128. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/processors/base_processor.py +0 -0
  129. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/processors/logging_processor.py +0 -0
  130. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/processors/metadata_update_processor.py +0 -0
  131. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/processors/rename_processor.py +0 -0
  132. {deriva-1.7.1 → deriva-1.7.4}/deriva/transfer/upload/tests/__init__.py +0 -0
  133. {deriva-1.7.1 → deriva-1.7.4}/deriva/utils/__init__.py +0 -0
  134. {deriva-1.7.1 → deriva-1.7.4}/deriva.egg-info/dependency_links.txt +0 -0
  135. {deriva-1.7.1 → deriva-1.7.4}/deriva.egg-info/entry_points.txt +0 -0
  136. {deriva-1.7.1 → deriva-1.7.4}/deriva.egg-info/requires.txt +0 -0
  137. {deriva-1.7.1 → deriva-1.7.4}/deriva.egg-info/top_level.txt +0 -0
  138. {deriva-1.7.1 → deriva-1.7.4}/docs/BUILD.md +0 -0
  139. {deriva-1.7.1 → deriva-1.7.4}/docs/Makefile +0 -0
  140. {deriva-1.7.1 → deriva-1.7.4}/docs/README.md +0 -0
  141. {deriva-1.7.1 → deriva-1.7.4}/docs/_static/README.txt +0 -0
  142. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.config.rst +0 -0
  143. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.core.rst +0 -0
  144. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.core.utils.rst +0 -0
  145. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.rst +0 -0
  146. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.seo.rst +0 -0
  147. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.backup.rst +0 -0
  148. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.download.processors.postprocess.rst +0 -0
  149. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.download.processors.query.rst +0 -0
  150. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.download.processors.rst +0 -0
  151. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.download.processors.transform.rst +0 -0
  152. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.download.rst +0 -0
  153. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.restore.rst +0 -0
  154. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.rst +0 -0
  155. {deriva-1.7.1 → deriva-1.7.4}/docs/api/deriva.transfer.upload.rst +0 -0
  156. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/commands.md +0 -0
  157. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-acl-config.md +0 -0
  158. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-annotation-config.md +0 -0
  159. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-annotation-validate.md +0 -0
  160. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-backup-cli.md +0 -0
  161. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-download-cli.md +0 -0
  162. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-hatrac-cli.md +0 -0
  163. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-restore-cli.md +0 -0
  164. {deriva-1.7.1 → deriva-1.7.4}/docs/cli/deriva-sitemap-cli.md +0 -0
  165. {deriva-1.7.1 → deriva-1.7.4}/docs/conf.py +0 -0
  166. {deriva-1.7.1 → deriva-1.7.4}/docs/derivapy-catalog-snapshot.ipynb +0 -0
  167. {deriva-1.7.1 → deriva-1.7.4}/docs/derivapy-catalog.ipynb +0 -0
  168. {deriva-1.7.1 → deriva-1.7.4}/docs/derivapy-datapath-example-1.ipynb +0 -0
  169. {deriva-1.7.1 → deriva-1.7.4}/docs/derivapy-datapath-example-2.ipynb +0 -0
  170. {deriva-1.7.1 → deriva-1.7.4}/docs/derivapy-datapath-example-3.ipynb +0 -0
  171. {deriva-1.7.1 → deriva-1.7.4}/docs/derivapy-datapath-example-4.ipynb +0 -0
  172. {deriva-1.7.1 → deriva-1.7.4}/docs/derivapy-datapath-update.ipynb +0 -0
  173. {deriva-1.7.1 → deriva-1.7.4}/docs/get-started.ipynb +0 -0
  174. {deriva-1.7.1 → deriva-1.7.4}/docs/index.rst +0 -0
  175. {deriva-1.7.1 → deriva-1.7.4}/docs/install.md +0 -0
  176. {deriva-1.7.1 → deriva-1.7.4}/docs/make.bat +0 -0
  177. {deriva-1.7.1 → deriva-1.7.4}/docs/project-tutorial.md +0 -0
  178. {deriva-1.7.1 → deriva-1.7.4}/docs/using-r.md +0 -0
  179. {deriva-1.7.1 → deriva-1.7.4}/requirements_dev.txt +0 -0
  180. {deriva-1.7.1 → deriva-1.7.4}/setup.cfg +0 -0
  181. {deriva-1.7.1 → deriva-1.7.4}/setup.py +0 -0
  182. {deriva-1.7.1 → deriva-1.7.4}/tests/__init__.py +0 -0
  183. {deriva-1.7.1 → deriva-1.7.4}/tests/deriva/__init__.py +0 -0
  184. {deriva-1.7.1 → deriva-1.7.4}/tests/deriva/core/__init__.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: deriva
3
- Version: 1.7.1
3
+ Version: 1.7.4
4
4
  Summary: Python APIs and CLIs (Command-Line Interfaces) for the DERIVA platform.
5
5
  Home-page: https://github.com/informatics-isi-edu/deriva-py
6
6
  Author: USC Information Sciences Institute, Informatics Systems Research Division
@@ -33,7 +33,7 @@ class AttrSpecList(BaseSpecList):
33
33
  return None
34
34
  new = []
35
35
  for item in orig_list:
36
- new.append(unicode(item))
36
+ new.append(item)
37
37
  return new
38
38
 
39
39
  def add_list(self, dictlist):
@@ -85,7 +85,7 @@ class AttrConfig:
85
85
  self.toplevel_config = ConfigUtil.find_toplevel_node(self.catalog.getCatalogModel(), schema_name, table_name)
86
86
 
87
87
  def make_speclist(self, name):
88
- d = self.config.get(unicode(name))
88
+ d = self.config.get(name)
89
89
  if d is None:
90
90
  d = [dict()]
91
91
  return AttrSpecList(self.known_attrs, d)
@@ -1,4 +1,4 @@
1
- import platform
1
+ import sys
2
2
  from deriva.core import get_credential, BaseCLI, DerivaServer, __version__
3
3
 
4
4
 
@@ -1,4 +1,4 @@
1
- __version__ = "1.7.1"
1
+ __version__ = "1.7.4"
2
2
 
3
3
  from deriva.core.utils.core_utils import *
4
4
  from deriva.core.base_cli import BaseCLI, KeyValuePairArgs
@@ -5,6 +5,7 @@ import copy
5
5
  from datetime import date
6
6
  import itertools
7
7
  import logging
8
+ import time
8
9
  import re
9
10
  from requests import HTTPError
10
11
  import warnings
@@ -695,6 +696,102 @@ class _ResultSet (object):
695
696
  logger.debug("Fetched %d entities" % len(self._results_doc))
696
697
  return self
697
698
 
699
+ def _json_size_approx(data):
700
+ """Return approximate byte count for minimal JSON encoding of data
701
+
702
+ Minimal encoding has no optional whitespace/indentation.
703
+ """
704
+ nbytes = 0
705
+
706
+ if isinstance(data, (list, tuple)):
707
+ nbytes += 2
708
+ for elem in data:
709
+ nbytes += _json_size_approx(elem) + 1
710
+ elif isinstance(data, dict):
711
+ nbytes += 2
712
+ for k, v in data.items():
713
+ nbytes += _json_size_approx(k) + _json_size_approx(v) + 2
714
+ elif isinstance(data, str):
715
+ nbytes += len(data.encode("utf-8")) + 2
716
+ else:
717
+ nbytes += len(str(data))
718
+
719
+ return nbytes
720
+
721
+ def _generate_batches(entities, max_batch_rows=1000, max_batch_bytes=250*1024):
722
+ """Generate a series of entity batches as slices of the input entities
723
+
724
+ """
725
+ if not isinstance(entities, (list, tuple)):
726
+ raise TypeError('invalid type %s for entities, list or tuple expected' % (type(entities),))
727
+
728
+ if not max_batch_rows:
729
+ logger.debug("disabling batching due to max_batch_rows=%r" % (max_batch_rows,))
730
+ return entities
731
+
732
+ top = len(entities)
733
+ lower = 0
734
+
735
+ while lower < top:
736
+ # to ensure progress, always use at least one row per batch regardless of nbytes
737
+ upper = lower + 1
738
+ batch_nbytes = _json_size_approx(entities[lower])
739
+
740
+ # advance upper position until a batch size limit is reached
741
+ while (upper - lower) < max_batch_rows:
742
+ if upper >= top:
743
+ break
744
+ batch_nbytes += _json_size_approx(entities[upper])
745
+ if batch_nbytes > max_batch_bytes:
746
+ break
747
+ upper += 1
748
+
749
+ # generate one batch and advance for next batch
750
+ logger.debug("yielding batch of %d/%d entities (%d:%d)" % (upper-lower, top, lower, upper))
751
+ yield entities[lower:upper]
752
+ lower = upper
753
+
754
+ def _request_with_retry(request_func, retry_codes={408, 429, 500, 502, 503, 504}, backoff_factor=4, max_attempts=5):
755
+ """Perform request func with exponential backoff and retry.
756
+
757
+ :param request_func: A function returning a requests.Response object or raising HTTPError
758
+ :param retry_codes: HTTPError status codes on which to attempt retry
759
+ :param backoff_factor: Base number of seconds for factor**attempt exponential backoff
760
+ :param max_attempts: Max number of request attempts.
761
+
762
+ Retry will be attempted on HTTPError exceptions which match retry_codes and
763
+ also on other unknown exceptions, presumed to be transport errors.
764
+
765
+ The request_func should do the equivalent of resp.raise_on_status() so that
766
+ it only returns a response object for successful requests.
767
+ """
768
+ attempt = 0
769
+ last_ex = None
770
+
771
+ while attempt < max_attempts:
772
+ try:
773
+ if attempt > 0:
774
+ delay = backoff_factor**(attempt-1)
775
+ logger.debug("sleeping %d seconds before retry %d..." % (delay, attempt))
776
+ time.sleep(delay)
777
+ attempt += 1
778
+ return request_func()
779
+ except HTTPError as e:
780
+ logger.debug(e.response.text)
781
+ last_ex = e
782
+ if 400 <= e.response.status_code < 500:
783
+ last_ex = DataPathException(_http_error_message(e), e)
784
+ if int(e.response.status_code) not in retry_codes:
785
+ raise last_ex
786
+ except Exception as e:
787
+ logger.debug(e.response.text)
788
+ last_ex = e
789
+
790
+ # early return means we don't get here on successful requests
791
+ logger.warning("maximum request retry limit %d exceeded" % (max_attempts,))
792
+ if last_ex is None:
793
+ raise ValueError('exceeded max_attempts without catching a request exception')
794
+ raise last_ex
698
795
 
699
796
  class _TableWrapper (object):
700
797
  """Wraps a Table for datapath expressions.
@@ -836,7 +933,7 @@ class _TableWrapper (object):
836
933
  """
837
934
  return self.path.denormalize(context_name=context_name, heuristic=heuristic, groupkey_name=groupkey_name)
838
935
 
839
- def insert(self, entities, defaults=set(), nondefaults=set(), add_system_defaults=True, on_conflict_skip=False):
936
+ def insert(self, entities, defaults=set(), nondefaults=set(), add_system_defaults=True, on_conflict_skip=False, retry_codes={408, 429, 500, 502, 503, 504}, backoff_factor=4, max_attempts=5, max_batch_rows=1000, max_batch_bytes=250*1024):
840
937
  """Inserts entities into the table.
841
938
 
842
939
  :param entities: an iterable collection of entities (i.e., rows) to be inserted into the table.
@@ -844,7 +941,23 @@ class _TableWrapper (object):
844
941
  :param nondefaults: optional, set of columns names to override implicit system defaults
845
942
  :param add_system_defaults: flag to add system columns to the set of default columns.
846
943
  :param on_conflict_skip: flag to skip entities that violate uniqueness constraints.
944
+ :param retry_codes: set of HTTP status codes for which retry should be considered.
945
+ :param backoff_factor: number of seconds for base of exponential retry backoff.
946
+ :param max_attempts: maximum number of requests attempts with retry.
947
+ :param max_batch_rows: maximum number of rows for one request, or False to disable batching.
948
+ :param max_batch_bytes: approximate maximum number of bytes for one request.
847
949
  :return a collection of newly created entities.
950
+
951
+ Retry will only be attempted for idempotent insertion
952
+ requests, which are when a user-controlled, non-nullable key
953
+ is present in the table and the key's constituent column(s)
954
+ are not listed as defaults, and on_conflict_skip=True.
955
+
956
+ When performing retries, an exponential backoff delay is
957
+ introduced after each failed attempt. The delay is
958
+ backoff_factor**attempt_number seconds for attempts 0 through
959
+ max_attempts-1.
960
+
848
961
  """
849
962
  # empty entities will be accepted but results are therefore an empty entity set
850
963
  if not entities:
@@ -879,17 +992,52 @@ class _TableWrapper (object):
879
992
  if not hasattr(entities[0], 'keys'):
880
993
  raise TypeError('entities[0] does not look like a dictionary -- does not have a "keys()" method')
881
994
 
882
- try:
883
- resp = self._schema._catalog._wrapped_catalog.post(path, json=entities, headers={'Content-Type': 'application/json'})
884
- return _ResultSet(self.path.uri, lambda ignore1, ignore2, ignore3: resp.json())
885
- except HTTPError as e:
886
- logger.debug(e.response.text)
887
- if 400 <= e.response.status_code < 500:
888
- raise DataPathException(_http_error_message(e), e)
889
- else:
890
- raise e
995
+ # perform one batch request in a helper we can hand to retry helper
996
+ def request_func(batch):
997
+ return self._schema._catalog._wrapped_catalog.post(path, json=batch, headers={'Content-Type': 'application/json'})
998
+
999
+ def _has_user_pkey(table):
1000
+ """Return True if table has at least one primary key other than the system RID key"""
1001
+ for key in table.keys:
1002
+ if { c.name for c in key.unique_columns } != {'RID'}:
1003
+ if all([ not c.nullok for c in key.unique_columns ]) \
1004
+ and all([ c.name not in defaults for c in key.unique_columns ]):
1005
+ return True
1006
+ return False
1007
+
1008
+ # determine whether insert is idempotent and therefore retry safe
1009
+ retry_safe = on_conflict_skip and _has_user_pkey(self._wrapped_table)
1010
+
1011
+ # perform all requests synchronously so the caller can get exceptions
1012
+ results = []
1013
+ for batch in _generate_batches(
1014
+ entities,
1015
+ max_batch_rows=max_batch_rows,
1016
+ max_batch_bytes=max_batch_bytes
1017
+ ):
1018
+ try:
1019
+ if retry_safe:
1020
+ resp = _request_with_retry(
1021
+ lambda: request_func(batch),
1022
+ retry_codes=retry_codes,
1023
+ backoff_factor=backoff_factor,
1024
+ max_attempts=max_attempts
1025
+ )
1026
+ else:
1027
+ resp = request_func(batch)
1028
+ results.extend(resp.json())
1029
+ except HTTPError as e:
1030
+ logger.debug(e.response.text)
1031
+ if 400 <= e.response.status_code < 500:
1032
+ raise DataPathException(_http_error_message(e), e)
1033
+ else:
1034
+ raise e
1035
+
1036
+ result = _ResultSet(self.path.uri, lambda ignore1, ignore2, ignore3: results)
1037
+ return result
1038
+
891
1039
 
892
- def update(self, entities, correlation={'RID'}, targets=None):
1040
+ def update(self, entities, correlation={'RID'}, targets=None, retry_codes={408, 429, 500, 502, 503, 504}, backoff_factor=4, max_attempts=5, max_batch_rows=1000, max_batch_bytes=250*1024):
893
1041
  """Update entities of a table.
894
1042
 
895
1043
  For more information see the ERMrest protocol for the `attributegroup` interface. By default, this method will
@@ -901,7 +1049,17 @@ class _TableWrapper (object):
901
1049
  :param correlation: an iterable collection of column names used to correlate input set to the set of rows to be
902
1050
  updated in the catalog. E.g., `{'col name'}` or `{mytable.mycolumn}` will work if you pass a _ColumnWrapper object.
903
1051
  :param targets: an iterable collection of column names used as the targets of the update operation.
904
- :return: a collection of updated entities as returned by the corresponding ERMrest interface.
1052
+ :param retry_codes: set of HTTP status codes for which retry should be considered.
1053
+ :param backoff_factor: number of seconds for base of exponential retry backoff.
1054
+ :param max_attempts: maximum number of requests attempts with retry.
1055
+ :param max_batch_rows: maximum number of rows for one request, or False to disable batching.
1056
+ :param max_batch_bytes: approximate maximum number of bytes for one request.
1057
+ :return a collection of newly created entities.
1058
+
1059
+ When performing retries, an exponential backoff delay is
1060
+ introduced after each failed attempt. The delay is
1061
+ backoff_factor**attempt_number seconds for attempts 0 through
1062
+ max_attempts-1.
905
1063
  """
906
1064
  # empty entities will be accepted but results are therefore an empty entity set
907
1065
  if not entities:
@@ -936,15 +1094,39 @@ class _TableWrapper (object):
936
1094
  targets=','.join(target_cnames)
937
1095
  )
938
1096
 
939
- try:
940
- resp = self._schema._catalog._wrapped_catalog.put(path, json=entities, headers={'Content-Type': 'application/json'})
941
- return _ResultSet(self.path.uri, lambda ignore1, ignore2, ignore3: resp.json())
942
- except HTTPError as e:
943
- logger.debug(e.response.text)
944
- if 400 <= e.response.status_code < 500:
945
- raise DataPathException(_http_error_message(e), e)
946
- else:
947
- raise e
1097
+ # perform one batch request in a helper we can hand to retry helper
1098
+ def request_func(batch):
1099
+ return self._schema._catalog._wrapped_catalog.put(path, json=batch, headers={'Content-Type': 'application/json'})
1100
+
1101
+ # perform all requests synchronously so the caller can get exceptions
1102
+ results = []
1103
+ for batch in _generate_batches(
1104
+ entities,
1105
+ max_batch_rows=max_batch_rows,
1106
+ max_batch_bytes=max_batch_bytes
1107
+ ):
1108
+ try:
1109
+ resp = _request_with_retry(
1110
+ lambda: request_func(batch),
1111
+ retry_codes=retry_codes,
1112
+ backoff_factor=backoff_factor,
1113
+ max_attempts=max_attempts
1114
+ )
1115
+ results.extend(resp.json())
1116
+ except HTTPError as e:
1117
+ logger.debug(e.response.text)
1118
+ if 400 <= e.response.status_code < 500:
1119
+ raise DataPathException(_http_error_message(e), e)
1120
+ else:
1121
+ raise e
1122
+
1123
+ result = _ResultSet(self.path.uri, lambda ignore1, ignore2, ignore3: results)
1124
+ return result
1125
+
1126
+ def delete(self):
1127
+ """Deletes the entity set referenced by the Table.
1128
+ """
1129
+ self.path.delete()
948
1130
 
949
1131
 
950
1132
  class _TableAlias (_TableWrapper):
@@ -53,11 +53,15 @@ class DerivaServer (DerivaBinding):
53
53
  """
54
54
  return ErmrestCatalog.connect(self, catalog_id, snaptime)
55
55
 
56
- def create_ermrest_catalog(self, id=None, owner=None):
56
+ def create_ermrest_catalog(self, id=None, owner=None, name=None, description=None, is_persistent=None, clone_source=None):
57
57
  """Create an ERMrest catalog.
58
58
 
59
59
  :param id: The (str) id desired by the client (default None)
60
60
  :param owner: The initial (list of str) ACL desired by the client (default None)
61
+ :param name: Initial (str) catalog name if not None
62
+ :param description: Initial (str) catalog description if not None
63
+ :param is_persistent: Initial (bool) catalog persistence flag if not None
64
+ :param clone_source: Initial catalog clone_source if not None
61
65
 
62
66
  The new catalog id will be returned in the response, and used
63
67
  in future catalog access. The use of the id parameter
@@ -77,8 +81,17 @@ class DerivaServer (DerivaBinding):
77
81
  owner ACL influences which client(s) are allowed to retry
78
82
  creation with the same id.
79
83
 
84
+ The name, description, is_persistent, and clone_source
85
+ parameters are passed through to the catalog creation service
86
+ to initialize those respective metadata fields of the new
87
+ catalog's registry entry. See ERMrest documentation for more
88
+ detail. Authorization failures may occur when attempting to
89
+ set the is_persistent flag. By default, these fields are not
90
+ initialized in the catalog creation request, and they instead
91
+ receive server-assigned defaults.
92
+
80
93
  """
81
- return ErmrestCatalog.create(self, id, owner)
94
+ return ErmrestCatalog.create(self, id, owner, name, description, is_persistent, clone_source)
82
95
 
83
96
  def connect_ermrest_alias(self, id):
84
97
  """Connect to an ERMrest alias and return the alias binding.
@@ -88,12 +101,14 @@ class DerivaServer (DerivaBinding):
88
101
  """
89
102
  return ErmrestAlias.connect(self, id)
90
103
 
91
- def create_ermrest_alias(self, id=None, owner=None, alias_target=None):
104
+ def create_ermrest_alias(self, id=None, owner=None, alias_target=None, name=None, description=None):
92
105
  """Create an ERMrest catalog alias.
93
106
 
94
107
  :param id: The (str) id desired by the client (default None)
95
108
  :param owner: The initial (list of str) ACL desired by the client (default None)
96
109
  :param alias_target: The initial target catalog id binding desired by the client (default None)
110
+ :param name: Initial (str) catalog name if not None
111
+ :param description: Initial (str) catalog description if not None
97
112
 
98
113
  The new alias id will be returned in the response, and used
99
114
  in future alias access. The use of the id parameter
@@ -118,8 +133,13 @@ class DerivaServer (DerivaBinding):
118
133
  influences which client(s) are allowed to retry creation with
119
134
  the same id.
120
135
 
136
+ The name and description parameters are passed through to the
137
+ alias creation service to initialize those respective metadata
138
+ fields of the new aliase's registry entry. See ERMrest
139
+ documentation for more detail.
140
+
121
141
  """
122
- return ErmrestAlias.create(self, id, owner, alias_target)
142
+ return ErmrestAlias.create(self, id, owner, alias_target, name, description)
123
143
 
124
144
  class ErmrestCatalogMutationError(Exception):
125
145
  pass
@@ -204,15 +224,22 @@ class ErmrestCatalog(DerivaBinding):
204
224
  )
205
225
 
206
226
  @classmethod
207
- def _digest_catalog_args(cls, id, owner):
227
+ def _digest_catalog_args(cls, id, owner, name=None, description=None, is_persistent=None, clone_source=None):
208
228
  rep = dict()
209
229
 
210
- if isinstance(id, str):
211
- rep['id'] = id
212
- elif isinstance(id, (type(nochange), type(None))):
213
- pass
214
- else:
215
- raise TypeError('id must be of type str or None or nochange, not %s' % type(id))
230
+ for v, k, typ in [
231
+ (id, 'id', str),
232
+ (name, 'name', str),
233
+ (description, 'description', str),
234
+ (is_persistent, 'is_persistent', bool),
235
+ (clone_source, 'clone_source', str),
236
+ ]:
237
+ if isinstance(v, typ):
238
+ rep[k] = v
239
+ elif isinstance(v, (type(nochange), type(None))):
240
+ pass
241
+ else:
242
+ raise TypeError('%s must be of type %s or None or nochange, not %s' % (k, typ.__name__, type(v)))
216
243
 
217
244
  if isinstance(owner, list):
218
245
  for e in owner:
@@ -227,12 +254,16 @@ class ErmrestCatalog(DerivaBinding):
227
254
  return rep
228
255
 
229
256
  @classmethod
230
- def create(cls, deriva_server, id=None, owner=None):
257
+ def create(cls, deriva_server, id=None, owner=None, name=None, description=None, is_persistent=None, clone_source=None):
231
258
  """Create an ERMrest catalog and return the ERMrest catalog binding.
232
259
 
233
260
  :param deriva_server: The DerivaServer binding which hosts ermrest.
234
261
  :param id: The (str) id desired by the client (default None)
235
262
  :param owner: The initial (list of str) ACL desired by the client (default None)
263
+ :param name: Initial (str) catalog name if not None
264
+ :param description: Initial (str) catalog description if not None
265
+ :param is_persistent: Initial (bool) catalog persistence flag if not None
266
+ :param clone_source: Initial (str) catalog clone_source if not None
236
267
 
237
268
  The new catalog id will be returned in the response, and used
238
269
  in future catalog access. The use of the id parameter
@@ -252,9 +283,18 @@ class ErmrestCatalog(DerivaBinding):
252
283
  influences which client(s) are allowed to retry creation with
253
284
  the same id.
254
285
 
286
+ The name, description, is_persistent, and clone_source
287
+ parameters are passed through to the catalog creation service
288
+ to initialize those respective metadata fields of the new
289
+ catalog's registry entry. See ERMrest documentation for more
290
+ detail. Authorization failures may occur when attempting to
291
+ set the is_persistent flag. By default, these fields are not
292
+ initialized in the catalog creation request, and they instead
293
+ receive server-assigned defaults.
294
+
255
295
  """
256
296
  path = '/ermrest/catalog'
257
- r = deriva_server.post(path, json=cls._digest_catalog_args(id, owner))
297
+ r = deriva_server.post(path, json=cls._digest_catalog_args(id, owner, name, description, is_persistent, clone_source))
258
298
  r.raise_for_status()
259
299
  return cls.connect(deriva_server, r.json()['id'])
260
300
 
@@ -655,7 +695,8 @@ class ErmrestCatalog(DerivaBinding):
655
695
  copy_annotations=True,
656
696
  copy_policy=True,
657
697
  truncate_after=True,
658
- exclude_schemas=None):
698
+ exclude_schemas=None,
699
+ dst_properties=None):
659
700
  """Clone this catalog's content into dest_catalog, creating a new catalog if needed.
660
701
 
661
702
  :param dst_catalog: Destination catalog or None to request creation of new destination (default).
@@ -664,13 +705,22 @@ class ErmrestCatalog(DerivaBinding):
664
705
  :param copy_policy: Copy access-control policies when True (default).
665
706
  :param truncate_after: Truncate destination history after cloning when True (default).
666
707
  :param exclude_schemas: A list of schema names to exclude from the cloning process.
708
+ :param dst_properties: A dictionary of custom catalog-creation properties.
667
709
 
668
- When dest_catalog is provided, attempt an idempotent clone,
710
+ When dst_catalog is provided, attempt an idempotent clone,
669
711
  assuming content MAY be partially cloned already using the
670
712
  same parameters. This routine uses a table-level annotation
671
713
  "tag:isrd.isi.edu,2018:clone-state" to save progress markers
672
714
  which help it restart efficiently if interrupted.
673
715
 
716
+ When dst_catalog is not provided, a new catalog is
717
+ provisioned. The optional dst_properties can customize
718
+ metadata properties during this step:
719
+
720
+ - name: str
721
+ - description: str (markdown-formatted)
722
+ - is_persistent: boolean
723
+
674
724
  Cloning preserves source row RID values for application tables
675
725
  so that any RID-based foreign keys are still valid. It is not
676
726
  generally advisable to try to merge more than one source into
@@ -692,10 +742,33 @@ class ErmrestCatalog(DerivaBinding):
692
742
  session_config["allow_retry_on_all_methods"] = True
693
743
 
694
744
  if dst_catalog is None:
695
- # TODO: refactor with DerivaServer someday
696
- server = DerivaBinding(self._scheme, self._server, self._credentials, self._caching, session_config)
697
- dst_id = server.post("/ermrest/catalog").json()["id"]
698
- dst_catalog = ErmrestCatalog(self._scheme, self._server, dst_id, self._credentials, self._caching, session_config)
745
+ if dst_properties is not None:
746
+ if not isinstance(dst_properties, dict):
747
+ raise TypeError('dst_properties must be of type dict or None, not %s' % (type(dst_properties),))
748
+ else:
749
+ dst_properties = {}
750
+ kwargs = {
751
+ "name": dst_properties.get('name', 'Clone of %r' % (self._catalog_id,)),
752
+ "description": dst_properties.get(
753
+ 'description',
754
+ '''A cloned copy of catalog %r made with ErmrestCatalog.clone_catalog() using the following parameters:
755
+ - `copy_data`: %r
756
+ - `copy_annotations`: %r
757
+ - `copy_policy`: %r
758
+ - `truncate_after`: %r
759
+ - `exclude_schemas`: %r
760
+ ''' % (
761
+ self._catalog_id,
762
+ copy_data,
763
+ copy_annotations,
764
+ copy_policy,
765
+ truncate_after,
766
+ exclude_schemas,
767
+ )),
768
+ "clone_source": dst_properties.get('clone_source', self._catalog_id),
769
+ }
770
+ server = self.deriva_server
771
+ dst_catalog = server.create_ermrest_catalog(**kwargs)
699
772
 
700
773
  # set top-level config right away and find fatal usage errors...
701
774
  if copy_policy:
@@ -1051,8 +1124,8 @@ class ErmrestAlias(DerivaBinding):
1051
1124
  )
1052
1125
 
1053
1126
  @classmethod
1054
- def _digest_alias_args(cls, id, owner, alias_target):
1055
- rep = ErmrestCatalog._digest_catalog_args(id, owner)
1127
+ def _digest_alias_args(cls, id, owner, alias_target, name, description):
1128
+ rep = ErmrestCatalog._digest_catalog_args(id, owner, name, description)
1056
1129
 
1057
1130
  if isinstance(alias_target, (str, type(None))):
1058
1131
  rep['alias_target'] = alias_target
@@ -1064,13 +1137,15 @@ class ErmrestAlias(DerivaBinding):
1064
1137
  return rep
1065
1138
 
1066
1139
  @classmethod
1067
- def create(cls, deriva_server, id=None, owner=None, alias_target=None):
1140
+ def create(cls, deriva_server, id=None, owner=None, alias_target=None, name=None, description=None):
1068
1141
  """Create an ERMrest catalog alias.
1069
1142
 
1070
1143
  :param deriva_server: The DerivaServer binding which hosts ermrest
1071
1144
  :param id: The (str) id desired by the client (default None)
1072
1145
  :param owner: The initial (list of str) ACL desired by the client (default None)
1073
1146
  :param alias_target: The initial target catalog id desired by the client (default None)
1147
+ :param name: Initial (str) alias name if not None
1148
+ :param description: Initial (str) alias description if not None
1074
1149
 
1075
1150
  The new alias id will be returned in the response, and used
1076
1151
  in future alias access. The use of the id parameter
@@ -1095,9 +1170,14 @@ class ErmrestAlias(DerivaBinding):
1095
1170
  influences which client(s) are allowed to retry creation with
1096
1171
  the same id.
1097
1172
 
1173
+ The name and description parameters are passed through to the
1174
+ alias creation service to initialize those respective metadata
1175
+ fields of the new alias's registry entry. See ERMrest
1176
+ documentation for more detail.
1177
+
1098
1178
  """
1099
1179
  path = '/ermrest/alias'
1100
- r = deriva_server.post(path, json=cls._digest_alias_args(id, owner, alias_target))
1180
+ r = deriva_server.post(path, json=cls._digest_alias_args(id, owner, alias_target, name, description))
1101
1181
  r.raise_for_status()
1102
1182
  return cls.connect(deriva_server, r.json()['id'])
1103
1183