UncountablePythonSDK 0.0.41__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff reflects the content of publicly available package versions as released to a supported public registry, and is provided for informational purposes only.

This version of UncountablePythonSDK has been flagged as potentially problematic.

Files changed (123)
  1. {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/METADATA +5 -1
  2. {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/RECORD +122 -104
  3. docs/requirements.txt +3 -3
  4. examples/invoke_uploader.py +23 -0
  5. pkgs/argument_parser/argument_parser.py +1 -1
  6. pkgs/filesystem_utils/__init__.py +17 -0
  7. pkgs/filesystem_utils/_gdrive_session.py +306 -0
  8. pkgs/filesystem_utils/_local_session.py +69 -0
  9. pkgs/filesystem_utils/_sftp_session.py +147 -0
  10. pkgs/filesystem_utils/file_type_utils.py +61 -0
  11. pkgs/filesystem_utils/filesystem_session.py +39 -0
  12. pkgs/type_spec/emit_open_api.py +4 -2
  13. pkgs/type_spec/emit_open_api_util.py +4 -2
  14. pkgs/type_spec/emit_python.py +13 -14
  15. uncountable/core/file_upload.py +13 -3
  16. uncountable/integration/construct_client.py +1 -1
  17. uncountable/integration/cron.py +9 -6
  18. uncountable/integration/entrypoint.py +1 -1
  19. uncountable/integration/executors/executors.py +24 -0
  20. uncountable/integration/executors/generic_upload_executor.py +245 -0
  21. uncountable/integration/executors/script_executor.py +1 -1
  22. uncountable/integration/job.py +18 -1
  23. uncountable/integration/secret_retrieval/__init__.py +3 -0
  24. uncountable/integration/secret_retrieval/retrieve_secret.py +40 -0
  25. uncountable/integration/server.py +1 -1
  26. uncountable/types/__init__.py +8 -0
  27. uncountable/types/api/batch/execute_batch.py +5 -5
  28. uncountable/types/api/batch/execute_batch_load_async.py +3 -3
  29. uncountable/types/api/chemical/convert_chemical_formats.py +4 -4
  30. uncountable/types/api/entity/create_entities.py +4 -4
  31. uncountable/types/api/entity/create_entity.py +4 -4
  32. uncountable/types/api/entity/get_entities_data.py +4 -4
  33. uncountable/types/api/entity/list_entities.py +5 -5
  34. uncountable/types/api/entity/lock_entity.py +3 -3
  35. uncountable/types/api/entity/resolve_entity_ids.py +4 -4
  36. uncountable/types/api/entity/set_values.py +3 -3
  37. uncountable/types/api/entity/transition_entity_phase.py +5 -5
  38. uncountable/types/api/entity/unlock_entity.py +3 -3
  39. uncountable/types/api/equipment/associate_equipment_input.py +3 -3
  40. uncountable/types/api/field_options/upsert_field_options.py +4 -4
  41. uncountable/types/api/id_source/list_id_source.py +4 -4
  42. uncountable/types/api/id_source/match_id_source.py +4 -4
  43. uncountable/types/api/input_groups/get_input_group_names.py +4 -4
  44. uncountable/types/api/inputs/create_inputs.py +5 -5
  45. uncountable/types/api/inputs/get_input_data.py +7 -7
  46. uncountable/types/api/inputs/get_input_names.py +4 -4
  47. uncountable/types/api/inputs/get_inputs_data.py +7 -7
  48. uncountable/types/api/inputs/set_input_attribute_values.py +4 -4
  49. uncountable/types/api/inputs/set_input_category.py +3 -3
  50. uncountable/types/api/inputs/set_input_subcategories.py +3 -3
  51. uncountable/types/api/inputs/set_intermediate_type.py +3 -3
  52. uncountable/types/api/material_families/update_entity_material_families.py +3 -3
  53. uncountable/types/api/outputs/get_output_data.py +7 -7
  54. uncountable/types/api/outputs/get_output_names.py +4 -4
  55. uncountable/types/api/outputs/resolve_output_conditions.py +6 -6
  56. uncountable/types/api/permissions/set_core_permissions.py +7 -7
  57. uncountable/types/api/project/get_projects.py +4 -4
  58. uncountable/types/api/project/get_projects_data.py +4 -4
  59. uncountable/types/api/recipe_links/create_recipe_link.py +3 -3
  60. uncountable/types/api/recipe_links/remove_recipe_link.py +3 -3
  61. uncountable/types/api/recipe_metadata/get_recipe_metadata_data.py +4 -4
  62. uncountable/types/api/recipes/add_recipe_to_project.py +3 -3
  63. uncountable/types/api/recipes/archive_recipes.py +3 -3
  64. uncountable/types/api/recipes/associate_recipe_as_input.py +3 -3
  65. uncountable/types/api/recipes/associate_recipe_as_lot.py +3 -3
  66. uncountable/types/api/recipes/create_recipe.py +3 -3
  67. uncountable/types/api/recipes/create_recipes.py +5 -5
  68. uncountable/types/api/recipes/disassociate_recipe_as_input.py +3 -3
  69. uncountable/types/api/recipes/edit_recipe_inputs.py +12 -12
  70. uncountable/types/api/recipes/get_curve.py +3 -3
  71. uncountable/types/api/recipes/get_recipe_calculations.py +4 -4
  72. uncountable/types/api/recipes/get_recipe_links.py +3 -3
  73. uncountable/types/api/recipes/get_recipe_names.py +4 -4
  74. uncountable/types/api/recipes/get_recipe_output_metadata.py +4 -4
  75. uncountable/types/api/recipes/get_recipes_data.py +12 -12
  76. uncountable/types/api/recipes/lock_recipes.py +4 -4
  77. uncountable/types/api/recipes/remove_recipe_from_project.py +3 -3
  78. uncountable/types/api/recipes/set_recipe_inputs.py +4 -4
  79. uncountable/types/api/recipes/set_recipe_metadata.py +3 -3
  80. uncountable/types/api/recipes/set_recipe_output_annotations.py +7 -7
  81. uncountable/types/api/recipes/set_recipe_outputs.py +5 -5
  82. uncountable/types/api/recipes/set_recipe_tags.py +7 -7
  83. uncountable/types/api/recipes/unarchive_recipes.py +3 -3
  84. uncountable/types/api/recipes/unlock_recipes.py +3 -3
  85. uncountable/types/api/triggers/run_trigger.py +3 -3
  86. uncountable/types/api/uploader/__init__.py +1 -0
  87. uncountable/types/api/uploader/invoke_uploader.py +38 -0
  88. uncountable/types/async_batch_processor.py +36 -0
  89. uncountable/types/async_batch_t.py +6 -4
  90. uncountable/types/calculations_t.py +2 -2
  91. uncountable/types/chemical_structure_t.py +2 -2
  92. uncountable/types/client_base.py +25 -2
  93. uncountable/types/curves_t.py +3 -3
  94. uncountable/types/entity_t.py +2 -2
  95. uncountable/types/experiment_groups_t.py +2 -2
  96. uncountable/types/field_values_t.py +5 -5
  97. uncountable/types/fields_t.py +2 -2
  98. uncountable/types/generic_upload.py +9 -0
  99. uncountable/types/generic_upload_t.py +41 -0
  100. uncountable/types/id_source_t.py +5 -5
  101. uncountable/types/identifier_t.py +4 -4
  102. uncountable/types/input_attributes_t.py +2 -2
  103. uncountable/types/inputs_t.py +2 -2
  104. uncountable/types/job_definition.py +26 -0
  105. uncountable/types/job_definition_t.py +203 -0
  106. uncountable/types/outputs_t.py +2 -2
  107. uncountable/types/phases_t.py +2 -2
  108. uncountable/types/recipe_identifiers_t.py +4 -4
  109. uncountable/types/recipe_links_t.py +2 -2
  110. uncountable/types/recipe_metadata_t.py +4 -4
  111. uncountable/types/recipe_output_metadata_t.py +2 -2
  112. uncountable/types/recipe_tags_t.py +2 -2
  113. uncountable/types/recipe_workflow_steps_t.py +5 -5
  114. uncountable/types/recipes_t.py +2 -2
  115. uncountable/types/response_t.py +2 -2
  116. uncountable/types/secret_retrieval.py +12 -0
  117. uncountable/types/secret_retrieval_t.py +69 -0
  118. uncountable/types/units_t.py +2 -2
  119. uncountable/types/users_t.py +2 -2
  120. uncountable/types/workflows_t.py +3 -3
  121. uncountable/integration/types.py +0 -89
  122. {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/WHEEL +0 -0
  123. {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/top_level.txt +0 -0
pkgs/type_spec/emit_python.py
@@ -1,6 +1,6 @@
+ import dataclasses
  import io
  import os
- from dataclasses import dataclass, field
  from decimal import Decimal
  from typing import Any, Optional

@@ -35,11 +35,11 @@ QUEUED_BATCH_REQUEST_STYPE = builder.SpecTypeDefnObject(
  )


- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class TrackingContext:
      namespace: Optional[builder.SpecNamespace] = None
-     namespaces: set[builder.SpecNamespace] = field(default_factory=set)
-     names: set[str] = field(default_factory=set)
+     namespaces: set[builder.SpecNamespace] = dataclasses.field(default_factory=set)
+     names: set[str] = dataclasses.field(default_factory=set)

      use_enum: bool = False
      use_serial_string_enum: bool = False
@@ -50,7 +50,7 @@ class TrackingContext:
      use_opaque_key: bool = False


- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class Context(TrackingContext):
      out: io.StringIO
      namespace: builder.SpecNamespace
@@ -221,7 +221,7 @@ def _emit_types_imports(*, out: io.StringIO, ctx: Context) -> None:
      if ctx.use_enum:
          out.write("from pkgs.strenum_compat import StrEnum\n")
      if ctx.use_dataclass:
-         out.write("from dataclasses import dataclass\n")
+         out.write("import dataclasses\n")
      if ctx.use_serial_class:
          out.write("from pkgs.serialization import serial_class\n")
      if ctx.use_serial_union:
@@ -652,7 +652,7 @@ def _emit_string_enum(ctx: Context, stype: builder.SpecTypeDefnStringEnum) -> No
  )


- @dataclass
+ @dataclasses.dataclass
  class EmittedPropertiesMetadata:
      unconverted_keys: set[str]
      unconverted_values: set[str]
@@ -719,7 +719,6 @@ def _emit_properties(
          default = "None"
      elif prop.has_default:
          default = _emit_value(ctx, prop.spec_type, prop.default)
-
      class_out.write(f"{INDENT * num_indent}{py_name}: {ref_type}")
      if default:
          class_out.write(f" = {default}")
@@ -833,7 +832,7 @@ def _emit_type(ctx: Context, stype: builder.SpecType) -> None:

      ctx.out.write(")\n")

-     dataclass = "@dataclass"
+     dataclass = "@dataclasses.dataclass"
      dc_args = []
      if stype.is_kw_only():
          dc_args.append("kw_only=True")
@@ -1159,7 +1158,7 @@ def _emit_api_argument_lookup(
      for endpoint_root in builder.api_endpoints:
          routes_output = config.routes_output[endpoint_root]

-         imports = ["import typing", "from dataclasses import dataclass"]
+         imports = ["import typing", "import dataclasses"]
          mappings = []
          for namespace in sorted(
              builder.namespaces.values(),
@@ -1212,13 +1211,13 @@ AT = typing.TypeVar("AT")
  DT = typing.TypeVar("DT")


- @dataclass(kw_only=True, frozen=True)
+ @dataclasses.dataclass(kw_only=True, frozen=True)
  class ApiEndpointKey:
      method: str
      route: str


- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class ApiEndpointSpec(typing.Generic[AT, DT]):
      route: str
      arguments_type: type[AT]
@@ -1246,7 +1245,7 @@ __all__ = ["{API_ARGUMENTS_NAME}"]
  CLIENT_CLASS_FILENAME = "client_base"
  CLIENT_CLASS_IMPORTS = [
      "from abc import ABC, abstractmethod",
-     "from dataclasses import dataclass",
+     "import dataclasses",
  ]
  ASYNC_BATCH_PROCESSOR_FILENAME = "async_batch_processor"
  ASYNC_BATCH_PROCESSOR_IMPORTS = [
@@ -1339,7 +1338,7 @@ def _emit_client_class(
  DT = typing.TypeVar("DT")


- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class APIRequest:
      method: str
      endpoint: str
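Module-qualified `dataclasses` access avoids a class of shadowing bugs that unqualified `from dataclasses import dataclass, field` invites in generated code: a generated property that happens to be named `field` silently shadows the helper. A sketch of the failure mode (the `Column` class and its `field` property are hypothetical; whether this motivated the change is not stated in the diff):

    # Hypothetical generated module using unqualified imports.
    from dataclasses import dataclass, field

    @dataclass(kw_only=True)
    class Column:
        field: str = "name"  # a property named "field" shadows the import
        labels: list[str] = field(default_factory=list)  # TypeError: 'str' object is not callable

    # With module-qualified access there is nothing to shadow:
    import dataclasses

    @dataclasses.dataclass(kw_only=True)
    class SafeColumn:
        field: str = "name"
        labels: list[str] = dataclasses.field(default_factory=list)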
uncountable/core/file_upload.py
@@ -16,6 +16,7 @@ _CHUNK_SIZE = 5 * 1024 * 1024 # s3 requires 5MiB minimum

  class FileUploadType(StrEnum):
      MEDIA_FILE_UPLOAD = "MEDIA_FILE_UPLOAD"
+     DATA_FILE_UPLOAD = "DATA_FILE_UPLOAD"


  @dataclass(kw_only=True)
@@ -26,7 +27,14 @@ class MediaFileUpload:
      type: Literal[FileUploadType.MEDIA_FILE_UPLOAD] = FileUploadType.MEDIA_FILE_UPLOAD


- FileUpload = MediaFileUpload
+ @dataclass(kw_only=True)
+ class DataFileUpload:
+     data: BytesIO
+     name: str
+     type: Literal[FileUploadType.DATA_FILE_UPLOAD] = FileUploadType.DATA_FILE_UPLOAD
+
+
+ FileUpload = MediaFileUpload | DataFileUpload


  @dataclass(kw_only=True)
@@ -37,12 +45,14 @@ class FileBytes:

  @contextmanager
  def file_upload_data(file_upload: FileUpload) -> Generator[FileBytes, None, None]:
-     match file_upload.type:
-         case FileUploadType.MEDIA_FILE_UPLOAD:
+     match file_upload:
+         case MediaFileUpload():
              with open(file_upload.path, "rb") as f:
                  yield FileBytes(
                      name=Path(file_upload.path).name, bytes_data=BytesIO(f.read())
                  )
+         case DataFileUpload():
+             yield FileBytes(name=file_upload.name, bytes_data=file_upload.data)


  @dataclass(kw_only=True)
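`FileUpload` is now a union: `MediaFileUpload` reads from a path at upload time, while the new `DataFileUpload` wraps in-memory bytes, which is what the generic upload executor below relies on. A minimal usage sketch, assuming an already-constructed `client`; the file contents and paths are illustrative:

    from io import BytesIO

    from uncountable.core.file_upload import DataFileUpload, MediaFileUpload

    uploads = [
        # In-memory payload: the name is supplied explicitly.
        DataFileUpload(data=BytesIO(b"sample,value\na,1\n"), name="results.csv"),
        # On-disk payload: the name is derived from the path.
        MediaFileUpload(path="/tmp/spectrum.png"),
    ]
    uploaded = client.upload_files(file_uploads=uploads)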
uncountable/integration/construct_client.py
@@ -2,7 +2,7 @@ import os
  from typing import assert_never

  from uncountable.core import AuthDetailsApiKey, Client
- from uncountable.integration.types import (
+ from uncountable.types.job_definition_t import (
      AuthRetrievalEnv,
      ProfileMetadata,
  )
uncountable/integration/cron.py
@@ -2,9 +2,9 @@ from dataclasses import dataclass

  from pkgs.argument_parser import CachedParser
  from uncountable.integration.construct_client import construct_uncountable_client
- from uncountable.integration.executors.script_executor import resolve_script_executor
- from uncountable.integration.job import CronJobArguments
- from uncountable.integration.types import JobDefinition, ProfileMetadata
+ from uncountable.integration.executors.executors import resolve_executor
+ from uncountable.integration.job import CronJobArguments, JobLogger
+ from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata


  @dataclass
@@ -21,11 +21,14 @@ def cron_job_executor(**kwargs: dict) -> None:
      args = CronJobArguments(
          job_definition=args_passed.definition,
          client=construct_uncountable_client(profile_meta=args_passed.profile_metadata),
+         profile_metadata=args_passed.profile_metadata,
+         logger=JobLogger(
+             profile_metadata=args_passed.profile_metadata,
+             job_definition=args_passed.definition,
+         ),
      )

-     job = resolve_script_executor(
-         args_passed.definition.executor, args_passed.profile_metadata
-     )
+     job = resolve_executor(args_passed.definition.executor, args_passed.profile_metadata)

      print(f"running job {args_passed.definition.name}")
uncountable/integration/entrypoint.py
@@ -4,7 +4,7 @@ from importlib import resources
  from pkgs.argument_parser import CachedParser
  from uncountable.integration.db.connect import create_db_engine
  from uncountable.integration.server import IntegrationServer
- from uncountable.integration.types import ProfileDefinition
+ from uncountable.types.job_definition_t import ProfileDefinition

  profile_parser = CachedParser(ProfileDefinition)
uncountable/integration/executors/executors.py (new file)
@@ -0,0 +1,24 @@
+ from typing import assert_never
+
+ from uncountable.integration.executors.generic_upload_executor import GenericUploadJob
+ from uncountable.integration.executors.script_executor import resolve_script_executor
+ from uncountable.integration.job import Job
+ from uncountable.types import job_definition_t
+
+
+ def resolve_executor(
+     job_executor: job_definition_t.JobExecutor,
+     profile_metadata: job_definition_t.ProfileMetadata,
+ ) -> Job:
+     match job_executor:
+         case job_definition_t.JobExecutorScript():
+             return resolve_script_executor(
+                 job_executor, profile_metadata=profile_metadata
+             )
+         case job_definition_t.JobExecutorGenericUpload():
+             return GenericUploadJob(
+                 remote_directories=job_executor.remote_directories,
+                 upload_strategy=job_executor.upload_strategy,
+                 data_source=job_executor.data_source,
+             )
+     assert_never(job_executor)
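`resolve_executor` pairs structural `match` patterns with `typing.assert_never`, so a type checker flags the function as soon as a new `JobExecutor` variant is added to the union without a matching `case`. The same pattern in a self-contained sketch (the shapes are illustrative, not SDK types):

    from dataclasses import dataclass
    from typing import assert_never

    @dataclass
    class Circle:
        radius: float

    @dataclass
    class Square:
        side: float

    Shape = Circle | Square

    def area(shape: Shape) -> float:
        match shape:
            case Circle():
                return 3.14159 * shape.radius**2
            case Square():
                return shape.side**2
        # Reached only if a Shape variant is unhandled; mypy/pyright
        # report an error here instead of letting it slip to runtime.
        assert_never(shape)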
uncountable/integration/executors/generic_upload_executor.py (new file)
@@ -0,0 +1,245 @@
+ import io
+ import os
+ import re
+ from datetime import datetime, timezone
+
+ import paramiko
+
+ from pkgs.filesystem_utils import (
+     FileObjectData,
+     FileSystemFileReference,
+     FileSystemObject,
+     FileSystemSFTPConfig,
+     FileTransfer,
+     SFTPSession,
+ )
+ from pkgs.filesystem_utils.filesystem_session import FileSystemSession
+ from uncountable.core.async_batch import AsyncBatchProcessor
+ from uncountable.core.file_upload import DataFileUpload, FileUpload
+ from uncountable.integration.job import Job, JobArguments, JobLogger, JobResult
+ from uncountable.integration.secret_retrieval import retrieve_secret
+ from uncountable.types.generic_upload_t import (
+     GenericRemoteDirectoryScope,
+     GenericUploadStrategy,
+ )
+ from uncountable.types.job_definition_t import (
+     GenericUploadDataSource,
+     GenericUploadDataSourceSFTP,
+ )
+
+
+ def _filter_files_by_keyword(
+     remote_directory: GenericRemoteDirectoryScope,
+     files: list[FileObjectData],
+     logger: JobLogger,
+ ) -> list[FileObjectData]:
+     if remote_directory.detection_keyword is None:
+         return files
+
+     raise NotImplementedError("keyword detection not implemented yet")
+
+
+ def _filter_by_filename(
+     remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+ ) -> list[FileSystemObject]:
+     if remote_directory.filename_regex is None:
+         return files
+
+     return [
+         file
+         for file in files
+         if file.filename is not None
+         and re.search(remote_directory.filename_regex, file.filename)
+     ]
+
+
+ def _filter_by_file_extension(
+     remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+ ) -> list[FileSystemObject]:
+     if remote_directory.valid_file_extensions is None:
+         return files
+
+     return [
+         file
+         for file in files
+         if file.filename is not None
+         and os.path.splitext(file.filename)[-1] in remote_directory.valid_file_extensions
+     ]
+
+
+ def _filter_by_max_files(
+     remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+ ) -> list[FileSystemObject]:
+     if remote_directory.max_files is None:
+         return files
+
+     return files[: remote_directory.max_files]
+
+
+ def _pull_remote_directory_data(
+     *,
+     filesystem_session: FileSystemSession,
+     remote_directory: GenericRemoteDirectoryScope,
+     logger: JobLogger,
+ ) -> list[FileObjectData]:
+     files_to_pull = filesystem_session.list_files(
+         dir_path=FileSystemFileReference(
+             filepath=remote_directory.src_path,
+         ),
+         recursive=remote_directory.recursive,
+     )
+     logger.log_info(
+         f"Pulled the following files {files_to_pull} from the remote directory {remote_directory}.",
+     )
+
+     files_to_pull = _filter_by_file_extension(remote_directory, files_to_pull)
+     files_to_pull = _filter_by_filename(remote_directory, files_to_pull)
+     files_to_pull = _filter_by_max_files(remote_directory, files_to_pull)
+
+     logger.log_info(
+         f"Accessing SFTP directory: {remote_directory.src_path} and pulling files: {', '.join([f.filename for f in files_to_pull if f.filename is not None])}",
+     )
+     return filesystem_session.download_files(files_to_pull)
+
+
+ def _filter_downloaded_file_data(
+     remote_directory: GenericRemoteDirectoryScope,
+     pulled_file_data: list[FileObjectData],
+     logger: JobLogger,
+ ) -> list[FileObjectData]:
+     filtered_file_data = _filter_files_by_keyword(
+         remote_directory=remote_directory, files=pulled_file_data, logger=logger
+     )
+     return filtered_file_data
+
+
+ def _move_files_post_upload(
+     *,
+     filesystem_session: FileSystemSession,
+     remote_directory_scope: GenericRemoteDirectoryScope,
+     success_file_paths: list[str],
+     failed_file_paths: list[str],
+ ) -> None:
+     success_file_transfers: list[FileTransfer] = []
+     appended_text = ""
+
+     if remote_directory_scope.prepend_date_on_archive:
+         appended_text = f"-{datetime.now(timezone.utc).timestamp()}"
+
+     for file_path in success_file_paths:
+         filename = os.path.split(file_path)[-1]
+         root, extension = os.path.splitext(filename)
+         new_filename = f"{root}{appended_text}{extension}"
+         # format is source, dest in the tuple
+         success_file_transfers.append((
+             FileSystemFileReference(file_path),
+             FileSystemFileReference(
+                 os.path.join(
+                     remote_directory_scope.success_archive_path,
+                     new_filename,
+                 )
+             ),
+         ))
+
+     failed_file_transfers: list[FileTransfer] = []
+     for file_path in failed_file_paths:
+         filename = os.path.split(file_path)[-1]
+         root, extension = os.path.splitext(filename)
+         new_filename = f"{root}{appended_text}{extension}"
+         failed_file_transfers.append((
+             FileSystemFileReference(file_path),
+             FileSystemFileReference(
+                 os.path.join(
+                     remote_directory_scope.failure_archive_path,
+                     new_filename,
+                 )
+             ),
+         ))
+
+     filesystem_session.move_files([*success_file_transfers, *failed_file_transfers])
+
+
+ class GenericUploadJob(Job):
+     def __init__(
+         self,
+         data_source: GenericUploadDataSource,
+         remote_directories: list[GenericRemoteDirectoryScope],
+         upload_strategy: GenericUploadStrategy,
+     ) -> None:
+         super().__init__()
+         self.remote_directories = remote_directories
+         self.upload_strategy = upload_strategy
+         self.data_source = data_source
+
+     def _construct_filesystem_session(self, args: JobArguments) -> FileSystemSession:
+         match self.data_source:
+             case GenericUploadDataSourceSFTP():
+                 pem_secret = retrieve_secret(
+                     self.data_source.pem_secret, profile_metadata=args.profile_metadata
+                 )
+                 private_key = paramiko.AgentKey.from_private_key(io.StringIO(pem_secret))
+                 sftp_config = FileSystemSFTPConfig(
+                     ip=self.data_source.host,
+                     username=self.data_source.username,
+                     pem_key=private_key,
+                     pem_path=None,
+                 )
+                 return SFTPSession(sftp_config=sftp_config)
+
+     def run(self, args: JobArguments) -> JobResult:
+         client = args.client
+         logger = args.logger
+
+         batch_executor = AsyncBatchProcessor(client=client)
+         with self._construct_filesystem_session(args) as filesystem_session:
+             files_to_upload: list[FileUpload] = []
+             for remote_directory in self.remote_directories:
+                 pulled_file_data = _pull_remote_directory_data(
+                     filesystem_session=filesystem_session,
+                     remote_directory=remote_directory,
+                     logger=logger,
+                 )
+                 filtered_file_data = _filter_downloaded_file_data(
+                     remote_directory=remote_directory,
+                     pulled_file_data=pulled_file_data,
+                     logger=args.logger,
+                 )
+                 for file_data in filtered_file_data:
+                     files_to_upload.append(
+                         DataFileUpload(
+                             data=io.BytesIO(file_data.file_data), name=file_data.filename
+                         )
+                     )
+                 if not self.upload_strategy.skip_moving_files:
+                     _move_files_post_upload(
+                         filesystem_session=filesystem_session,
+                         remote_directory_scope=remote_directory,
+                         success_file_paths=[
+                             file.filepath if file.filepath is not None else file.filename
+                             for file in filtered_file_data
+                         ],
+                         # IMPROVE: use triggers/webhooks to mark failed files as failed
+                         failed_file_paths=[],
+                     )
+
+         uploaded_files = client.upload_files(file_uploads=files_to_upload)
+
+         file_ids = [file.file_id for file in uploaded_files]
+
+         if self.upload_strategy.parse_files_individually:
+             for file_id in file_ids:
+                 batch_executor.invoke_uploader(
+                     file_ids=[file_id],
+                     uploader_key=self.upload_strategy.uploader_key,
+                     material_family_keys=self.upload_strategy.material_family_keys,
+                 )
+         else:
+             batch_executor.invoke_uploader(
+                 file_ids=file_ids,
+                 uploader_key=self.upload_strategy.uploader_key,
+                 material_family_keys=self.upload_strategy.material_family_keys,
+             )
+
+         batch_executor.send()
+
+         return JobResult(success=True)
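For reference, constructing the job directly might look like the sketch below. Every field value is illustrative; the exact constructor signatures live in `generic_upload_t` and `job_definition_t` (not shown in this diff), and the sketch assumes keyword-only construction and that the remaining optional scope/strategy fields have sensible defaults:

    from uncountable.integration.executors.generic_upload_executor import GenericUploadJob
    from uncountable.types.generic_upload_t import (
        GenericRemoteDirectoryScope,
        GenericUploadStrategy,
    )
    from uncountable.types.job_definition_t import GenericUploadDataSourceSFTP
    from uncountable.types.secret_retrieval_t import SecretRetrievalEnv

    job = GenericUploadJob(
        data_source=GenericUploadDataSourceSFTP(
            host="sftp.example.com",  # illustrative host
            username="uploader",
            pem_secret=SecretRetrievalEnv(env_key="sftp_pem"),  # resolved via retrieve_secret
        ),
        remote_directories=[
            GenericRemoteDirectoryScope(
                src_path="/outbound",
                recursive=False,
                valid_file_extensions=[".csv"],
                success_archive_path="/outbound/archive",
                failure_archive_path="/outbound/failed",
            )
        ],
        upload_strategy=GenericUploadStrategy(
            uploader_key="my_uploader",  # illustrative uploader key
            parse_files_individually=True,
        ),
    )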
uncountable/integration/executors/script_executor.py
@@ -3,7 +3,7 @@ import inspect
  import os

  from uncountable.integration.job import Job
- from uncountable.integration.types import JobExecutorScript, ProfileMetadata
+ from uncountable.types.job_definition_t import JobExecutorScript, ProfileMetadata


  def resolve_script_executor(
uncountable/integration/job.py
@@ -2,13 +2,30 @@ from abc import ABC, abstractmethod
  from dataclasses import dataclass

  from uncountable.core.client import Client
- from uncountable.integration.types import JobDefinition
+ from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata
+
+
+ class JobLogger:
+     def __init__(
+         self, *, profile_metadata: ProfileMetadata, job_definition: JobDefinition
+     ) -> None:
+         self.profile_metadata = profile_metadata
+         self.job_definition = job_definition
+
+     def log_info(self, *log_objects: object) -> None:
+         # IMPROVE: log a json message with context that can be parsed by OT
+         print(
+             f"[{self.job_definition.id}] in profile ({self.profile_metadata.name}): ",
+             *log_objects,
+         )


  @dataclass
  class JobArgumentsBase:
      job_definition: JobDefinition
+     profile_metadata: ProfileMetadata
      client: Client
+     logger: JobLogger


  @dataclass
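`JobArguments` now carries `profile_metadata` and a `JobLogger` alongside the client, so custom jobs can emit context-prefixed logs instead of bare `print` calls. A minimal sketch of a job implementation (the class name is hypothetical), following the shape of `GenericUploadJob.run` above:

    from uncountable.integration.job import Job, JobArguments, JobResult

    class MySyncJob(Job):
        def run(self, args: JobArguments) -> JobResult:
            # Rendered as "[<job id>] in profile (<profile name>): ..."
            args.logger.log_info("starting sync for", args.job_definition.name)
            # ... use args.client to call the Uncountable API ...
            return JobResult(success=True)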
uncountable/integration/secret_retrieval/__init__.py (new file)
@@ -0,0 +1,3 @@
+ from .retrieve_secret import retrieve_secret
+
+ __all__: list[str] = ["retrieve_secret"]
uncountable/integration/secret_retrieval/retrieve_secret.py (new file)
@@ -0,0 +1,40 @@
+ import os
+
+ from uncountable.types.job_definition_t import ProfileMetadata
+ from uncountable.types.secret_retrieval_t import (
+     SecretRetrieval,
+     SecretRetrievalAWS,
+     SecretRetrievalEnv,
+ )
+
+
+ class SecretRetrievalError(BaseException):
+     def __init__(
+         self, secret_retrieval: SecretRetrieval, message: str | None = None
+     ) -> None:
+         self.secret_retrieval = secret_retrieval
+         self.message = message
+
+     def __str__(self) -> str:
+         append_message = ""
+         if self.message is not None:
+             append_message = f": {self.message}"
+         return f"{self.secret_retrieval.type} secret retrieval failed{append_message}"
+
+
+ def retrieve_secret(
+     secret_retrieval: SecretRetrieval, profile_metadata: ProfileMetadata
+ ) -> str:
+     match secret_retrieval:
+         case SecretRetrievalEnv():
+             env_name = (
+                 f"UNC_{profile_metadata.name.upper()}_{secret_retrieval.env_key.upper()}"
+             )
+             secret = os.environ.get(env_name)
+             if secret is None:
+                 raise SecretRetrievalError(
+                     secret_retrieval, f"environment variable {env_name} missing"
+                 )
+             return secret
+         case SecretRetrievalAWS():
+             raise NotImplementedError("aws secret retrieval not yet implemented")
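For `SecretRetrievalEnv`, the variable name is derived as `UNC_<PROFILE NAME>_<ENV_KEY>` with both parts upper-cased, and a missing variable raises `SecretRetrievalError`. A usage sketch; the profile name, key, and value are illustrative, and `profile_metadata` is assumed to come from the job's arguments:

    import os

    from uncountable.integration.secret_retrieval import retrieve_secret
    from uncountable.types.secret_retrieval_t import SecretRetrievalEnv

    # A profile named "acme" with env_key "sftp_pem" resolves to UNC_ACME_SFTP_PEM.
    os.environ["UNC_ACME_SFTP_PEM"] = "<pem contents>"

    pem = retrieve_secret(
        SecretRetrievalEnv(env_key="sftp_pem"),
        profile_metadata=profile_metadata,  # e.g. args.profile_metadata inside a Job
    )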
uncountable/integration/server.py
@@ -11,7 +11,7 @@ from apscheduler.triggers.cron import CronTrigger
  from sqlalchemy.engine.base import Engine

  from uncountable.integration.cron import CronJobArgs, cron_job_executor
- from uncountable.integration.types import (
+ from uncountable.types.job_definition_t import (
      AuthRetrieval,
      CronJobDefinition,
      JobDefinition,
uncountable/types/__init__.py
@@ -28,6 +28,7 @@ from .api.batch import execute_batch_load_async as execute_batch_load_async_t
  from . import experiment_groups_t as experiment_groups_t
  from . import field_values_t as field_values_t
  from . import fields_t as fields_t
+ from . import generic_upload_t as generic_upload_t
  from .api.recipes import get_curve as get_curve_t
  from .api.entity import get_entities_data as get_entities_data_t
  from .api.inputs import get_input_data as get_input_data_t
@@ -48,6 +49,8 @@ from . import id_source_t as id_source_t
  from . import identifier_t as identifier_t
  from . import input_attributes_t as input_attributes_t
  from . import inputs_t as inputs_t
+ from .api.uploader import invoke_uploader as invoke_uploader_t
+ from . import job_definition_t as job_definition_t
  from .api.entity import list_entities as list_entities_t
  from .api.id_source import list_id_source as list_id_source_t
  from .api.entity import lock_entity as lock_entity_t
@@ -71,6 +74,7 @@ from .api.entity import resolve_entity_ids as resolve_entity_ids_t
  from .api.outputs import resolve_output_conditions as resolve_output_conditions_t
  from . import response_t as response_t
  from .api.triggers import run_trigger as run_trigger_t
+ from . import secret_retrieval_t as secret_retrieval_t
  from .api.permissions import set_core_permissions as set_core_permissions_t
  from .api.inputs import set_input_attribute_values as set_input_attribute_values_t
  from .api.inputs import set_input_category as set_input_category_t
@@ -119,6 +123,7 @@ __all__: list[str] = [
      "experiment_groups_t",
      "field_values_t",
      "fields_t",
+     "generic_upload_t",
      "get_curve_t",
      "get_entities_data_t",
      "get_input_data_t",
@@ -139,6 +144,8 @@ __all__: list[str] = [
      "identifier_t",
      "input_attributes_t",
      "inputs_t",
+     "invoke_uploader_t",
+     "job_definition_t",
      "list_entities_t",
      "list_id_source_t",
      "lock_entity_t",
@@ -162,6 +169,7 @@ __all__: list[str] = [
      "resolve_output_conditions_t",
      "response_t",
      "run_trigger_t",
+     "secret_retrieval_t",
      "set_core_permissions_t",
      "set_input_attribute_values_t",
      "set_input_category_t",
uncountable/types/api/batch/execute_batch.py
@@ -8,7 +8,7 @@ import typing # noqa: F401
  import datetime # noqa: F401
  from decimal import Decimal # noqa: F401
  from pkgs.strenum_compat import StrEnum
- from dataclasses import dataclass
+ import dataclasses
  from pkgs.serialization import serial_class
  from ... import base_t

@@ -43,7 +43,7 @@ class RequestMethod(StrEnum):
  @serial_class(
      unconverted_values={"data"},
  )
- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class BatchRequest:
      path: str
      method: RequestMethod
@@ -51,7 +51,7 @@ class BatchRequest:


  # DO NOT MODIFY -- This file is generated by type_spec
- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class Arguments:
      requests: list[BatchRequest]

@@ -60,7 +60,7 @@ class Arguments:
  @serial_class(
      unconverted_values={"response"},
  )
- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class BatchResponse:
      path: str
      method: RequestMethod
@@ -69,7 +69,7 @@ class BatchResponse:


  # DO NOT MODIFY -- This file is generated by type_spec
- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class Data:
      responses: list[BatchResponse]
  # DO NOT MODIFY -- This file is generated by type_spec
uncountable/types/api/batch/execute_batch_load_async.py
@@ -7,7 +7,7 @@ from __future__ import annotations
  import typing # noqa: F401
  import datetime # noqa: F401
  from decimal import Decimal # noqa: F401
- from dataclasses import dataclass
+ import dataclasses
  from ... import async_batch_t
  from ... import base_t

@@ -23,13 +23,13 @@ ENDPOINT_PATH = "api/external/batch/execute_batch_load_async"


  # DO NOT MODIFY -- This file is generated by type_spec
- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class Arguments:
      requests: list[async_batch_t.AsyncBatchRequest]


  # DO NOT MODIFY -- This file is generated by type_spec
- @dataclass(kw_only=True)
+ @dataclasses.dataclass(kw_only=True)
  class Data:
      job_id: base_t.ObjectId
  # DO NOT MODIFY -- This file is generated by type_spec
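These generated batch types are what `AsyncBatchProcessor` serializes when `GenericUploadJob` queues `invoke_uploader` calls and flushes them with `send()`. A condensed sketch of that flow, assuming an authenticated `client` and file ids returned by an earlier `client.upload_files(...)` call; the uploader key is illustrative:

    from uncountable.core.async_batch import AsyncBatchProcessor

    batch_executor = AsyncBatchProcessor(client=client)
    for file_id in file_ids:
        batch_executor.invoke_uploader(
            file_ids=[file_id],
            uploader_key="my_uploader",  # illustrative key
            material_family_keys=None,   # assuming None when not scoping by family
        )
    # Submits every queued call as one execute_batch_load_async request.
    batch_executor.send()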