UncountablePythonSDK-0.0.41-py3-none-any.whl → UncountablePythonSDK-0.0.42-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of UncountablePythonSDK might be problematic.
- {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/METADATA +5 -1
- {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/RECORD +122 -104
- docs/requirements.txt +3 -3
- examples/invoke_uploader.py +23 -0
- pkgs/argument_parser/argument_parser.py +1 -1
- pkgs/filesystem_utils/__init__.py +17 -0
- pkgs/filesystem_utils/_gdrive_session.py +306 -0
- pkgs/filesystem_utils/_local_session.py +69 -0
- pkgs/filesystem_utils/_sftp_session.py +147 -0
- pkgs/filesystem_utils/file_type_utils.py +61 -0
- pkgs/filesystem_utils/filesystem_session.py +39 -0
- pkgs/type_spec/emit_open_api.py +4 -2
- pkgs/type_spec/emit_open_api_util.py +4 -2
- pkgs/type_spec/emit_python.py +13 -14
- uncountable/core/file_upload.py +13 -3
- uncountable/integration/construct_client.py +1 -1
- uncountable/integration/cron.py +9 -6
- uncountable/integration/entrypoint.py +1 -1
- uncountable/integration/executors/executors.py +24 -0
- uncountable/integration/executors/generic_upload_executor.py +245 -0
- uncountable/integration/executors/script_executor.py +1 -1
- uncountable/integration/job.py +18 -1
- uncountable/integration/secret_retrieval/__init__.py +3 -0
- uncountable/integration/secret_retrieval/retrieve_secret.py +40 -0
- uncountable/integration/server.py +1 -1
- uncountable/types/__init__.py +8 -0
- uncountable/types/api/batch/execute_batch.py +5 -5
- uncountable/types/api/batch/execute_batch_load_async.py +3 -3
- uncountable/types/api/chemical/convert_chemical_formats.py +4 -4
- uncountable/types/api/entity/create_entities.py +4 -4
- uncountable/types/api/entity/create_entity.py +4 -4
- uncountable/types/api/entity/get_entities_data.py +4 -4
- uncountable/types/api/entity/list_entities.py +5 -5
- uncountable/types/api/entity/lock_entity.py +3 -3
- uncountable/types/api/entity/resolve_entity_ids.py +4 -4
- uncountable/types/api/entity/set_values.py +3 -3
- uncountable/types/api/entity/transition_entity_phase.py +5 -5
- uncountable/types/api/entity/unlock_entity.py +3 -3
- uncountable/types/api/equipment/associate_equipment_input.py +3 -3
- uncountable/types/api/field_options/upsert_field_options.py +4 -4
- uncountable/types/api/id_source/list_id_source.py +4 -4
- uncountable/types/api/id_source/match_id_source.py +4 -4
- uncountable/types/api/input_groups/get_input_group_names.py +4 -4
- uncountable/types/api/inputs/create_inputs.py +5 -5
- uncountable/types/api/inputs/get_input_data.py +7 -7
- uncountable/types/api/inputs/get_input_names.py +4 -4
- uncountable/types/api/inputs/get_inputs_data.py +7 -7
- uncountable/types/api/inputs/set_input_attribute_values.py +4 -4
- uncountable/types/api/inputs/set_input_category.py +3 -3
- uncountable/types/api/inputs/set_input_subcategories.py +3 -3
- uncountable/types/api/inputs/set_intermediate_type.py +3 -3
- uncountable/types/api/material_families/update_entity_material_families.py +3 -3
- uncountable/types/api/outputs/get_output_data.py +7 -7
- uncountable/types/api/outputs/get_output_names.py +4 -4
- uncountable/types/api/outputs/resolve_output_conditions.py +6 -6
- uncountable/types/api/permissions/set_core_permissions.py +7 -7
- uncountable/types/api/project/get_projects.py +4 -4
- uncountable/types/api/project/get_projects_data.py +4 -4
- uncountable/types/api/recipe_links/create_recipe_link.py +3 -3
- uncountable/types/api/recipe_links/remove_recipe_link.py +3 -3
- uncountable/types/api/recipe_metadata/get_recipe_metadata_data.py +4 -4
- uncountable/types/api/recipes/add_recipe_to_project.py +3 -3
- uncountable/types/api/recipes/archive_recipes.py +3 -3
- uncountable/types/api/recipes/associate_recipe_as_input.py +3 -3
- uncountable/types/api/recipes/associate_recipe_as_lot.py +3 -3
- uncountable/types/api/recipes/create_recipe.py +3 -3
- uncountable/types/api/recipes/create_recipes.py +5 -5
- uncountable/types/api/recipes/disassociate_recipe_as_input.py +3 -3
- uncountable/types/api/recipes/edit_recipe_inputs.py +12 -12
- uncountable/types/api/recipes/get_curve.py +3 -3
- uncountable/types/api/recipes/get_recipe_calculations.py +4 -4
- uncountable/types/api/recipes/get_recipe_links.py +3 -3
- uncountable/types/api/recipes/get_recipe_names.py +4 -4
- uncountable/types/api/recipes/get_recipe_output_metadata.py +4 -4
- uncountable/types/api/recipes/get_recipes_data.py +12 -12
- uncountable/types/api/recipes/lock_recipes.py +4 -4
- uncountable/types/api/recipes/remove_recipe_from_project.py +3 -3
- uncountable/types/api/recipes/set_recipe_inputs.py +4 -4
- uncountable/types/api/recipes/set_recipe_metadata.py +3 -3
- uncountable/types/api/recipes/set_recipe_output_annotations.py +7 -7
- uncountable/types/api/recipes/set_recipe_outputs.py +5 -5
- uncountable/types/api/recipes/set_recipe_tags.py +7 -7
- uncountable/types/api/recipes/unarchive_recipes.py +3 -3
- uncountable/types/api/recipes/unlock_recipes.py +3 -3
- uncountable/types/api/triggers/run_trigger.py +3 -3
- uncountable/types/api/uploader/__init__.py +1 -0
- uncountable/types/api/uploader/invoke_uploader.py +38 -0
- uncountable/types/async_batch_processor.py +36 -0
- uncountable/types/async_batch_t.py +6 -4
- uncountable/types/calculations_t.py +2 -2
- uncountable/types/chemical_structure_t.py +2 -2
- uncountable/types/client_base.py +25 -2
- uncountable/types/curves_t.py +3 -3
- uncountable/types/entity_t.py +2 -2
- uncountable/types/experiment_groups_t.py +2 -2
- uncountable/types/field_values_t.py +5 -5
- uncountable/types/fields_t.py +2 -2
- uncountable/types/generic_upload.py +9 -0
- uncountable/types/generic_upload_t.py +41 -0
- uncountable/types/id_source_t.py +5 -5
- uncountable/types/identifier_t.py +4 -4
- uncountable/types/input_attributes_t.py +2 -2
- uncountable/types/inputs_t.py +2 -2
- uncountable/types/job_definition.py +26 -0
- uncountable/types/job_definition_t.py +203 -0
- uncountable/types/outputs_t.py +2 -2
- uncountable/types/phases_t.py +2 -2
- uncountable/types/recipe_identifiers_t.py +4 -4
- uncountable/types/recipe_links_t.py +2 -2
- uncountable/types/recipe_metadata_t.py +4 -4
- uncountable/types/recipe_output_metadata_t.py +2 -2
- uncountable/types/recipe_tags_t.py +2 -2
- uncountable/types/recipe_workflow_steps_t.py +5 -5
- uncountable/types/recipes_t.py +2 -2
- uncountable/types/response_t.py +2 -2
- uncountable/types/secret_retrieval.py +12 -0
- uncountable/types/secret_retrieval_t.py +69 -0
- uncountable/types/units_t.py +2 -2
- uncountable/types/users_t.py +2 -2
- uncountable/types/workflows_t.py +3 -3
- uncountable/integration/types.py +0 -89
- {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/WHEEL +0 -0
- {UncountablePythonSDK-0.0.41.dist-info → UncountablePythonSDK-0.0.42.dist-info}/top_level.txt +0 -0
pkgs/type_spec/emit_python.py
CHANGED
@@ -1,6 +1,6 @@
+import dataclasses
 import io
 import os
-from dataclasses import dataclass, field
 from decimal import Decimal
 from typing import Any, Optional
 
@@ -35,11 +35,11 @@ QUEUED_BATCH_REQUEST_STYPE = builder.SpecTypeDefnObject(
 )
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class TrackingContext:
     namespace: Optional[builder.SpecNamespace] = None
-    namespaces: set[builder.SpecNamespace] = field(default_factory=set)
-    names: set[str] = field(default_factory=set)
+    namespaces: set[builder.SpecNamespace] = dataclasses.field(default_factory=set)
+    names: set[str] = dataclasses.field(default_factory=set)
 
     use_enum: bool = False
     use_serial_string_enum: bool = False
@@ -50,7 +50,7 @@ class TrackingContext:
     use_opaque_key: bool = False
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class Context(TrackingContext):
     out: io.StringIO
     namespace: builder.SpecNamespace
@@ -221,7 +221,7 @@ def _emit_types_imports(*, out: io.StringIO, ctx: Context) -> None:
     if ctx.use_enum:
         out.write("from pkgs.strenum_compat import StrEnum\n")
     if ctx.use_dataclass:
-        out.write("
+        out.write("import dataclasses\n")
     if ctx.use_serial_class:
         out.write("from pkgs.serialization import serial_class\n")
     if ctx.use_serial_union:
@@ -652,7 +652,7 @@ def _emit_string_enum(ctx: Context, stype: builder.SpecTypeDefnStringEnum) -> No
 )
 
 
-@dataclass
+@dataclasses.dataclass
 class EmittedPropertiesMetadata:
     unconverted_keys: set[str]
     unconverted_values: set[str]
@@ -719,7 +719,6 @@ def _emit_properties(
         default = "None"
     elif prop.has_default:
         default = _emit_value(ctx, prop.spec_type, prop.default)
-
     class_out.write(f"{INDENT * num_indent}{py_name}: {ref_type}")
     if default:
         class_out.write(f" = {default}")
@@ -833,7 +832,7 @@ def _emit_type(ctx: Context, stype: builder.SpecType) -> None:
 
     ctx.out.write(")\n")
 
-    dataclass = "@dataclass"
+    dataclass = "@dataclasses.dataclass"
    dc_args = []
    if stype.is_kw_only():
        dc_args.append("kw_only=True")
@@ -1159,7 +1158,7 @@ def _emit_api_argument_lookup(
     for endpoint_root in builder.api_endpoints:
         routes_output = config.routes_output[endpoint_root]
 
-        imports = ["import typing", "
+        imports = ["import typing", "import dataclasses"]
         mappings = []
         for namespace in sorted(
             builder.namespaces.values(),
@@ -1212,13 +1211,13 @@ AT = typing.TypeVar("AT")
 DT = typing.TypeVar("DT")
 
 
-@dataclass(kw_only=True, frozen=True)
+@dataclasses.dataclass(kw_only=True, frozen=True)
 class ApiEndpointKey:
     method: str
     route: str
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class ApiEndpointSpec(typing.Generic[AT, DT]):
     route: str
     arguments_type: type[AT]
@@ -1246,7 +1245,7 @@ __all__ = ["{API_ARGUMENTS_NAME}"]
 CLIENT_CLASS_FILENAME = "client_base"
 CLIENT_CLASS_IMPORTS = [
     "from abc import ABC, abstractmethod",
-    "
+    "import dataclasses",
 ]
 ASYNC_BATCH_PROCESSOR_FILENAME = "async_batch_processor"
 ASYNC_BATCH_PROCESSOR_IMPORTS = [
@@ -1339,7 +1338,7 @@ def _emit_client_class(
 DT = typing.TypeVar("DT")
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class APIRequest:
     method: str
     endpoint: str
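The change itself is mechanical: both the generator and every module it emits now reference the dataclasses module by qualified name instead of importing dataclass and field directly. A minimal sketch of the emitted header style before and after this release (the Arguments class and its field are hypothetical placeholders, not taken from the SDK):

# 0.0.41-style generated module (illustrative):
#
#     from dataclasses import dataclass
#
#     @dataclass(kw_only=True)
#     class Arguments:
#         requests: list[str]

# 0.0.42-style generated module (illustrative):
import dataclasses


@dataclasses.dataclass(kw_only=True)
class Arguments:
    requests: list[str]  # hypothetical field

One plausible motivation is visible in the _emit_type hunk above: the generator assigns a local string variable named dataclass, which previously shadowed the imported decorator inside that function; the qualified form sidesteps that collision.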
uncountable/core/file_upload.py
CHANGED
@@ -16,6 +16,7 @@ _CHUNK_SIZE = 5 * 1024 * 1024 # s3 requires 5MiB minimum
 
 class FileUploadType(StrEnum):
     MEDIA_FILE_UPLOAD = "MEDIA_FILE_UPLOAD"
+    DATA_FILE_UPLOAD = "DATA_FILE_UPLOAD"
 
 
 @dataclass(kw_only=True)
@@ -26,7 +27,14 @@ class MediaFileUpload:
     type: Literal[FileUploadType.MEDIA_FILE_UPLOAD] = FileUploadType.MEDIA_FILE_UPLOAD
 
 
-
+@dataclass(kw_only=True)
+class DataFileUpload:
+    data: BytesIO
+    name: str
+    type: Literal[FileUploadType.DATA_FILE_UPLOAD] = FileUploadType.DATA_FILE_UPLOAD
+
+
+FileUpload = MediaFileUpload | DataFileUpload
 
 
 @dataclass(kw_only=True)
@@ -37,12 +45,14 @@ class FileBytes:
 
 @contextmanager
 def file_upload_data(file_upload: FileUpload) -> Generator[FileBytes, None, None]:
-    match file_upload
-        case
+    match file_upload:
+        case MediaFileUpload():
             with open(file_upload.path, "rb") as f:
                 yield FileBytes(
                     name=Path(file_upload.path).name, bytes_data=BytesIO(f.read())
                 )
+        case DataFileUpload():
+            yield FileBytes(name=file_upload.name, bytes_data=file_upload.data)
 
 
 @dataclass(kw_only=True)
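With DataFileUpload in place alongside MediaFileUpload, callers can upload in-memory bytes without staging a temporary file. A minimal usage sketch, assuming an already-constructed SDK client (client construction is not shown; upload_files(file_uploads=...) is the same call GenericUploadJob makes below):

from io import BytesIO

from uncountable.core.file_upload import DataFileUpload, MediaFileUpload

# In-memory payload: no temporary file needed.
in_memory = DataFileUpload(data=BytesIO(b"col_a,col_b\n1,2\n"), name="sample.csv")

# Disk-backed payload, unchanged from 0.0.41.
on_disk = MediaFileUpload(path="/tmp/spectrum.jpg")

# With a constructed client (hypothetical variable):
# uploaded = client.upload_files(file_uploads=[in_memory, on_disk])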
uncountable/integration/cron.py
CHANGED
@@ -2,9 +2,9 @@ from dataclasses import dataclass
 
 from pkgs.argument_parser import CachedParser
 from uncountable.integration.construct_client import construct_uncountable_client
-from uncountable.integration.executors.
-from uncountable.integration.job import CronJobArguments
-from uncountable.
+from uncountable.integration.executors.executors import resolve_executor
+from uncountable.integration.job import CronJobArguments, JobLogger
+from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata
 
 
 @dataclass
@@ -21,11 +21,14 @@ def cron_job_executor(**kwargs: dict) -> None:
     args = CronJobArguments(
         job_definition=args_passed.definition,
         client=construct_uncountable_client(profile_meta=args_passed.profile_metadata),
+        profile_metadata=args_passed.profile_metadata,
+        logger=JobLogger(
+            profile_metadata=args_passed.profile_metadata,
+            job_definition=args_passed.definition,
+        ),
     )
 
-    job =
-        args_passed.definition.executor, args_passed.profile_metadata
-    )
+    job = resolve_executor(args_passed.definition.executor, args_passed.profile_metadata)
 
     print(f"running job {args_passed.definition.name}")
 
uncountable/integration/entrypoint.py
CHANGED
@@ -4,7 +4,7 @@ from importlib import resources
 from pkgs.argument_parser import CachedParser
 from uncountable.integration.db.connect import create_db_engine
 from uncountable.integration.server import IntegrationServer
-from uncountable.
+from uncountable.types.job_definition_t import ProfileDefinition
 
 profile_parser = CachedParser(ProfileDefinition)
 
uncountable/integration/executors/executors.py
ADDED
@@ -0,0 +1,24 @@
+from typing import assert_never
+
+from uncountable.integration.executors.generic_upload_executor import GenericUploadJob
+from uncountable.integration.executors.script_executor import resolve_script_executor
+from uncountable.integration.job import Job
+from uncountable.types import job_definition_t
+
+
+def resolve_executor(
+    job_executor: job_definition_t.JobExecutor,
+    profile_metadata: job_definition_t.ProfileMetadata,
+) -> Job:
+    match job_executor:
+        case job_definition_t.JobExecutorScript():
+            return resolve_script_executor(
+                job_executor, profile_metadata=profile_metadata
+            )
+        case job_definition_t.JobExecutorGenericUpload():
+            return GenericUploadJob(
+                remote_directories=job_executor.remote_directories,
+                upload_strategy=job_executor.upload_strategy,
+                data_source=job_executor.data_source,
+            )
+    assert_never(job_executor)
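resolve_executor uses structural pattern matching plus typing.assert_never (Python 3.11+) to get exhaustive dispatch over the executor union. A self-contained sketch of the pattern with hypothetical variant classes; if a new union member is added without a matching case, type checkers flag the assert_never call instead of letting the job silently fall through:

from dataclasses import dataclass
from typing import assert_never


@dataclass
class ScriptVariant:  # hypothetical stand-in for JobExecutorScript
    module: str


@dataclass
class UploadVariant:  # hypothetical stand-in for JobExecutorGenericUpload
    uploader_key: str


Executor = ScriptVariant | UploadVariant


def dispatch(executor: Executor) -> str:
    match executor:
        case ScriptVariant():
            return f"script:{executor.module}"
        case UploadVariant():
            return f"upload:{executor.uploader_key}"
    # Unreachable while the cases above cover every Executor variant; a new
    # variant turns this call into a type error rather than a runtime surprise.
    assert_never(executor)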
uncountable/integration/executors/generic_upload_executor.py
ADDED
@@ -0,0 +1,245 @@
+import io
+import os
+import re
+from datetime import datetime, timezone
+
+import paramiko
+
+from pkgs.filesystem_utils import (
+    FileObjectData,
+    FileSystemFileReference,
+    FileSystemObject,
+    FileSystemSFTPConfig,
+    FileTransfer,
+    SFTPSession,
+)
+from pkgs.filesystem_utils.filesystem_session import FileSystemSession
+from uncountable.core.async_batch import AsyncBatchProcessor
+from uncountable.core.file_upload import DataFileUpload, FileUpload
+from uncountable.integration.job import Job, JobArguments, JobLogger, JobResult
+from uncountable.integration.secret_retrieval import retrieve_secret
+from uncountable.types.generic_upload_t import (
+    GenericRemoteDirectoryScope,
+    GenericUploadStrategy,
+)
+from uncountable.types.job_definition_t import (
+    GenericUploadDataSource,
+    GenericUploadDataSourceSFTP,
+)
+
+
+def _filter_files_by_keyword(
+    remote_directory: GenericRemoteDirectoryScope,
+    files: list[FileObjectData],
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    if remote_directory.detection_keyword is None:
+        return files
+
+    raise NotImplementedError("keyword detection not implemented yet")
+
+
+def _filter_by_filename(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.filename_regex is None:
+        return files
+
+    return [
+        file
+        for file in files
+        if file.filename is not None
+        and re.search(remote_directory.filename_regex, file.filename)
+    ]
+
+
+def _filter_by_file_extension(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.valid_file_extensions is None:
+        return files
+
+    return [
+        file
+        for file in files
+        if file.filename is not None
+        and os.path.splitext(file.filename)[-1] in remote_directory.valid_file_extensions
+    ]
+
+
+def _filter_by_max_files(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.max_files is None:
+        return files
+
+    return files[: remote_directory.max_files]
+
+
+def _pull_remote_directory_data(
+    *,
+    filesystem_session: FileSystemSession,
+    remote_directory: GenericRemoteDirectoryScope,
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    files_to_pull = filesystem_session.list_files(
+        dir_path=FileSystemFileReference(
+            filepath=remote_directory.src_path,
+        ),
+        recursive=remote_directory.recursive,
+    )
+    logger.log_info(
+        f"Pulled the following files {files_to_pull} from the remote directory {remote_directory}.",
+    )
+
+    files_to_pull = _filter_by_file_extension(remote_directory, files_to_pull)
+    files_to_pull = _filter_by_filename(remote_directory, files_to_pull)
+    files_to_pull = _filter_by_max_files(remote_directory, files_to_pull)
+
+    logger.log_info(
+        f"Accessing SFTP directory: {remote_directory.src_path} and pulling files: {', '.join([f.filename for f in files_to_pull if f.filename is not None])}",
+    )
+    return filesystem_session.download_files(files_to_pull)
+
+
+def _filter_downloaded_file_data(
+    remote_directory: GenericRemoteDirectoryScope,
+    pulled_file_data: list[FileObjectData],
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    filtered_file_data = _filter_files_by_keyword(
+        remote_directory=remote_directory, files=pulled_file_data, logger=logger
+    )
+    return filtered_file_data
+
+
+def _move_files_post_upload(
+    *,
+    filesystem_session: FileSystemSession,
+    remote_directory_scope: GenericRemoteDirectoryScope,
+    success_file_paths: list[str],
+    failed_file_paths: list[str],
+) -> None:
+    success_file_transfers: list[FileTransfer] = []
+    appended_text = ""
+
+    if remote_directory_scope.prepend_date_on_archive:
+        appended_text = f"-{datetime.now(timezone.utc).timestamp()}"
+
+    for file_path in success_file_paths:
+        filename = os.path.split(file_path)[-1]
+        root, extension = os.path.splitext(filename)
+        new_filename = f"{root}{appended_text}{extension}"
+        # format is source, dest in the tuple
+        success_file_transfers.append((
+            FileSystemFileReference(file_path),
+            FileSystemFileReference(
+                os.path.join(
+                    remote_directory_scope.success_archive_path,
+                    new_filename,
+                )
+            ),
+        ))
+
+    failed_file_transfers: list[FileTransfer] = []
+    for file_path in failed_file_paths:
+        filename = os.path.split(file_path)[-1]
+        root, extension = os.path.splitext(filename)
+        new_filename = f"{root}{appended_text}{extension}"
+        failed_file_transfers.append((
+            FileSystemFileReference(file_path),
+            FileSystemFileReference(
+                os.path.join(
+                    remote_directory_scope.failure_archive_path,
+                    new_filename,
+                )
+            ),
+        ))
+
+    filesystem_session.move_files([*success_file_transfers, *failed_file_transfers])
+
+
+class GenericUploadJob(Job):
+    def __init__(
+        self,
+        data_source: GenericUploadDataSource,
+        remote_directories: list[GenericRemoteDirectoryScope],
+        upload_strategy: GenericUploadStrategy,
+    ) -> None:
+        super().__init__()
+        self.remote_directories = remote_directories
+        self.upload_strategy = upload_strategy
+        self.data_source = data_source
+
+    def _construct_filesystem_session(self, args: JobArguments) -> FileSystemSession:
+        match self.data_source:
+            case GenericUploadDataSourceSFTP():
+                pem_secret = retrieve_secret(
+                    self.data_source.pem_secret, profile_metadata=args.profile_metadata
+                )
+                private_key = paramiko.AgentKey.from_private_key(io.StringIO(pem_secret))
+                sftp_config = FileSystemSFTPConfig(
+                    ip=self.data_source.host,
+                    username=self.data_source.username,
+                    pem_key=private_key,
+                    pem_path=None,
+                )
+                return SFTPSession(sftp_config=sftp_config)
+
+    def run(self, args: JobArguments) -> JobResult:
+        client = args.client
+        logger = args.logger
+
+        batch_executor = AsyncBatchProcessor(client=client)
+        with self._construct_filesystem_session(args) as filesystem_session:
+            files_to_upload: list[FileUpload] = []
+            for remote_directory in self.remote_directories:
+                pulled_file_data = _pull_remote_directory_data(
+                    filesystem_session=filesystem_session,
+                    remote_directory=remote_directory,
+                    logger=logger,
+                )
+                filtered_file_data = _filter_downloaded_file_data(
+                    remote_directory=remote_directory,
+                    pulled_file_data=pulled_file_data,
+                    logger=args.logger,
+                )
+                for file_data in filtered_file_data:
+                    files_to_upload.append(
+                        DataFileUpload(
+                            data=io.BytesIO(file_data.file_data), name=file_data.filename
+                        )
+                    )
+                if not self.upload_strategy.skip_moving_files:
+                    _move_files_post_upload(
+                        filesystem_session=filesystem_session,
+                        remote_directory_scope=remote_directory,
+                        success_file_paths=[
+                            file.filepath if file.filepath is not None else file.filename
+                            for file in filtered_file_data
+                        ],
+                        # IMPROVE: use triggers/webhooks to mark failed files as failed
+                        failed_file_paths=[],
+                    )
+
+            uploaded_files = client.upload_files(file_uploads=files_to_upload)
+
+            file_ids = [file.file_id for file in uploaded_files]
+
+            if self.upload_strategy.parse_files_individually:
+                for file_id in file_ids:
+                    batch_executor.invoke_uploader(
+                        file_ids=[file_id],
+                        uploader_key=self.upload_strategy.uploader_key,
+                        material_family_keys=self.upload_strategy.material_family_keys,
+                    )
+            else:
+                batch_executor.invoke_uploader(
+                    file_ids=file_ids,
+                    uploader_key=self.upload_strategy.uploader_key,
+                    material_family_keys=self.upload_strategy.material_family_keys,
+                )
+
+            batch_executor.send()
+
+        return JobResult(success=True)
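One detail worth noting in _move_files_post_upload: although the flag is named prepend_date_on_archive, the UTC timestamp is inserted between the filename stem and its extension (the local variable is even called appended_text). A self-contained sketch of that renaming rule, not part of the SDK API:

import os
from datetime import datetime, timezone


def archived_name(file_path: str, add_timestamp: bool) -> str:
    # Mirrors the renaming logic in _move_files_post_upload above.
    appended_text = ""
    if add_timestamp:
        appended_text = f"-{datetime.now(timezone.utc).timestamp()}"
    filename = os.path.split(file_path)[-1]
    root, extension = os.path.splitext(filename)
    return f"{root}{appended_text}{extension}"


# archived_name("/incoming/report.csv", True)
# -> e.g. "report-1718000000.123456.csv" (timestamp varies per run)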
uncountable/integration/executors/script_executor.py
CHANGED
@@ -3,7 +3,7 @@ import inspect
 import os
 
 from uncountable.integration.job import Job
-from uncountable.
+from uncountable.types.job_definition_t import JobExecutorScript, ProfileMetadata
 
 
 def resolve_script_executor(
uncountable/integration/job.py
CHANGED
@@ -2,13 +2,30 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
 from uncountable.core.client import Client
-from uncountable.
+from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata
+
+
+class JobLogger:
+    def __init__(
+        self, *, profile_metadata: ProfileMetadata, job_definition: JobDefinition
+    ) -> None:
+        self.profile_metadata = profile_metadata
+        self.job_definition = job_definition
+
+    def log_info(self, *log_objects: object) -> None:
+        # IMPROVE: log a json message with context that can be parsed by OT
+        print(
+            f"[{self.job_definition.id}] in profile ({self.profile_metadata.name}): ",
+            *log_objects,
+        )
 
 
 @dataclass
 class JobArgumentsBase:
     job_definition: JobDefinition
+    profile_metadata: ProfileMetadata
     client: Client
+    logger: JobLogger
 
 
 @dataclass
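The new JobLogger is a thin wrapper over print that prefixes each message with the job id and profile name. A self-contained sketch of the output shape, using hypothetical stand-ins for the job_definition_t types, reduced to the two attributes log_info reads:

from dataclasses import dataclass


@dataclass
class _Job:  # hypothetical stand-in for JobDefinition
    id: str


@dataclass
class _Profile:  # hypothetical stand-in for ProfileMetadata
    name: str


class DemoLogger:
    # Same log_info shape as JobLogger above, for illustration only.
    def __init__(self, *, profile: _Profile, job: _Job) -> None:
        self.profile = profile
        self.job = job

    def log_info(self, *log_objects: object) -> None:
        print(f"[{self.job.id}] in profile ({self.profile.name}): ", *log_objects)


DemoLogger(profile=_Profile(name="acme"), job=_Job(id="nightly-sftp")).log_info(
    "pulled", 3, "files"
)
# -> [nightly-sftp] in profile (acme):  pulled 3 files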
uncountable/integration/secret_retrieval/retrieve_secret.py
ADDED
@@ -0,0 +1,40 @@
+import os
+
+from uncountable.types.job_definition_t import ProfileMetadata
+from uncountable.types.secret_retrieval_t import (
+    SecretRetrieval,
+    SecretRetrievalAWS,
+    SecretRetrievalEnv,
+)
+
+
+class SecretRetrievalError(BaseException):
+    def __init__(
+        self, secret_retrieval: SecretRetrieval, message: str | None = None
+    ) -> None:
+        self.secret_retrieval = secret_retrieval
+        self.message = message
+
+    def __str__(self) -> str:
+        append_message = ""
+        if self.message is not None:
+            append_message = f": {self.message}"
+        return f"{self.secret_retrieval.type} secret retrieval failed{append_message}"
+
+
+def retrieve_secret(
+    secret_retrieval: SecretRetrieval, profile_metadata: ProfileMetadata
+) -> str:
+    match secret_retrieval:
+        case SecretRetrievalEnv():
+            env_name = (
+                f"UNC_{profile_metadata.name.upper()}_{secret_retrieval.env_key.upper()}"
+            )
+            secret = os.environ.get(env_name)
+            if secret is None:
+                raise SecretRetrievalError(
+                    secret_retrieval, f"environment variable {env_name} missing"
+                )
+            return secret
+        case SecretRetrievalAWS():
+            raise NotImplementedError("aws secret retrieval not yet implemented")
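For SecretRetrievalEnv, the environment variable name is derived as UNC_<PROFILE>_<ENV_KEY> with both parts uppercased. A small sketch of just that naming rule (the profile name and key below are hypothetical):

import os


def env_secret_name(profile_name: str, env_key: str) -> str:
    # Same derivation as the SecretRetrievalEnv branch of retrieve_secret above.
    return f"UNC_{profile_name.upper()}_{env_key.upper()}"


# A profile named "acme" with env_key "sftp_pem" reads UNC_ACME_SFTP_PEM:
os.environ["UNC_ACME_SFTP_PEM"] = "-----BEGIN PRIVATE KEY-----..."  # placeholder
assert os.environ[env_secret_name("acme", "sftp_pem")].startswith("-----BEGIN")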
uncountable/integration/server.py
CHANGED
@@ -11,7 +11,7 @@ from apscheduler.triggers.cron import CronTrigger
 from sqlalchemy.engine.base import Engine
 
 from uncountable.integration.cron import CronJobArgs, cron_job_executor
-from uncountable.
+from uncountable.types.job_definition_t import (
     AuthRetrieval,
     CronJobDefinition,
     JobDefinition,
uncountable/types/__init__.py
CHANGED
@@ -28,6 +28,7 @@ from .api.batch import execute_batch_load_async as execute_batch_load_async_t
 from . import experiment_groups_t as experiment_groups_t
 from . import field_values_t as field_values_t
 from . import fields_t as fields_t
+from . import generic_upload_t as generic_upload_t
 from .api.recipes import get_curve as get_curve_t
 from .api.entity import get_entities_data as get_entities_data_t
 from .api.inputs import get_input_data as get_input_data_t
@@ -48,6 +49,8 @@ from . import id_source_t as id_source_t
 from . import identifier_t as identifier_t
 from . import input_attributes_t as input_attributes_t
 from . import inputs_t as inputs_t
+from .api.uploader import invoke_uploader as invoke_uploader_t
+from . import job_definition_t as job_definition_t
 from .api.entity import list_entities as list_entities_t
 from .api.id_source import list_id_source as list_id_source_t
 from .api.entity import lock_entity as lock_entity_t
@@ -71,6 +74,7 @@ from .api.entity import resolve_entity_ids as resolve_entity_ids_t
 from .api.outputs import resolve_output_conditions as resolve_output_conditions_t
 from . import response_t as response_t
 from .api.triggers import run_trigger as run_trigger_t
+from . import secret_retrieval_t as secret_retrieval_t
 from .api.permissions import set_core_permissions as set_core_permissions_t
 from .api.inputs import set_input_attribute_values as set_input_attribute_values_t
 from .api.inputs import set_input_category as set_input_category_t
@@ -119,6 +123,7 @@ __all__: list[str] = [
     "experiment_groups_t",
     "field_values_t",
     "fields_t",
+    "generic_upload_t",
     "get_curve_t",
     "get_entities_data_t",
     "get_input_data_t",
@@ -139,6 +144,8 @@ __all__: list[str] = [
     "identifier_t",
     "input_attributes_t",
     "inputs_t",
+    "invoke_uploader_t",
+    "job_definition_t",
     "list_entities_t",
     "list_id_source_t",
     "lock_entity_t",
@@ -162,6 +169,7 @@ __all__: list[str] = [
     "resolve_output_conditions_t",
     "response_t",
     "run_trigger_t",
+    "secret_retrieval_t",
     "set_core_permissions_t",
     "set_input_attribute_values_t",
     "set_input_category_t",
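The net effect of these re-exports is that the new modules are reachable either through the package namespace or by direct import; a short sketch of both styles:

# Via the package namespace (re-exported above):
from uncountable.types import generic_upload_t, job_definition_t, secret_retrieval_t

# Or directly, as the integration modules in this release do:
from uncountable.types.job_definition_t import ProfileMetadata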
uncountable/types/api/batch/execute_batch.py
CHANGED
@@ -8,7 +8,7 @@ import typing # noqa: F401
 import datetime # noqa: F401
 from decimal import Decimal # noqa: F401
 from pkgs.strenum_compat import StrEnum
-
+import dataclasses
 from pkgs.serialization import serial_class
 from ... import base_t
 
@@ -43,7 +43,7 @@ class RequestMethod(StrEnum):
 @serial_class(
     unconverted_values={"data"},
 )
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class BatchRequest:
     path: str
     method: RequestMethod
@@ -51,7 +51,7 @@ class BatchRequest:
 
 
 # DO NOT MODIFY -- This file is generated by type_spec
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class Arguments:
     requests: list[BatchRequest]
 
@@ -60,7 +60,7 @@ class Arguments:
 @serial_class(
     unconverted_values={"response"},
 )
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class BatchResponse:
     path: str
     method: RequestMethod
@@ -69,7 +69,7 @@ class BatchResponse:
 
 
 # DO NOT MODIFY -- This file is generated by type_spec
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class Data:
     responses: list[BatchResponse]
 # DO NOT MODIFY -- This file is generated by type_spec
uncountable/types/api/batch/execute_batch_load_async.py
CHANGED
@@ -7,7 +7,7 @@ from __future__ import annotations
 import typing # noqa: F401
 import datetime # noqa: F401
 from decimal import Decimal # noqa: F401
-
+import dataclasses
 from ... import async_batch_t
 from ... import base_t
 
@@ -23,13 +23,13 @@ ENDPOINT_PATH = "api/external/batch/execute_batch_load_async"
 
 
 # DO NOT MODIFY -- This file is generated by type_spec
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class Arguments:
     requests: list[async_batch_t.AsyncBatchRequest]
 
 
 # DO NOT MODIFY -- This file is generated by type_spec
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class Data:
     job_id: base_t.ObjectId
 # DO NOT MODIFY -- This file is generated by type_spec