UncountablePythonSDK 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of UncountablePythonSDK might be problematic.
- {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/METADATA +5 -1
- {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/RECORD +126 -107
- docs/requirements.txt +3 -3
- examples/invoke_uploader.py +23 -0
- pkgs/argument_parser/argument_parser.py +2 -2
- pkgs/filesystem_utils/__init__.py +17 -0
- pkgs/filesystem_utils/_gdrive_session.py +306 -0
- pkgs/filesystem_utils/_local_session.py +69 -0
- pkgs/filesystem_utils/_sftp_session.py +147 -0
- pkgs/filesystem_utils/file_type_utils.py +61 -0
- pkgs/filesystem_utils/filesystem_session.py +39 -0
- pkgs/serialization/yaml.py +54 -0
- pkgs/type_spec/config.py +1 -13
- pkgs/type_spec/emit_open_api.py +7 -5
- pkgs/type_spec/emit_open_api_util.py +4 -2
- pkgs/type_spec/emit_python.py +13 -14
- pkgs/type_spec/load_types.py +2 -1
- pkgs/type_spec/value_spec/__main__.py +1 -1
- uncountable/core/file_upload.py +13 -3
- uncountable/integration/construct_client.py +1 -1
- uncountable/integration/cron.py +11 -6
- uncountable/integration/entrypoint.py +6 -7
- uncountable/integration/executors/executors.py +24 -0
- uncountable/integration/executors/generic_upload_executor.py +245 -0
- uncountable/integration/executors/script_executor.py +1 -1
- uncountable/integration/job.py +19 -2
- uncountable/integration/secret_retrieval/__init__.py +3 -0
- uncountable/integration/secret_retrieval/retrieve_secret.py +40 -0
- uncountable/integration/server.py +1 -1
- uncountable/types/__init__.py +8 -0
- uncountable/types/api/batch/execute_batch.py +5 -5
- uncountable/types/api/batch/execute_batch_load_async.py +3 -3
- uncountable/types/api/chemical/convert_chemical_formats.py +4 -4
- uncountable/types/api/entity/create_entities.py +4 -4
- uncountable/types/api/entity/create_entity.py +4 -4
- uncountable/types/api/entity/get_entities_data.py +4 -4
- uncountable/types/api/entity/list_entities.py +5 -5
- uncountable/types/api/entity/lock_entity.py +3 -3
- uncountable/types/api/entity/resolve_entity_ids.py +4 -4
- uncountable/types/api/entity/set_values.py +3 -3
- uncountable/types/api/entity/transition_entity_phase.py +5 -5
- uncountable/types/api/entity/unlock_entity.py +3 -3
- uncountable/types/api/equipment/associate_equipment_input.py +3 -3
- uncountable/types/api/field_options/upsert_field_options.py +4 -4
- uncountable/types/api/id_source/list_id_source.py +4 -4
- uncountable/types/api/id_source/match_id_source.py +4 -4
- uncountable/types/api/input_groups/get_input_group_names.py +4 -4
- uncountable/types/api/inputs/create_inputs.py +5 -5
- uncountable/types/api/inputs/get_input_data.py +7 -7
- uncountable/types/api/inputs/get_input_names.py +4 -4
- uncountable/types/api/inputs/get_inputs_data.py +7 -7
- uncountable/types/api/inputs/set_input_attribute_values.py +4 -4
- uncountable/types/api/inputs/set_input_category.py +3 -3
- uncountable/types/api/inputs/set_input_subcategories.py +3 -3
- uncountable/types/api/inputs/set_intermediate_type.py +3 -3
- uncountable/types/api/material_families/update_entity_material_families.py +3 -3
- uncountable/types/api/outputs/get_output_data.py +7 -7
- uncountable/types/api/outputs/get_output_names.py +4 -4
- uncountable/types/api/outputs/resolve_output_conditions.py +6 -6
- uncountable/types/api/permissions/set_core_permissions.py +7 -7
- uncountable/types/api/project/get_projects.py +4 -4
- uncountable/types/api/project/get_projects_data.py +4 -4
- uncountable/types/api/recipe_links/create_recipe_link.py +3 -3
- uncountable/types/api/recipe_links/remove_recipe_link.py +3 -3
- uncountable/types/api/recipe_metadata/get_recipe_metadata_data.py +4 -4
- uncountable/types/api/recipes/add_recipe_to_project.py +3 -3
- uncountable/types/api/recipes/archive_recipes.py +3 -3
- uncountable/types/api/recipes/associate_recipe_as_input.py +3 -3
- uncountable/types/api/recipes/associate_recipe_as_lot.py +3 -3
- uncountable/types/api/recipes/create_recipe.py +3 -3
- uncountable/types/api/recipes/create_recipes.py +5 -5
- uncountable/types/api/recipes/disassociate_recipe_as_input.py +3 -3
- uncountable/types/api/recipes/edit_recipe_inputs.py +12 -12
- uncountable/types/api/recipes/get_curve.py +3 -3
- uncountable/types/api/recipes/get_recipe_calculations.py +4 -4
- uncountable/types/api/recipes/get_recipe_links.py +3 -3
- uncountable/types/api/recipes/get_recipe_names.py +4 -4
- uncountable/types/api/recipes/get_recipe_output_metadata.py +4 -4
- uncountable/types/api/recipes/get_recipes_data.py +12 -12
- uncountable/types/api/recipes/lock_recipes.py +4 -4
- uncountable/types/api/recipes/remove_recipe_from_project.py +3 -3
- uncountable/types/api/recipes/set_recipe_inputs.py +4 -4
- uncountable/types/api/recipes/set_recipe_metadata.py +3 -3
- uncountable/types/api/recipes/set_recipe_output_annotations.py +7 -7
- uncountable/types/api/recipes/set_recipe_outputs.py +5 -5
- uncountable/types/api/recipes/set_recipe_tags.py +7 -7
- uncountable/types/api/recipes/unarchive_recipes.py +3 -3
- uncountable/types/api/recipes/unlock_recipes.py +3 -3
- uncountable/types/api/triggers/run_trigger.py +3 -3
- uncountable/types/api/uploader/__init__.py +1 -0
- uncountable/types/api/uploader/invoke_uploader.py +38 -0
- uncountable/types/async_batch_processor.py +36 -0
- uncountable/types/async_batch_t.py +6 -4
- uncountable/types/calculations_t.py +2 -2
- uncountable/types/chemical_structure_t.py +2 -2
- uncountable/types/client_base.py +25 -2
- uncountable/types/curves_t.py +3 -3
- uncountable/types/entity_t.py +2 -2
- uncountable/types/experiment_groups_t.py +2 -2
- uncountable/types/field_values_t.py +5 -5
- uncountable/types/fields_t.py +2 -2
- uncountable/types/generic_upload.py +9 -0
- uncountable/types/generic_upload_t.py +41 -0
- uncountable/types/id_source_t.py +5 -5
- uncountable/types/identifier_t.py +4 -4
- uncountable/types/input_attributes_t.py +2 -2
- uncountable/types/inputs_t.py +2 -2
- uncountable/types/job_definition.py +26 -0
- uncountable/types/job_definition_t.py +203 -0
- uncountable/types/outputs_t.py +2 -2
- uncountable/types/phases_t.py +2 -2
- uncountable/types/recipe_identifiers_t.py +4 -4
- uncountable/types/recipe_links_t.py +2 -2
- uncountable/types/recipe_metadata_t.py +4 -4
- uncountable/types/recipe_output_metadata_t.py +2 -2
- uncountable/types/recipe_tags_t.py +2 -2
- uncountable/types/recipe_workflow_steps_t.py +5 -5
- uncountable/types/recipes_t.py +2 -2
- uncountable/types/response_t.py +2 -2
- uncountable/types/secret_retrieval.py +12 -0
- uncountable/types/secret_retrieval_t.py +69 -0
- uncountable/types/units_t.py +2 -2
- uncountable/types/users_t.py +2 -2
- uncountable/types/workflows_t.py +3 -3
- uncountable/integration/types.py +0 -89
- {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/WHEEL +0 -0
- {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/top_level.txt +0 -0
pkgs/type_spec/config.py
CHANGED
@@ -1,10 +1,9 @@
 import os
 from collections.abc import Callable, Mapping
 from dataclasses import dataclass
-from decimal import Decimal
 from typing import Self, TypeVar
 
-import yaml
+from pkgs.serialization import yaml
 
 ConfigValueType = str | None | Mapping[str, str | None] | list[str]
 
@@ -103,17 +102,6 @@ def _parse_language(config_class: type[_T], raw_value: ConfigValueType) -> _T:
     return config_class(**raw_value)
 
 
-def _decimal_constructor(loader, node): # type:ignore
-    value = loader.construct_scalar(node)
-    return Decimal(value)
-
-
-# A semi-acceptable patch to force a number to be parsed as a decimal, since pyyaml
-# parses them as lossy floats otherwise. Though a bit ugly, at least this way we have
-# support for decimal constants
-yaml.SafeLoader.add_constructor("!decimal", _decimal_constructor)
-
-
 def parse_yaml_config(config_file: str) -> Config:
     with open(config_file, encoding="utf-8") as input:
         raw_config: dict[str, ConfigValueType] = yaml.safe_load(input)
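The `!decimal` SafeLoader constructor removed above presumably moves into the new pkgs/serialization/yaml module (+54 lines in the file list), so every caller that switches to `from pkgs.serialization import yaml` gets the same lossless-Decimal behavior. A minimal sketch of what such a wrapper could look like, inferred only from the removed code; the actual pkgs/serialization/yaml.py is not shown in this diff:

# Hypothetical wrapper module; the safe_load/dumps names mirror the calls
# visible in the hunks of this diff, everything else is an assumption.
from decimal import Decimal
from typing import IO, Any

import yaml as _yaml


def _decimal_constructor(loader: _yaml.SafeLoader, node: _yaml.ScalarNode) -> Decimal:
    # Parse `!decimal 1.1` tags losslessly instead of as a lossy float.
    return Decimal(loader.construct_scalar(node))


_yaml.SafeLoader.add_constructor("!decimal", _decimal_constructor)


def safe_load(stream: str | IO[str]) -> Any:
    return _yaml.safe_load(stream)


def dumps(obj: Any, *, sort_keys: bool = True) -> str:
    # pyyaml's dump returns a str when no stream argument is given
    return _yaml.dump(obj, sort_keys=sort_keys)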
pkgs/type_spec/emit_open_api.py
CHANGED
@@ -9,7 +9,8 @@ import json
 import re
 from typing import Collection, cast
 
-import yaml
+from pkgs.serialization import yaml
+from pkgs.serialization_util.serialization_helpers import serialize_for_api
 
 from . import builder, util
 from .builder import EndpointGuideKey, RootGuideKey
@@ -169,7 +170,7 @@ def _serialize_global_context(ctx: EmitOpenAPIGlobalContext) -> str:
         oa_paths[path.path] = {"$ref": path.ref}
     oa_root["paths"] = oa_paths
 
-    return yaml.
+    return yaml.dumps(oa_root, sort_keys=False)
 
 
 def _is_empty_object_type(typ: OpenAPIType) -> bool:
@@ -413,7 +414,7 @@ def _emit_namespace(
 
     path = f"{config.types_output}/common/{'/'.join(namespace.path)}.yaml"
     oa_namespace = {"components": oa_components}
-    _rewrite_with_notice(path, yaml.
+    _rewrite_with_notice(path, yaml.dumps(oa_namespace, sort_keys=False))
 
 
 def _emit_type(
@@ -568,6 +569,7 @@ def _emit_endpoint(
         description = f"**[External API-Endpoint]** <br/> {description}"
 
     path_cutoff = min(3, len(namespace.path) - 1)
+
     ctx.endpoint = EmitOpenAPIEndpoint(
         method=namespace.endpoint.method.lower(),
         tags=[tag_name],
@@ -580,8 +582,8 @@
                 ref_name=f"ex_{i}",
                 summary=example.summary,
                 description=example.description,
-                arguments=example.arguments,
-                data=example.data,
+                arguments=serialize_for_api(example.arguments),
+                data=serialize_for_api(example.data),
             )
             for i, example in enumerate(endpoint_examples)
         ],
pkgs/type_spec/emit_open_api_util.py
CHANGED
@@ -8,6 +8,8 @@ from collections import defaultdict
 from dataclasses import dataclass, field
 from typing import TypeAlias
 
+from pkgs.serialization_util.serialization_helpers import JsonValue
+
 from . import builder
 from .open_api_util import OpenAPIType
 
@@ -68,8 +70,8 @@ class EmitOpenAPIEndpointExample:
     ref_name: str
     summary: str
     description: str
-    arguments: dict[str,
-    data: dict[str,
+    arguments: dict[str, JsonValue]
+    data: dict[str, JsonValue]
 
 
 EmitOpenAPIStabilityLevel = builder.StabilityLevel
pkgs/type_spec/emit_python.py
CHANGED
@@ -1,6 +1,6 @@
+import dataclasses
 import io
 import os
-from dataclasses import dataclass, field
 from decimal import Decimal
 from typing import Any, Optional
 
@@ -35,11 +35,11 @@ QUEUED_BATCH_REQUEST_STYPE = builder.SpecTypeDefnObject(
 )
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class TrackingContext:
     namespace: Optional[builder.SpecNamespace] = None
-    namespaces: set[builder.SpecNamespace] = field(default_factory=set)
-    names: set[str] = field(default_factory=set)
+    namespaces: set[builder.SpecNamespace] = dataclasses.field(default_factory=set)
+    names: set[str] = dataclasses.field(default_factory=set)
 
     use_enum: bool = False
     use_serial_string_enum: bool = False
@@ -50,7 +50,7 @@ class TrackingContext:
     use_opaque_key: bool = False
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class Context(TrackingContext):
     out: io.StringIO
     namespace: builder.SpecNamespace
@@ -221,7 +221,7 @@ def _emit_types_imports(*, out: io.StringIO, ctx: Context) -> None:
     if ctx.use_enum:
         out.write("from pkgs.strenum_compat import StrEnum\n")
     if ctx.use_dataclass:
-        out.write("
+        out.write("import dataclasses\n")
     if ctx.use_serial_class:
         out.write("from pkgs.serialization import serial_class\n")
     if ctx.use_serial_union:
@@ -652,7 +652,7 @@ def _emit_string_enum(ctx: Context, stype: builder.SpecTypeDefnStringEnum) -> No
     )
 
 
-@dataclass
+@dataclasses.dataclass
 class EmittedPropertiesMetadata:
     unconverted_keys: set[str]
     unconverted_values: set[str]
@@ -719,7 +719,6 @@ def _emit_properties(
         default = "None"
     elif prop.has_default:
         default = _emit_value(ctx, prop.spec_type, prop.default)
-
     class_out.write(f"{INDENT * num_indent}{py_name}: {ref_type}")
     if default:
         class_out.write(f" = {default}")
@@ -833,7 +832,7 @@ def _emit_type(ctx: Context, stype: builder.SpecType) -> None:
 
     ctx.out.write(")\n")
 
-    dataclass = "@dataclass"
+    dataclass = "@dataclasses.dataclass"
     dc_args = []
     if stype.is_kw_only():
         dc_args.append("kw_only=True")
@@ -1159,7 +1158,7 @@ def _emit_api_argument_lookup(
     for endpoint_root in builder.api_endpoints:
        routes_output = config.routes_output[endpoint_root]
 
-        imports = ["import typing", "
+        imports = ["import typing", "import dataclasses"]
         mappings = []
         for namespace in sorted(
             builder.namespaces.values(),
@@ -1212,13 +1211,13 @@ AT = typing.TypeVar("AT")
 DT = typing.TypeVar("DT")
 
 
-@dataclass(kw_only=True, frozen=True)
+@dataclasses.dataclass(kw_only=True, frozen=True)
 class ApiEndpointKey:
     method: str
     route: str
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class ApiEndpointSpec(typing.Generic[AT, DT]):
     route: str
     arguments_type: type[AT]
@@ -1246,7 +1245,7 @@ __all__ = ["{API_ARGUMENTS_NAME}"]
 CLIENT_CLASS_FILENAME = "client_base"
 CLIENT_CLASS_IMPORTS = [
     "from abc import ABC, abstractmethod",
-    "
+    "import dataclasses",
 ]
 ASYNC_BATCH_PROCESSOR_FILENAME = "async_batch_processor"
 ASYNC_BATCH_PROCESSOR_IMPORTS = [
@@ -1339,7 +1338,7 @@ def _emit_client_class(
 DT = typing.TypeVar("DT")
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class APIRequest:
     method: str
     endpoint: str
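One plausible motive for this mechanical `dataclass` → `dataclasses.dataclass` rename is visible in the `_emit_type` hunk: the emitter binds a local string named `dataclass`, which would shadow a `from dataclasses import dataclass` style import in the same scope. A tiny standalone illustration of the hazard (hypothetical code, not from the SDK):

import dataclasses


def emit_decorator(kw_only: bool) -> str:
    # With `from dataclasses import dataclass` in scope, this local string
    # would shadow the decorator; module-qualified access avoids the clash.
    dataclass = "@dataclasses.dataclass"
    if kw_only:
        dataclass += "(kw_only=True)"
    return dataclass


@dataclasses.dataclass(kw_only=True)
class Example:
    name: str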
pkgs/type_spec/load_types.py
CHANGED
@@ -20,9 +20,9 @@ import sys
 from typing import TypeVar, cast
 
 import regex as re
-import yaml
 
 from main.base.types import base_t, value_spec_t
+from pkgs.serialization import yaml
 
 from ..util import parse_type_str, rewrite_file
 from .convert_type import convert_to_value_spec_type
uncountable/core/file_upload.py
CHANGED
@@ -16,6 +16,7 @@ _CHUNK_SIZE = 5 * 1024 * 1024 # s3 requires 5MiB minimum
 
 class FileUploadType(StrEnum):
     MEDIA_FILE_UPLOAD = "MEDIA_FILE_UPLOAD"
+    DATA_FILE_UPLOAD = "DATA_FILE_UPLOAD"
 
 
 @dataclass(kw_only=True)
@@ -26,7 +27,14 @@ class MediaFileUpload:
     type: Literal[FileUploadType.MEDIA_FILE_UPLOAD] = FileUploadType.MEDIA_FILE_UPLOAD
 
 
-
+@dataclass(kw_only=True)
+class DataFileUpload:
+    data: BytesIO
+    name: str
+    type: Literal[FileUploadType.DATA_FILE_UPLOAD] = FileUploadType.DATA_FILE_UPLOAD
+
+
+FileUpload = MediaFileUpload | DataFileUpload
 
 
 @dataclass(kw_only=True)
@@ -37,12 +45,14 @@ class FileBytes:
 
 @contextmanager
 def file_upload_data(file_upload: FileUpload) -> Generator[FileBytes, None, None]:
-    match file_upload
-    case
+    match file_upload:
+        case MediaFileUpload():
             with open(file_upload.path, "rb") as f:
                 yield FileBytes(
                     name=Path(file_upload.path).name, bytes_data=BytesIO(f.read())
                 )
+        case DataFileUpload():
+            yield FileBytes(name=file_upload.name, bytes_data=file_upload.data)
 
 
 @dataclass(kw_only=True)
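The new `DataFileUpload` variant lets callers upload an in-memory buffer without staging a file on disk, which is how generic_upload_executor.py (below) forwards bytes pulled over SFTP. A minimal usage sketch, assuming an already-constructed SDK client; `upload_files(file_uploads=...)` appears later in this diff:

from io import BytesIO

from uncountable.core.file_upload import DataFileUpload

# Build the upload from bytes already in memory; `type` defaults to
# FileUploadType.DATA_FILE_UPLOAD per the dataclass above.
upload = DataFileUpload(name="results.csv", data=BytesIO(b"sample,value\na,1\n"))

# Client construction omitted; see construct_uncountable_client in this diff:
# uploaded = client.upload_files(file_uploads=[upload])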
uncountable/integration/cron.py
CHANGED
@@ -2,9 +2,9 @@ from dataclasses import dataclass
 
 from pkgs.argument_parser import CachedParser
 from uncountable.integration.construct_client import construct_uncountable_client
-from uncountable.integration.executors.
-from uncountable.integration.job import CronJobArguments
-from uncountable.
+from uncountable.integration.executors.executors import resolve_executor
+from uncountable.integration.job import CronJobArguments, JobLogger
+from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata
 
 
 @dataclass
@@ -21,10 +21,15 @@ def cron_job_executor(**kwargs: dict) -> None:
     args = CronJobArguments(
         job_definition=args_passed.definition,
         client=construct_uncountable_client(profile_meta=args_passed.profile_metadata),
+        profile_metadata=args_passed.profile_metadata,
+        logger=JobLogger(
+            profile_metadata=args_passed.profile_metadata,
+            job_definition=args_passed.definition,
+        ),
     )
 
-    job =
-
-    )
+    job = resolve_executor(args_passed.definition.executor, args_passed.profile_metadata)
+
+    print(f"running job {args_passed.definition.name}")
 
     job.run(args=args)
uncountable/integration/entrypoint.py
CHANGED
@@ -4,15 +4,14 @@ from importlib import resources
 from pkgs.argument_parser import CachedParser
 from uncountable.integration.db.connect import create_db_engine
 from uncountable.integration.server import IntegrationServer
-from uncountable.
+from uncountable.types.job_definition_t import ProfileDefinition
 
 profile_parser = CachedParser(ProfileDefinition)
 
 
-def main() -> None:
+def main(blocking: bool) -> None:
     profiles_module = os.environ["UNC_PROFILES_MODULE"]
     with IntegrationServer(create_db_engine()) as server:
-        # TODO: Loop through all job spec yaml files and call server.add_job
         profiles = [
             entry
             for entry in resources.files(profiles_module).iterdir()
@@ -26,7 +25,7 @@ def main() -> None:
                     resource="profile.yaml",
                 )
             except FileNotFoundError as e:
-                print("WARN: profile.yaml not found", e)
+                print(f"WARN: profile.yaml not found for {profile_name}", e)
                 continue
             server.register_profile(
                 profile_name=profile_name,
@@ -35,8 +34,8 @@ def main() -> None:
                 jobs=profile.jobs,
             )
 
-
+        if blocking:
+            server.serve_forever()
 
 
-
-main()
+main(__name__ == "__main__")
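Note the new call at module bottom: `main(__name__ == "__main__")` runs profile registration on both import and direct execution; only the blocking `serve_forever()` is gated. Spelled out, it is equivalent to:

# Equivalent, more familiar spelling of `main(__name__ == "__main__")`:
if __name__ == "__main__":
    main(blocking=True)   # run as a script: register profiles, then block
else:
    main(blocking=False)  # imported: register profiles, return immediately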
uncountable/integration/executors/executors.py
ADDED
@@ -0,0 +1,24 @@
+from typing import assert_never
+
+from uncountable.integration.executors.generic_upload_executor import GenericUploadJob
+from uncountable.integration.executors.script_executor import resolve_script_executor
+from uncountable.integration.job import Job
+from uncountable.types import job_definition_t
+
+
+def resolve_executor(
+    job_executor: job_definition_t.JobExecutor,
+    profile_metadata: job_definition_t.ProfileMetadata,
+) -> Job:
+    match job_executor:
+        case job_definition_t.JobExecutorScript():
+            return resolve_script_executor(
+                job_executor, profile_metadata=profile_metadata
+            )
+        case job_definition_t.JobExecutorGenericUpload():
+            return GenericUploadJob(
+                remote_directories=job_executor.remote_directories,
+                upload_strategy=job_executor.upload_strategy,
+                data_source=job_executor.data_source,
+            )
+    assert_never(job_executor)
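The `match` plus `typing.assert_never` at the end gives static exhaustiveness checking: if a new `JobExecutor` variant is added without a corresponding `case`, type checkers flag the `assert_never` call (and it raises at runtime). A standalone illustration of the pattern with hypothetical types, not SDK code:

from dataclasses import dataclass
from typing import assert_never  # Python 3.11+


@dataclass
class Walk:
    pass


@dataclass
class Drive:
    km: float


def describe(mode: Walk | Drive) -> str:
    match mode:
        case Walk():
            return "on foot"
        case Drive():
            return f"driving {mode.km} km"
    # Unreachable while the union is fully covered; adding a variant
    # without a case makes this a type error (and a runtime error).
    assert_never(mode)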
uncountable/integration/executors/generic_upload_executor.py
ADDED
@@ -0,0 +1,245 @@
+import io
+import os
+import re
+from datetime import datetime, timezone
+
+import paramiko
+
+from pkgs.filesystem_utils import (
+    FileObjectData,
+    FileSystemFileReference,
+    FileSystemObject,
+    FileSystemSFTPConfig,
+    FileTransfer,
+    SFTPSession,
+)
+from pkgs.filesystem_utils.filesystem_session import FileSystemSession
+from uncountable.core.async_batch import AsyncBatchProcessor
+from uncountable.core.file_upload import DataFileUpload, FileUpload
+from uncountable.integration.job import Job, JobArguments, JobLogger, JobResult
+from uncountable.integration.secret_retrieval import retrieve_secret
+from uncountable.types.generic_upload_t import (
+    GenericRemoteDirectoryScope,
+    GenericUploadStrategy,
+)
+from uncountable.types.job_definition_t import (
+    GenericUploadDataSource,
+    GenericUploadDataSourceSFTP,
+)
+
+
+def _filter_files_by_keyword(
+    remote_directory: GenericRemoteDirectoryScope,
+    files: list[FileObjectData],
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    if remote_directory.detection_keyword is None:
+        return files
+
+    raise NotImplementedError("keyword detection not implemented yet")
+
+
+def _filter_by_filename(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.filename_regex is None:
+        return files
+
+    return [
+        file
+        for file in files
+        if file.filename is not None
+        and re.search(remote_directory.filename_regex, file.filename)
+    ]
+
+
+def _filter_by_file_extension(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.valid_file_extensions is None:
+        return files
+
+    return [
+        file
+        for file in files
+        if file.filename is not None
+        and os.path.splitext(file.filename)[-1] in remote_directory.valid_file_extensions
+    ]
+
+
+def _filter_by_max_files(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.max_files is None:
+        return files
+
+    return files[: remote_directory.max_files]
+
+
+def _pull_remote_directory_data(
+    *,
+    filesystem_session: FileSystemSession,
+    remote_directory: GenericRemoteDirectoryScope,
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    files_to_pull = filesystem_session.list_files(
+        dir_path=FileSystemFileReference(
+            filepath=remote_directory.src_path,
+        ),
+        recursive=remote_directory.recursive,
+    )
+    logger.log_info(
+        f"Pulled the following files {files_to_pull} from the remote directory {remote_directory}.",
+    )
+
+    files_to_pull = _filter_by_file_extension(remote_directory, files_to_pull)
+    files_to_pull = _filter_by_filename(remote_directory, files_to_pull)
+    files_to_pull = _filter_by_max_files(remote_directory, files_to_pull)
+
+    logger.log_info(
+        f"Accessing SFTP directory: {remote_directory.src_path} and pulling files: {', '.join([f.filename for f in files_to_pull if f.filename is not None])}",
+    )
+    return filesystem_session.download_files(files_to_pull)
+
+
+def _filter_downloaded_file_data(
+    remote_directory: GenericRemoteDirectoryScope,
+    pulled_file_data: list[FileObjectData],
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    filtered_file_data = _filter_files_by_keyword(
+        remote_directory=remote_directory, files=pulled_file_data, logger=logger
+    )
+    return filtered_file_data
+
+
+def _move_files_post_upload(
+    *,
+    filesystem_session: FileSystemSession,
+    remote_directory_scope: GenericRemoteDirectoryScope,
+    success_file_paths: list[str],
+    failed_file_paths: list[str],
+) -> None:
+    success_file_transfers: list[FileTransfer] = []
+    appended_text = ""
+
+    if remote_directory_scope.prepend_date_on_archive:
+        appended_text = f"-{datetime.now(timezone.utc).timestamp()}"
+
+    for file_path in success_file_paths:
+        filename = os.path.split(file_path)[-1]
+        root, extension = os.path.splitext(filename)
+        new_filename = f"{root}{appended_text}{extension}"
+        # format is source, dest in the tuple
+        success_file_transfers.append((
+            FileSystemFileReference(file_path),
+            FileSystemFileReference(
+                os.path.join(
+                    remote_directory_scope.success_archive_path,
+                    new_filename,
+                )
+            ),
+        ))
+
+    failed_file_transfers: list[FileTransfer] = []
+    for file_path in failed_file_paths:
+        filename = os.path.split(file_path)[-1]
+        root, extension = os.path.splitext(filename)
+        new_filename = f"{root}{appended_text}{extension}"
+        failed_file_transfers.append((
+            FileSystemFileReference(file_path),
+            FileSystemFileReference(
+                os.path.join(
+                    remote_directory_scope.failure_archive_path,
+                    new_filename,
+                )
+            ),
+        ))
+
+    filesystem_session.move_files([*success_file_transfers, *failed_file_transfers])
+
+
+class GenericUploadJob(Job):
+    def __init__(
+        self,
+        data_source: GenericUploadDataSource,
+        remote_directories: list[GenericRemoteDirectoryScope],
+        upload_strategy: GenericUploadStrategy,
+    ) -> None:
+        super().__init__()
+        self.remote_directories = remote_directories
+        self.upload_strategy = upload_strategy
+        self.data_source = data_source
+
+    def _construct_filesystem_session(self, args: JobArguments) -> FileSystemSession:
+        match self.data_source:
+            case GenericUploadDataSourceSFTP():
+                pem_secret = retrieve_secret(
+                    self.data_source.pem_secret, profile_metadata=args.profile_metadata
+                )
+                private_key = paramiko.AgentKey.from_private_key(io.StringIO(pem_secret))
+                sftp_config = FileSystemSFTPConfig(
+                    ip=self.data_source.host,
+                    username=self.data_source.username,
+                    pem_key=private_key,
+                    pem_path=None,
+                )
+                return SFTPSession(sftp_config=sftp_config)
+
+    def run(self, args: JobArguments) -> JobResult:
+        client = args.client
+        logger = args.logger
+
+        batch_executor = AsyncBatchProcessor(client=client)
+        with self._construct_filesystem_session(args) as filesystem_session:
+            files_to_upload: list[FileUpload] = []
+            for remote_directory in self.remote_directories:
+                pulled_file_data = _pull_remote_directory_data(
+                    filesystem_session=filesystem_session,
+                    remote_directory=remote_directory,
+                    logger=logger,
+                )
+                filtered_file_data = _filter_downloaded_file_data(
+                    remote_directory=remote_directory,
+                    pulled_file_data=pulled_file_data,
+                    logger=args.logger,
+                )
+                for file_data in filtered_file_data:
+                    files_to_upload.append(
+                        DataFileUpload(
+                            data=io.BytesIO(file_data.file_data), name=file_data.filename
+                        )
+                    )
+                if not self.upload_strategy.skip_moving_files:
+                    _move_files_post_upload(
+                        filesystem_session=filesystem_session,
+                        remote_directory_scope=remote_directory,
+                        success_file_paths=[
+                            file.filepath if file.filepath is not None else file.filename
+                            for file in filtered_file_data
+                        ],
+                        # IMPROVE: use triggers/webhooks to mark failed files as failed
+                        failed_file_paths=[],
+                    )
+
+            uploaded_files = client.upload_files(file_uploads=files_to_upload)
+
+            file_ids = [file.file_id for file in uploaded_files]
+
+            if self.upload_strategy.parse_files_individually:
+                for file_id in file_ids:
+                    batch_executor.invoke_uploader(
+                        file_ids=[file_id],
+                        uploader_key=self.upload_strategy.uploader_key,
+                        material_family_keys=self.upload_strategy.material_family_keys,
+                    )
+            else:
+                batch_executor.invoke_uploader(
+                    file_ids=file_ids,
+                    uploader_key=self.upload_strategy.uploader_key,
+                    material_family_keys=self.upload_strategy.material_family_keys,
+                )
+
+            batch_executor.send()
+
+        return JobResult(success=True)
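Pulling the pieces together, a hedged sketch of how a `GenericUploadJob` might be wired up by hand. Only the attribute names read by the executor above are known from this diff; the actual constructors and required fields of these types live in job_definition_t.py / generic_upload_t.py (+203 / +41 lines, not expanded here), so treat every keyword below as an assumption:

from uncountable.integration.executors.generic_upload_executor import GenericUploadJob
from uncountable.types.generic_upload_t import (
    GenericRemoteDirectoryScope,
    GenericUploadStrategy,
)
from uncountable.types.job_definition_t import GenericUploadDataSourceSFTP

job = GenericUploadJob(
    data_source=GenericUploadDataSourceSFTP(
        host="sftp.example.com",  # read as self.data_source.host above
        username="integration",   # read as self.data_source.username
        pem_secret=...,           # secret reference resolved via retrieve_secret(...)
    ),
    remote_directories=[
        GenericRemoteDirectoryScope(
            src_path="/incoming",
            recursive=False,
            detection_keyword=None,  # non-None currently raises NotImplementedError
            filename_regex=r"\.csv$",
            valid_file_extensions=[".csv"],
            max_files=25,
            prepend_date_on_archive=True,
            success_archive_path="/archive/success",
            failure_archive_path="/archive/failure",
        )
    ],
    upload_strategy=GenericUploadStrategy(
        uploader_key="my_uploader",
        material_family_keys=[],
        parse_files_individually=False,
        skip_moving_files=False,
    ),
)
# job.run(args=...) is invoked by cron_job_executor with constructed JobArguments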
uncountable/integration/executors/script_executor.py
CHANGED
@@ -3,7 +3,7 @@ import inspect
 import os
 
 from uncountable.integration.job import Job
-from uncountable.
+from uncountable.types.job_definition_t import JobExecutorScript, ProfileMetadata
 
 
 def resolve_script_executor(
uncountable/integration/job.py
CHANGED
@@ -2,13 +2,30 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
 from uncountable.core.client import Client
-from uncountable.
+from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata
+
+
+class JobLogger:
+    def __init__(
+        self, *, profile_metadata: ProfileMetadata, job_definition: JobDefinition
+    ) -> None:
+        self.profile_metadata = profile_metadata
+        self.job_definition = job_definition
+
+    def log_info(self, *log_objects: object) -> None:
+        # IMPROVE: log a json message with context that can be parsed by OT
+        print(
+            f"[{self.job_definition.id}] in profile ({self.profile_metadata.name}): ",
+            *log_objects,
+        )
 
 
 @dataclass
 class JobArgumentsBase:
     job_definition: JobDefinition
+    profile_metadata: ProfileMetadata
     client: Client
+    logger: JobLogger
 
 
 @dataclass
@@ -37,6 +54,6 @@ class CronJob(Job):
     def run(self, args: CronJobArguments) -> JobResult: ...
 
 
-def register_job(cls: Job) -> Job:
+def register_job(cls: type[Job]) -> type[Job]:
     cls._unc_job_registered = True
     return cls