UncountablePythonSDK 0.0.40__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of UncountablePythonSDK might be problematic.

Files changed (127)
  1. {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/METADATA +5 -1
  2. {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/RECORD +126 -107
  3. docs/requirements.txt +3 -3
  4. examples/invoke_uploader.py +23 -0
  5. pkgs/argument_parser/argument_parser.py +2 -2
  6. pkgs/filesystem_utils/__init__.py +17 -0
  7. pkgs/filesystem_utils/_gdrive_session.py +306 -0
  8. pkgs/filesystem_utils/_local_session.py +69 -0
  9. pkgs/filesystem_utils/_sftp_session.py +147 -0
  10. pkgs/filesystem_utils/file_type_utils.py +61 -0
  11. pkgs/filesystem_utils/filesystem_session.py +39 -0
  12. pkgs/serialization/yaml.py +54 -0
  13. pkgs/type_spec/config.py +1 -13
  14. pkgs/type_spec/emit_open_api.py +7 -5
  15. pkgs/type_spec/emit_open_api_util.py +4 -2
  16. pkgs/type_spec/emit_python.py +13 -14
  17. pkgs/type_spec/load_types.py +2 -1
  18. pkgs/type_spec/value_spec/__main__.py +1 -1
  19. uncountable/core/file_upload.py +13 -3
  20. uncountable/integration/construct_client.py +1 -1
  21. uncountable/integration/cron.py +11 -6
  22. uncountable/integration/entrypoint.py +6 -7
  23. uncountable/integration/executors/executors.py +24 -0
  24. uncountable/integration/executors/generic_upload_executor.py +245 -0
  25. uncountable/integration/executors/script_executor.py +1 -1
  26. uncountable/integration/job.py +19 -2
  27. uncountable/integration/secret_retrieval/__init__.py +3 -0
  28. uncountable/integration/secret_retrieval/retrieve_secret.py +40 -0
  29. uncountable/integration/server.py +1 -1
  30. uncountable/types/__init__.py +8 -0
  31. uncountable/types/api/batch/execute_batch.py +5 -5
  32. uncountable/types/api/batch/execute_batch_load_async.py +3 -3
  33. uncountable/types/api/chemical/convert_chemical_formats.py +4 -4
  34. uncountable/types/api/entity/create_entities.py +4 -4
  35. uncountable/types/api/entity/create_entity.py +4 -4
  36. uncountable/types/api/entity/get_entities_data.py +4 -4
  37. uncountable/types/api/entity/list_entities.py +5 -5
  38. uncountable/types/api/entity/lock_entity.py +3 -3
  39. uncountable/types/api/entity/resolve_entity_ids.py +4 -4
  40. uncountable/types/api/entity/set_values.py +3 -3
  41. uncountable/types/api/entity/transition_entity_phase.py +5 -5
  42. uncountable/types/api/entity/unlock_entity.py +3 -3
  43. uncountable/types/api/equipment/associate_equipment_input.py +3 -3
  44. uncountable/types/api/field_options/upsert_field_options.py +4 -4
  45. uncountable/types/api/id_source/list_id_source.py +4 -4
  46. uncountable/types/api/id_source/match_id_source.py +4 -4
  47. uncountable/types/api/input_groups/get_input_group_names.py +4 -4
  48. uncountable/types/api/inputs/create_inputs.py +5 -5
  49. uncountable/types/api/inputs/get_input_data.py +7 -7
  50. uncountable/types/api/inputs/get_input_names.py +4 -4
  51. uncountable/types/api/inputs/get_inputs_data.py +7 -7
  52. uncountable/types/api/inputs/set_input_attribute_values.py +4 -4
  53. uncountable/types/api/inputs/set_input_category.py +3 -3
  54. uncountable/types/api/inputs/set_input_subcategories.py +3 -3
  55. uncountable/types/api/inputs/set_intermediate_type.py +3 -3
  56. uncountable/types/api/material_families/update_entity_material_families.py +3 -3
  57. uncountable/types/api/outputs/get_output_data.py +7 -7
  58. uncountable/types/api/outputs/get_output_names.py +4 -4
  59. uncountable/types/api/outputs/resolve_output_conditions.py +6 -6
  60. uncountable/types/api/permissions/set_core_permissions.py +7 -7
  61. uncountable/types/api/project/get_projects.py +4 -4
  62. uncountable/types/api/project/get_projects_data.py +4 -4
  63. uncountable/types/api/recipe_links/create_recipe_link.py +3 -3
  64. uncountable/types/api/recipe_links/remove_recipe_link.py +3 -3
  65. uncountable/types/api/recipe_metadata/get_recipe_metadata_data.py +4 -4
  66. uncountable/types/api/recipes/add_recipe_to_project.py +3 -3
  67. uncountable/types/api/recipes/archive_recipes.py +3 -3
  68. uncountable/types/api/recipes/associate_recipe_as_input.py +3 -3
  69. uncountable/types/api/recipes/associate_recipe_as_lot.py +3 -3
  70. uncountable/types/api/recipes/create_recipe.py +3 -3
  71. uncountable/types/api/recipes/create_recipes.py +5 -5
  72. uncountable/types/api/recipes/disassociate_recipe_as_input.py +3 -3
  73. uncountable/types/api/recipes/edit_recipe_inputs.py +12 -12
  74. uncountable/types/api/recipes/get_curve.py +3 -3
  75. uncountable/types/api/recipes/get_recipe_calculations.py +4 -4
  76. uncountable/types/api/recipes/get_recipe_links.py +3 -3
  77. uncountable/types/api/recipes/get_recipe_names.py +4 -4
  78. uncountable/types/api/recipes/get_recipe_output_metadata.py +4 -4
  79. uncountable/types/api/recipes/get_recipes_data.py +12 -12
  80. uncountable/types/api/recipes/lock_recipes.py +4 -4
  81. uncountable/types/api/recipes/remove_recipe_from_project.py +3 -3
  82. uncountable/types/api/recipes/set_recipe_inputs.py +4 -4
  83. uncountable/types/api/recipes/set_recipe_metadata.py +3 -3
  84. uncountable/types/api/recipes/set_recipe_output_annotations.py +7 -7
  85. uncountable/types/api/recipes/set_recipe_outputs.py +5 -5
  86. uncountable/types/api/recipes/set_recipe_tags.py +7 -7
  87. uncountable/types/api/recipes/unarchive_recipes.py +3 -3
  88. uncountable/types/api/recipes/unlock_recipes.py +3 -3
  89. uncountable/types/api/triggers/run_trigger.py +3 -3
  90. uncountable/types/api/uploader/__init__.py +1 -0
  91. uncountable/types/api/uploader/invoke_uploader.py +38 -0
  92. uncountable/types/async_batch_processor.py +36 -0
  93. uncountable/types/async_batch_t.py +6 -4
  94. uncountable/types/calculations_t.py +2 -2
  95. uncountable/types/chemical_structure_t.py +2 -2
  96. uncountable/types/client_base.py +25 -2
  97. uncountable/types/curves_t.py +3 -3
  98. uncountable/types/entity_t.py +2 -2
  99. uncountable/types/experiment_groups_t.py +2 -2
  100. uncountable/types/field_values_t.py +5 -5
  101. uncountable/types/fields_t.py +2 -2
  102. uncountable/types/generic_upload.py +9 -0
  103. uncountable/types/generic_upload_t.py +41 -0
  104. uncountable/types/id_source_t.py +5 -5
  105. uncountable/types/identifier_t.py +4 -4
  106. uncountable/types/input_attributes_t.py +2 -2
  107. uncountable/types/inputs_t.py +2 -2
  108. uncountable/types/job_definition.py +26 -0
  109. uncountable/types/job_definition_t.py +203 -0
  110. uncountable/types/outputs_t.py +2 -2
  111. uncountable/types/phases_t.py +2 -2
  112. uncountable/types/recipe_identifiers_t.py +4 -4
  113. uncountable/types/recipe_links_t.py +2 -2
  114. uncountable/types/recipe_metadata_t.py +4 -4
  115. uncountable/types/recipe_output_metadata_t.py +2 -2
  116. uncountable/types/recipe_tags_t.py +2 -2
  117. uncountable/types/recipe_workflow_steps_t.py +5 -5
  118. uncountable/types/recipes_t.py +2 -2
  119. uncountable/types/response_t.py +2 -2
  120. uncountable/types/secret_retrieval.py +12 -0
  121. uncountable/types/secret_retrieval_t.py +69 -0
  122. uncountable/types/units_t.py +2 -2
  123. uncountable/types/users_t.py +2 -2
  124. uncountable/types/workflows_t.py +3 -3
  125. uncountable/integration/types.py +0 -89
  126. {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/WHEEL +0 -0
  127. {UncountablePythonSDK-0.0.40.dist-info → UncountablePythonSDK-0.0.42.dist-info}/top_level.txt +0 -0
pkgs/type_spec/config.py CHANGED
@@ -1,10 +1,9 @@
 import os
 from collections.abc import Callable, Mapping
 from dataclasses import dataclass
-from decimal import Decimal
 from typing import Self, TypeVar
 
-import yaml
+from pkgs.serialization import yaml
 
 ConfigValueType = str | None | Mapping[str, str | None] | list[str]
 
@@ -103,17 +102,6 @@ def _parse_language(config_class: type[_T], raw_value: ConfigValueType) -> _T:
     return config_class(**raw_value)
 
 
-def _decimal_constructor(loader, node):  # type:ignore
-    value = loader.construct_scalar(node)
-    return Decimal(value)
-
-
-# A semi-acceptable patch to force a number to be parsed as a decimal, since pyyaml
-# parses them as lossy floats otherwise. Though a bit ugly, at least this way we have
-# support for decimal constants
-yaml.SafeLoader.add_constructor("!decimal", _decimal_constructor)
-
-
 def parse_yaml_config(config_file: str) -> Config:
     with open(config_file, encoding="utf-8") as input:
         raw_config: dict[str, ConfigValueType] = yaml.safe_load(input)
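
The `!decimal` constructor removed here moves into the new shared `pkgs/serialization/yaml` module (added in this release as `pkgs/serialization/yaml.py`, 54 lines), which the type_spec emitters now import in place of pyyaml. A minimal sketch of what such a wrapper plausibly looks like, assuming it re-exports `safe_load` and adds a string-returning `dumps`; the module's actual contents are not shown in this diff:

```python
# Hypothetical sketch of pkgs/serialization/yaml.py; the real 54-line module
# is not included in this diff.
from decimal import Decimal
from typing import IO, Any

import yaml as _yaml


def _decimal_constructor(loader: _yaml.SafeLoader, node: _yaml.ScalarNode) -> Decimal:
    # "!decimal"-tagged scalars parse losslessly instead of as floats.
    return Decimal(loader.construct_scalar(node))


_yaml.SafeLoader.add_constructor("!decimal", _decimal_constructor)


def safe_load(stream: str | IO[str]) -> Any:
    return _yaml.safe_load(stream)


def dumps(data: object, *, sort_keys: bool = True) -> str:
    # Unlike pyyaml's dump, the name signals it always returns a str.
    return _yaml.dump(data, sort_keys=sort_keys)
```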
pkgs/type_spec/emit_open_api.py CHANGED
@@ -9,7 +9,8 @@ import json
 import re
 from typing import Collection, cast
 
-import yaml
+from pkgs.serialization import yaml
+from pkgs.serialization_util.serialization_helpers import serialize_for_api
 
 from . import builder, util
 from .builder import EndpointGuideKey, RootGuideKey
@@ -169,7 +170,7 @@ def _serialize_global_context(ctx: EmitOpenAPIGlobalContext) -> str:
         oa_paths[path.path] = {"$ref": path.ref}
     oa_root["paths"] = oa_paths
 
-    return yaml.dump(oa_root, sort_keys=False)
+    return yaml.dumps(oa_root, sort_keys=False)
 
 
 def _is_empty_object_type(typ: OpenAPIType) -> bool:
@@ -413,7 +414,7 @@ def _emit_namespace(
 
     path = f"{config.types_output}/common/{'/'.join(namespace.path)}.yaml"
     oa_namespace = {"components": oa_components}
-    _rewrite_with_notice(path, yaml.dump(oa_namespace, sort_keys=False))
+    _rewrite_with_notice(path, yaml.dumps(oa_namespace, sort_keys=False))
 
 
 def _emit_type(
@@ -568,6 +569,7 @@ def _emit_endpoint(
         description = f"**[External API-Endpoint]** <br/> {description}"
 
     path_cutoff = min(3, len(namespace.path) - 1)
+
     ctx.endpoint = EmitOpenAPIEndpoint(
         method=namespace.endpoint.method.lower(),
         tags=[tag_name],
@@ -580,8 +582,8 @@
                 ref_name=f"ex_{i}",
                 summary=example.summary,
                 description=example.description,
-                arguments=example.arguments,
-                data=example.data,
+                arguments=serialize_for_api(example.arguments),
+                data=serialize_for_api(example.data),
             )
             for i, example in enumerate(endpoint_examples)
         ],
pkgs/type_spec/emit_open_api_util.py CHANGED
@@ -8,6 +8,8 @@ from collections import defaultdict
 from dataclasses import dataclass, field
 from typing import TypeAlias
 
+from pkgs.serialization_util.serialization_helpers import JsonValue
+
 from . import builder
 from .open_api_util import OpenAPIType
 
@@ -68,8 +70,8 @@ class EmitOpenAPIEndpointExample:
     ref_name: str
     summary: str
     description: str
-    arguments: dict[str, object]
-    data: dict[str, object]
+    arguments: dict[str, JsonValue]
+    data: dict[str, JsonValue]
 
 
 EmitOpenAPIStabilityLevel = builder.StabilityLevel
pkgs/type_spec/emit_python.py CHANGED
@@ -1,6 +1,6 @@
+import dataclasses
 import io
 import os
-from dataclasses import dataclass, field
 from decimal import Decimal
 from typing import Any, Optional
 
@@ -35,11 +35,11 @@ QUEUED_BATCH_REQUEST_STYPE = builder.SpecTypeDefnObject(
 )
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class TrackingContext:
     namespace: Optional[builder.SpecNamespace] = None
-    namespaces: set[builder.SpecNamespace] = field(default_factory=set)
-    names: set[str] = field(default_factory=set)
+    namespaces: set[builder.SpecNamespace] = dataclasses.field(default_factory=set)
+    names: set[str] = dataclasses.field(default_factory=set)
 
     use_enum: bool = False
     use_serial_string_enum: bool = False
@@ -50,7 +50,7 @@ class TrackingContext:
     use_opaque_key: bool = False
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class Context(TrackingContext):
     out: io.StringIO
     namespace: builder.SpecNamespace
@@ -221,7 +221,7 @@ def _emit_types_imports(*, out: io.StringIO, ctx: Context) -> None:
     if ctx.use_enum:
         out.write("from pkgs.strenum_compat import StrEnum\n")
     if ctx.use_dataclass:
-        out.write("from dataclasses import dataclass\n")
+        out.write("import dataclasses\n")
     if ctx.use_serial_class:
         out.write("from pkgs.serialization import serial_class\n")
     if ctx.use_serial_union:
@@ -652,7 +652,7 @@ def _emit_string_enum(ctx: Context, stype: builder.SpecTypeDefnStringEnum) -> No
     )
 
 
-@dataclass
+@dataclasses.dataclass
 class EmittedPropertiesMetadata:
     unconverted_keys: set[str]
     unconverted_values: set[str]
@@ -719,7 +719,6 @@ def _emit_properties(
             default = "None"
         elif prop.has_default:
             default = _emit_value(ctx, prop.spec_type, prop.default)
-
         class_out.write(f"{INDENT * num_indent}{py_name}: {ref_type}")
         if default:
             class_out.write(f" = {default}")
@@ -833,7 +832,7 @@ def _emit_type(ctx: Context, stype: builder.SpecType) -> None:
 
     ctx.out.write(")\n")
 
-    dataclass = "@dataclass"
+    dataclass = "@dataclasses.dataclass"
    dc_args = []
    if stype.is_kw_only():
        dc_args.append("kw_only=True")
@@ -1159,7 +1158,7 @@ def _emit_api_argument_lookup(
    for endpoint_root in builder.api_endpoints:
        routes_output = config.routes_output[endpoint_root]

-        imports = ["import typing", "from dataclasses import dataclass"]
+        imports = ["import typing", "import dataclasses"]
        mappings = []
        for namespace in sorted(
            builder.namespaces.values(),
@@ -1212,13 +1211,13 @@ AT = typing.TypeVar("AT")
 DT = typing.TypeVar("DT")
 
 
-@dataclass(kw_only=True, frozen=True)
+@dataclasses.dataclass(kw_only=True, frozen=True)
 class ApiEndpointKey:
     method: str
     route: str
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class ApiEndpointSpec(typing.Generic[AT, DT]):
     route: str
     arguments_type: type[AT]
@@ -1246,7 +1245,7 @@ __all__ = ["{API_ARGUMENTS_NAME}"]
 CLIENT_CLASS_FILENAME = "client_base"
 CLIENT_CLASS_IMPORTS = [
     "from abc import ABC, abstractmethod",
-    "from dataclasses import dataclass",
+    "import dataclasses",
 ]
 ASYNC_BATCH_PROCESSOR_FILENAME = "async_batch_processor"
 ASYNC_BATCH_PROCESSOR_IMPORTS = [
@@ -1339,7 +1338,7 @@ def _emit_client_class(
 DT = typing.TypeVar("DT")
 
 
-@dataclass(kw_only=True)
+@dataclasses.dataclass(kw_only=True)
 class APIRequest:
     method: str
     endpoint: str
pkgs/type_spec/load_types.py CHANGED
@@ -3,9 +3,10 @@ from collections.abc import Callable
 from io import StringIO
 from typing import Optional
 
-import yaml
 from shelljob import fs
 
+from pkgs.serialization import yaml
+
 from .builder import SpecBuilder
 from .config import Config
pkgs/type_spec/value_spec/__main__.py CHANGED
@@ -20,9 +20,9 @@ import sys
 from typing import TypeVar, cast
 
 import regex as re
-import yaml
 
 from main.base.types import base_t, value_spec_t
+from pkgs.serialization import yaml
 
 from ..util import parse_type_str, rewrite_file
 from .convert_type import convert_to_value_spec_type
uncountable/core/file_upload.py CHANGED
@@ -16,6 +16,7 @@ _CHUNK_SIZE = 5 * 1024 * 1024  # s3 requires 5MiB minimum
 
 class FileUploadType(StrEnum):
     MEDIA_FILE_UPLOAD = "MEDIA_FILE_UPLOAD"
+    DATA_FILE_UPLOAD = "DATA_FILE_UPLOAD"
 
 
 @dataclass(kw_only=True)
@@ -26,7 +27,14 @@ class MediaFileUpload:
     type: Literal[FileUploadType.MEDIA_FILE_UPLOAD] = FileUploadType.MEDIA_FILE_UPLOAD
 
 
-FileUpload = MediaFileUpload
+@dataclass(kw_only=True)
+class DataFileUpload:
+    data: BytesIO
+    name: str
+    type: Literal[FileUploadType.DATA_FILE_UPLOAD] = FileUploadType.DATA_FILE_UPLOAD
+
+
+FileUpload = MediaFileUpload | DataFileUpload
 
 
 @dataclass(kw_only=True)
@@ -37,12 +45,14 @@ class FileBytes:
 
 @contextmanager
 def file_upload_data(file_upload: FileUpload) -> Generator[FileBytes, None, None]:
-    match file_upload.type:
-        case FileUploadType.MEDIA_FILE_UPLOAD:
+    match file_upload:
+        case MediaFileUpload():
             with open(file_upload.path, "rb") as f:
                 yield FileBytes(
                     name=Path(file_upload.path).name, bytes_data=BytesIO(f.read())
                 )
+        case DataFileUpload():
+            yield FileBytes(name=file_upload.name, bytes_data=file_upload.data)
 
 
 @dataclass(kw_only=True)
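
With `FileUpload` widened to a union, callers can now upload in-memory bytes without staging a temp file; `GenericUploadJob` below relies on exactly this. A hedged usage sketch, using only the fields visible in this diff (the file path is illustrative):

```python
from io import BytesIO

from uncountable.core.file_upload import (
    DataFileUpload,
    MediaFileUpload,
    file_upload_data,
)

# From a file on disk (existing behavior); the path is illustrative.
media = MediaFileUpload(path="./report.csv")

# From bytes already in memory (new in 0.0.42), no filesystem involved.
data = DataFileUpload(name="report.csv", data=BytesIO(b"col,val\na,1\n"))

# Entering the context yields a FileBytes; for media it would read from disk,
# for data it serves the buffer directly.
with file_upload_data(data) as file_bytes:
    print(file_bytes.name, file_bytes.bytes_data.getbuffer().nbytes)
```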
uncountable/integration/construct_client.py CHANGED
@@ -2,7 +2,7 @@ import os
 from typing import assert_never
 
 from uncountable.core import AuthDetailsApiKey, Client
-from uncountable.integration.types import (
+from uncountable.types.job_definition_t import (
     AuthRetrievalEnv,
     ProfileMetadata,
 )
uncountable/integration/cron.py CHANGED
@@ -2,9 +2,9 @@ from dataclasses import dataclass
 
 from pkgs.argument_parser import CachedParser
 from uncountable.integration.construct_client import construct_uncountable_client
-from uncountable.integration.executors.script_executor import resolve_script_executor
-from uncountable.integration.job import CronJobArguments
-from uncountable.integration.types import JobDefinition, ProfileMetadata
+from uncountable.integration.executors.executors import resolve_executor
+from uncountable.integration.job import CronJobArguments, JobLogger
+from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata
 
 
 @dataclass
@@ -21,10 +21,15 @@ def cron_job_executor(**kwargs: dict) -> None:
     args = CronJobArguments(
         job_definition=args_passed.definition,
         client=construct_uncountable_client(profile_meta=args_passed.profile_metadata),
+        profile_metadata=args_passed.profile_metadata,
+        logger=JobLogger(
+            profile_metadata=args_passed.profile_metadata,
+            job_definition=args_passed.definition,
+        ),
     )
 
-    job = resolve_script_executor(
-        args_passed.definition.executor, args_passed.profile_metadata
-    )
+    job = resolve_executor(args_passed.definition.executor, args_passed.profile_metadata)
+
+    print(f"running job {args_passed.definition.name}")
 
     job.run(args=args)
uncountable/integration/entrypoint.py CHANGED
@@ -4,15 +4,14 @@ from importlib import resources
 from pkgs.argument_parser import CachedParser
 from uncountable.integration.db.connect import create_db_engine
 from uncountable.integration.server import IntegrationServer
-from uncountable.integration.types import ProfileDefinition
+from uncountable.types.job_definition_t import ProfileDefinition
 
 profile_parser = CachedParser(ProfileDefinition)
 
 
-def main() -> None:
+def main(blocking: bool) -> None:
     profiles_module = os.environ["UNC_PROFILES_MODULE"]
     with IntegrationServer(create_db_engine()) as server:
-        # TODO: Loop through all job spec yaml files and call server.add_job
         profiles = [
             entry
             for entry in resources.files(profiles_module).iterdir()
@@ -26,7 +25,7 @@ def main() -> None:
                 resource="profile.yaml",
             )
         except FileNotFoundError as e:
-            print("WARN: profile.yaml not found", e)
+            print(f"WARN: profile.yaml not found for {profile_name}", e)
             continue
         server.register_profile(
             profile_name=profile_name,
@@ -35,8 +34,8 @@ def main() -> None:
             jobs=profile.jobs,
         )
 
-    server.serve_forever()
+    if blocking:
+        server.serve_forever()
 
 
-if __name__ == "__main__":
-    main()
+main(__name__ == "__main__")
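
The new `blocking` flag plus the module-level `main(__name__ == "__main__")` keep CLI behavior unchanged while letting other code import the entrypoint to register profiles without parking the process in `serve_forever()`. A hedged usage sketch; the profiles module name is illustrative:

```python
import os

# The entrypoint reads this at import time, so set it first.
os.environ["UNC_PROFILES_MODULE"] = "my_profiles"  # illustrative module name

# Importing runs main(False): every profile's jobs are registered, but since
# __name__ != "__main__", serve_forever() is skipped and control returns.
import uncountable.integration.entrypoint  # noqa: F401
```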
uncountable/integration/executors/executors.py ADDED
@@ -0,0 +1,24 @@
+from typing import assert_never
+
+from uncountable.integration.executors.generic_upload_executor import GenericUploadJob
+from uncountable.integration.executors.script_executor import resolve_script_executor
+from uncountable.integration.job import Job
+from uncountable.types import job_definition_t
+
+
+def resolve_executor(
+    job_executor: job_definition_t.JobExecutor,
+    profile_metadata: job_definition_t.ProfileMetadata,
+) -> Job:
+    match job_executor:
+        case job_definition_t.JobExecutorScript():
+            return resolve_script_executor(
+                job_executor, profile_metadata=profile_metadata
+            )
+        case job_definition_t.JobExecutorGenericUpload():
+            return GenericUploadJob(
+                remote_directories=job_executor.remote_directories,
+                upload_strategy=job_executor.upload_strategy,
+                data_source=job_executor.data_source,
+            )
+    assert_never(job_executor)
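
Both `resolve_executor` here and the `file_upload_data` rewrite above lean on the same idiom: `match` on the union member's class, with `typing.assert_never` so the type checker flags any unhandled variant. A self-contained illustration of the pattern; the `Circle`/`Square` names are illustrative, not from the SDK:

```python
from dataclasses import dataclass
from typing import assert_never


@dataclass
class Circle:
    radius: float


@dataclass
class Square:
    side: float


Shape = Circle | Square  # a closed union, like FileUpload or JobExecutor


def area(shape: Shape) -> float:
    match shape:
        case Circle():
            return 3.14159 * shape.radius**2
        case Square():
            return shape.side**2
    # mypy/pyright prove this line unreachable; adding a new Shape variant
    # without a case above becomes a type error rather than a silent fallthrough.
    assert_never(shape)


print(area(Circle(radius=1.0)))
```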
uncountable/integration/executors/generic_upload_executor.py ADDED
@@ -0,0 +1,245 @@
+import io
+import os
+import re
+from datetime import datetime, timezone
+
+import paramiko
+
+from pkgs.filesystem_utils import (
+    FileObjectData,
+    FileSystemFileReference,
+    FileSystemObject,
+    FileSystemSFTPConfig,
+    FileTransfer,
+    SFTPSession,
+)
+from pkgs.filesystem_utils.filesystem_session import FileSystemSession
+from uncountable.core.async_batch import AsyncBatchProcessor
+from uncountable.core.file_upload import DataFileUpload, FileUpload
+from uncountable.integration.job import Job, JobArguments, JobLogger, JobResult
+from uncountable.integration.secret_retrieval import retrieve_secret
+from uncountable.types.generic_upload_t import (
+    GenericRemoteDirectoryScope,
+    GenericUploadStrategy,
+)
+from uncountable.types.job_definition_t import (
+    GenericUploadDataSource,
+    GenericUploadDataSourceSFTP,
+)
+
+
+def _filter_files_by_keyword(
+    remote_directory: GenericRemoteDirectoryScope,
+    files: list[FileObjectData],
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    if remote_directory.detection_keyword is None:
+        return files
+
+    raise NotImplementedError("keyword detection not implemented yet")
+
+
+def _filter_by_filename(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.filename_regex is None:
+        return files
+
+    return [
+        file
+        for file in files
+        if file.filename is not None
+        and re.search(remote_directory.filename_regex, file.filename)
+    ]
+
+
+def _filter_by_file_extension(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.valid_file_extensions is None:
+        return files
+
+    return [
+        file
+        for file in files
+        if file.filename is not None
+        and os.path.splitext(file.filename)[-1] in remote_directory.valid_file_extensions
+    ]
+
+
+def _filter_by_max_files(
+    remote_directory: GenericRemoteDirectoryScope, files: list[FileSystemObject]
+) -> list[FileSystemObject]:
+    if remote_directory.max_files is None:
+        return files
+
+    return files[: remote_directory.max_files]
+
+
+def _pull_remote_directory_data(
+    *,
+    filesystem_session: FileSystemSession,
+    remote_directory: GenericRemoteDirectoryScope,
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    files_to_pull = filesystem_session.list_files(
+        dir_path=FileSystemFileReference(
+            filepath=remote_directory.src_path,
+        ),
+        recursive=remote_directory.recursive,
+    )
+    logger.log_info(
+        f"Pulled the following files {files_to_pull} from the remote directory {remote_directory}.",
+    )
+
+    files_to_pull = _filter_by_file_extension(remote_directory, files_to_pull)
+    files_to_pull = _filter_by_filename(remote_directory, files_to_pull)
+    files_to_pull = _filter_by_max_files(remote_directory, files_to_pull)
+
+    logger.log_info(
+        f"Accessing SFTP directory: {remote_directory.src_path} and pulling files: {', '.join([f.filename for f in files_to_pull if f.filename is not None])}",
+    )
+    return filesystem_session.download_files(files_to_pull)
+
+
+def _filter_downloaded_file_data(
+    remote_directory: GenericRemoteDirectoryScope,
+    pulled_file_data: list[FileObjectData],
+    logger: JobLogger,
+) -> list[FileObjectData]:
+    filtered_file_data = _filter_files_by_keyword(
+        remote_directory=remote_directory, files=pulled_file_data, logger=logger
+    )
+    return filtered_file_data
+
+
+def _move_files_post_upload(
+    *,
+    filesystem_session: FileSystemSession,
+    remote_directory_scope: GenericRemoteDirectoryScope,
+    success_file_paths: list[str],
+    failed_file_paths: list[str],
+) -> None:
+    success_file_transfers: list[FileTransfer] = []
+    appended_text = ""
+
+    if remote_directory_scope.prepend_date_on_archive:
+        appended_text = f"-{datetime.now(timezone.utc).timestamp()}"
+
+    for file_path in success_file_paths:
+        filename = os.path.split(file_path)[-1]
+        root, extension = os.path.splitext(filename)
+        new_filename = f"{root}{appended_text}{extension}"
+        # format is source, dest in the tuple
+        success_file_transfers.append((
+            FileSystemFileReference(file_path),
+            FileSystemFileReference(
+                os.path.join(
+                    remote_directory_scope.success_archive_path,
+                    new_filename,
+                )
+            ),
+        ))
+
+    failed_file_transfers: list[FileTransfer] = []
+    for file_path in failed_file_paths:
+        filename = os.path.split(file_path)[-1]
+        root, extension = os.path.splitext(filename)
+        new_filename = f"{root}{appended_text}{extension}"
+        failed_file_transfers.append((
+            FileSystemFileReference(file_path),
+            FileSystemFileReference(
+                os.path.join(
+                    remote_directory_scope.failure_archive_path,
+                    new_filename,
+                )
+            ),
+        ))
+
+    filesystem_session.move_files([*success_file_transfers, *failed_file_transfers])
+
+
+class GenericUploadJob(Job):
+    def __init__(
+        self,
+        data_source: GenericUploadDataSource,
+        remote_directories: list[GenericRemoteDirectoryScope],
+        upload_strategy: GenericUploadStrategy,
+    ) -> None:
+        super().__init__()
+        self.remote_directories = remote_directories
+        self.upload_strategy = upload_strategy
+        self.data_source = data_source
+
+    def _construct_filesystem_session(self, args: JobArguments) -> FileSystemSession:
+        match self.data_source:
+            case GenericUploadDataSourceSFTP():
+                pem_secret = retrieve_secret(
+                    self.data_source.pem_secret, profile_metadata=args.profile_metadata
+                )
+                private_key = paramiko.AgentKey.from_private_key(io.StringIO(pem_secret))
+                sftp_config = FileSystemSFTPConfig(
+                    ip=self.data_source.host,
+                    username=self.data_source.username,
+                    pem_key=private_key,
+                    pem_path=None,
+                )
+                return SFTPSession(sftp_config=sftp_config)
+
+    def run(self, args: JobArguments) -> JobResult:
+        client = args.client
+        logger = args.logger
+
+        batch_executor = AsyncBatchProcessor(client=client)
+        with self._construct_filesystem_session(args) as filesystem_session:
+            files_to_upload: list[FileUpload] = []
+            for remote_directory in self.remote_directories:
+                pulled_file_data = _pull_remote_directory_data(
+                    filesystem_session=filesystem_session,
+                    remote_directory=remote_directory,
+                    logger=logger,
+                )
+                filtered_file_data = _filter_downloaded_file_data(
+                    remote_directory=remote_directory,
+                    pulled_file_data=pulled_file_data,
+                    logger=args.logger,
+                )
+                for file_data in filtered_file_data:
+                    files_to_upload.append(
+                        DataFileUpload(
+                            data=io.BytesIO(file_data.file_data), name=file_data.filename
+                        )
+                    )
+                if not self.upload_strategy.skip_moving_files:
+                    _move_files_post_upload(
+                        filesystem_session=filesystem_session,
+                        remote_directory_scope=remote_directory,
+                        success_file_paths=[
+                            file.filepath if file.filepath is not None else file.filename
+                            for file in filtered_file_data
+                        ],
+                        # IMPROVE: use triggers/webhooks to mark failed files as failed
+                        failed_file_paths=[],
+                    )
+
+        uploaded_files = client.upload_files(file_uploads=files_to_upload)
+
+        file_ids = [file.file_id for file in uploaded_files]
+
+        if self.upload_strategy.parse_files_individually:
+            for file_id in file_ids:
+                batch_executor.invoke_uploader(
+                    file_ids=[file_id],
+                    uploader_key=self.upload_strategy.uploader_key,
+                    material_family_keys=self.upload_strategy.material_family_keys,
+                )
+        else:
+            batch_executor.invoke_uploader(
+                file_ids=file_ids,
+                uploader_key=self.upload_strategy.uploader_key,
+                material_family_keys=self.upload_strategy.material_family_keys,
+            )
+
+        batch_executor.send()
+
+        return JobResult(success=True)
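
The tail of `run` is the whole integration in miniature: upload the collected bytes, then queue `invoke_uploader` calls through the async batch processor. A condensed sketch of that flow as a standalone helper, using only calls that appear in this diff; `material_family_keys=None` is an assumption about the accepted default:

```python
from uncountable.core.async_batch import AsyncBatchProcessor
from uncountable.core.client import Client
from uncountable.core.file_upload import FileUpload


def upload_and_invoke(
    client: Client, files_to_upload: list[FileUpload], uploader_key: str
) -> None:
    # Mirrors GenericUploadJob.run: upload all files, then queue a single
    # uploader invocation over the resulting file ids.
    batch = AsyncBatchProcessor(client=client)
    uploaded = client.upload_files(file_uploads=files_to_upload)
    batch.invoke_uploader(
        file_ids=[f.file_id for f in uploaded],
        uploader_key=uploader_key,
        material_family_keys=None,  # assumed to accept None when unused
    )
    batch.send()
```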
uncountable/integration/executors/script_executor.py CHANGED
@@ -3,7 +3,7 @@ import inspect
 import os
 
 from uncountable.integration.job import Job
-from uncountable.integration.types import JobExecutorScript, ProfileMetadata
+from uncountable.types.job_definition_t import JobExecutorScript, ProfileMetadata
 
 
 def resolve_script_executor(
uncountable/integration/job.py CHANGED
@@ -2,13 +2,30 @@ from abc import ABC, abstractmethod
 from dataclasses import dataclass
 
 from uncountable.core.client import Client
-from uncountable.integration.types import JobDefinition
+from uncountable.types.job_definition_t import JobDefinition, ProfileMetadata
+
+
+class JobLogger:
+    def __init__(
+        self, *, profile_metadata: ProfileMetadata, job_definition: JobDefinition
+    ) -> None:
+        self.profile_metadata = profile_metadata
+        self.job_definition = job_definition
+
+    def log_info(self, *log_objects: object) -> None:
+        # IMPROVE: log a json message with context that can be parsed by OT
+        print(
+            f"[{self.job_definition.id}] in profile ({self.profile_metadata.name}): ",
+            *log_objects,
+        )
 
 
 @dataclass
 class JobArgumentsBase:
     job_definition: JobDefinition
+    profile_metadata: ProfileMetadata
     client: Client
+    logger: JobLogger
 
 
 @dataclass
@@ -37,6 +54,6 @@ class CronJob(Job):
     def run(self, args: CronJobArguments) -> JobResult: ...
 
 
-def register_job(cls: Job) -> Job:
+def register_job(cls: type[Job]) -> type[Job]:
     cls._unc_job_registered = True
     return cls
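
A hedged sketch of a user-defined cron job against the widened `JobArgumentsBase`; the decorator and argument fields come from this file, while the job body is illustrative:

```python
from uncountable.integration.job import (
    CronJob,
    CronJobArguments,
    JobResult,
    register_job,
)


@register_job
class NightlySyncJob(CronJob):
    def run(self, args: CronJobArguments) -> JobResult:
        # New in 0.0.42: profile_metadata and logger ride along with the client.
        args.logger.log_info("starting sync for profile", args.profile_metadata.name)
        # ... call args.client here ...
        return JobResult(success=True)
```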
uncountable/integration/secret_retrieval/__init__.py ADDED
@@ -0,0 +1,3 @@
+from .retrieve_secret import retrieve_secret
+
+__all__: list[str] = ["retrieve_secret"]