UncountablePythonSDK 0.0.24__py3-none-any.whl → 0.0.131__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of UncountablePythonSDK might be problematic. Click here for more details.
- docs/conf.py +60 -8
- docs/index.md +107 -4
- docs/integration_examples/create_ingredient.md +43 -0
- docs/integration_examples/create_output.md +56 -0
- docs/integration_examples/index.md +6 -0
- docs/justfile +2 -2
- docs/requirements.txt +7 -5
- examples/async_batch.py +5 -6
- examples/basic_auth.py +7 -0
- examples/create_entity.py +4 -6
- examples/create_ingredient_sdk.py +34 -0
- examples/download_files.py +26 -0
- examples/edit_recipe_inputs.py +50 -0
- examples/integration-server/jobs/materials_auto/concurrent_cron.py +11 -0
- examples/integration-server/jobs/materials_auto/example_cron.py +21 -0
- examples/integration-server/jobs/materials_auto/example_http.py +47 -0
- examples/integration-server/jobs/materials_auto/example_instrument.py +100 -0
- examples/integration-server/jobs/materials_auto/example_parse.py +140 -0
- examples/integration-server/jobs/materials_auto/example_predictions.py +61 -0
- examples/integration-server/jobs/materials_auto/example_runsheet_wh.py +39 -0
- examples/integration-server/jobs/materials_auto/example_wh.py +23 -0
- examples/integration-server/jobs/materials_auto/profile.yaml +104 -0
- examples/integration-server/pyproject.toml +224 -0
- examples/invoke_uploader.py +26 -0
- examples/oauth.py +7 -0
- examples/set_recipe_metadata_file.py +40 -0
- examples/set_recipe_output_file_sdk.py +26 -0
- examples/upload_files.py +2 -3
- pkgs/argument_parser/__init__.py +9 -0
- pkgs/argument_parser/_is_namedtuple.py +3 -0
- pkgs/argument_parser/argument_parser.py +295 -74
- pkgs/argument_parser/case_convert.py +4 -3
- pkgs/filesystem_utils/__init__.py +20 -0
- pkgs/filesystem_utils/_blob_session.py +144 -0
- pkgs/filesystem_utils/_gdrive_session.py +309 -0
- pkgs/filesystem_utils/_local_session.py +69 -0
- pkgs/filesystem_utils/_s3_session.py +118 -0
- pkgs/filesystem_utils/_sftp_session.py +151 -0
- pkgs/filesystem_utils/file_type_utils.py +91 -0
- pkgs/filesystem_utils/filesystem_session.py +39 -0
- pkgs/py.typed +0 -0
- pkgs/serialization/__init__.py +8 -1
- pkgs/serialization/annotation.py +64 -0
- pkgs/serialization/missing_sentry.py +1 -1
- pkgs/serialization/opaque_key.py +1 -1
- pkgs/serialization/serial_alias.py +47 -0
- pkgs/serialization/serial_class.py +69 -54
- pkgs/serialization/serial_generic.py +16 -0
- pkgs/serialization/serial_union.py +84 -0
- pkgs/serialization/yaml.py +57 -0
- pkgs/serialization_util/__init__.py +7 -7
- pkgs/serialization_util/convert_to_snakecase.py +27 -0
- pkgs/serialization_util/dataclasses.py +14 -0
- pkgs/serialization_util/serialization_helpers.py +117 -71
- pkgs/type_spec/actions_registry/__main__.py +0 -4
- pkgs/type_spec/actions_registry/emit_typescript.py +5 -5
- pkgs/type_spec/builder.py +438 -109
- pkgs/type_spec/builder_types.py +9 -0
- pkgs/type_spec/config.py +52 -24
- pkgs/type_spec/cross_output_links.py +99 -0
- pkgs/type_spec/emit_io_ts.py +1 -1
- pkgs/type_spec/emit_open_api.py +160 -41
- pkgs/type_spec/emit_open_api_util.py +13 -7
- pkgs/type_spec/emit_python.py +450 -136
- pkgs/type_spec/emit_typescript.py +117 -250
- pkgs/type_spec/emit_typescript_util.py +293 -4
- pkgs/type_spec/load_types.py +20 -5
- pkgs/type_spec/non_discriminated_union_exceptions.py +14 -0
- pkgs/type_spec/open_api_util.py +29 -4
- pkgs/type_spec/parts/base.py.prepart +13 -10
- pkgs/type_spec/parts/base.ts.prepart +4 -0
- pkgs/type_spec/type_info/__main__.py +3 -1
- pkgs/type_spec/type_info/emit_type_info.py +161 -32
- pkgs/type_spec/ui_entry_actions/__init__.py +4 -0
- pkgs/type_spec/ui_entry_actions/generate_ui_entry_actions.py +308 -0
- pkgs/type_spec/util.py +4 -4
- pkgs/type_spec/value_spec/__main__.py +27 -10
- pkgs/type_spec/value_spec/convert_type.py +21 -1
- pkgs/type_spec/value_spec/emit_python.py +25 -7
- pkgs/type_spec/value_spec/types.py +1 -1
- uncountable/__init__.py +1 -2
- uncountable/core/__init__.py +11 -3
- uncountable/core/async_batch.py +16 -1
- uncountable/core/client.py +247 -52
- uncountable/core/environment.py +41 -0
- uncountable/core/file_upload.py +67 -22
- uncountable/core/types.py +8 -13
- uncountable/integration/cli.py +142 -0
- uncountable/integration/construct_client.py +43 -27
- uncountable/integration/cron.py +12 -11
- uncountable/integration/db/connect.py +12 -2
- uncountable/integration/db/session.py +25 -0
- uncountable/integration/entrypoint.py +4 -34
- uncountable/integration/executors/executors.py +147 -0
- uncountable/integration/executors/generic_upload_executor.py +336 -0
- uncountable/integration/executors/script_executor.py +15 -9
- uncountable/integration/http_server/__init__.py +5 -0
- uncountable/integration/http_server/types.py +69 -0
- uncountable/integration/job.py +246 -19
- uncountable/integration/queue_runner/__init__.py +0 -0
- uncountable/integration/queue_runner/command_server/__init__.py +28 -0
- uncountable/integration/queue_runner/command_server/command_client.py +133 -0
- uncountable/integration/queue_runner/command_server/command_server.py +142 -0
- uncountable/integration/queue_runner/command_server/constants.py +4 -0
- uncountable/integration/queue_runner/command_server/protocol/__init__.py +0 -0
- uncountable/integration/queue_runner/command_server/protocol/command_server.proto +58 -0
- uncountable/integration/queue_runner/command_server/protocol/command_server_pb2.py +57 -0
- uncountable/integration/queue_runner/command_server/protocol/command_server_pb2.pyi +114 -0
- uncountable/integration/queue_runner/command_server/protocol/command_server_pb2_grpc.py +264 -0
- uncountable/integration/queue_runner/command_server/types.py +75 -0
- uncountable/integration/queue_runner/datastore/__init__.py +3 -0
- uncountable/integration/queue_runner/datastore/datastore_sqlite.py +250 -0
- uncountable/integration/queue_runner/datastore/interface.py +29 -0
- uncountable/integration/queue_runner/datastore/model.py +24 -0
- uncountable/integration/queue_runner/job_scheduler.py +200 -0
- uncountable/integration/queue_runner/queue_runner.py +34 -0
- uncountable/integration/queue_runner/types.py +7 -0
- uncountable/integration/queue_runner/worker.py +116 -0
- uncountable/integration/scan_profiles.py +67 -0
- uncountable/integration/scheduler.py +199 -0
- uncountable/integration/secret_retrieval/__init__.py +3 -0
- uncountable/integration/secret_retrieval/retrieve_secret.py +93 -0
- uncountable/integration/server.py +103 -54
- uncountable/integration/telemetry.py +251 -0
- uncountable/integration/webhook_server/entrypoint.py +97 -0
- uncountable/types/__init__.py +149 -30
- uncountable/types/api/batch/execute_batch.py +16 -9
- uncountable/types/api/batch/execute_batch_load_async.py +13 -7
- uncountable/types/api/chemical/convert_chemical_formats.py +20 -8
- uncountable/types/api/condition_parameters/__init__.py +1 -0
- uncountable/types/api/condition_parameters/upsert_condition_match.py +72 -0
- uncountable/types/api/entity/create_entities.py +24 -12
- uncountable/types/api/entity/create_entity.py +22 -13
- uncountable/types/api/entity/create_or_update_entity.py +48 -0
- uncountable/types/api/entity/export_entities.py +59 -0
- uncountable/types/api/entity/get_entities_data.py +18 -9
- uncountable/types/api/entity/grant_entity_permissions.py +48 -0
- uncountable/types/api/entity/list_aggregate.py +79 -0
- uncountable/types/api/entity/list_entities.py +53 -14
- uncountable/types/api/entity/lock_entity.py +45 -0
- uncountable/types/api/entity/lookup_entity.py +116 -0
- uncountable/types/api/entity/resolve_entity_ids.py +19 -10
- uncountable/types/api/entity/set_entity_field_values.py +44 -0
- uncountable/types/api/entity/set_values.py +15 -8
- uncountable/types/api/entity/transition_entity_phase.py +27 -12
- uncountable/types/api/entity/unlock_entity.py +44 -0
- uncountable/types/api/equipment/__init__.py +1 -0
- uncountable/types/api/equipment/associate_equipment_input.py +43 -0
- uncountable/types/api/field_options/__init__.py +1 -0
- uncountable/types/api/field_options/upsert_field_options.py +55 -0
- uncountable/types/api/files/__init__.py +1 -0
- uncountable/types/api/files/download_file.py +77 -0
- uncountable/types/api/id_source/list_id_source.py +20 -11
- uncountable/types/api/id_source/match_id_source.py +15 -10
- uncountable/types/api/input_groups/get_input_group_names.py +16 -7
- uncountable/types/api/inputs/create_inputs.py +28 -14
- uncountable/types/api/inputs/get_input_data.py +34 -16
- uncountable/types/api/inputs/get_input_names.py +19 -10
- uncountable/types/api/inputs/get_inputs_data.py +29 -11
- uncountable/types/api/inputs/set_input_attribute_values.py +16 -10
- uncountable/types/api/inputs/set_input_category.py +44 -0
- uncountable/types/api/inputs/set_input_subcategories.py +45 -0
- uncountable/types/api/inputs/set_intermediate_type.py +50 -0
- uncountable/types/api/integrations/__init__.py +1 -0
- uncountable/types/api/integrations/publish_realtime_data.py +41 -0
- uncountable/types/api/integrations/push_notification.py +49 -0
- uncountable/types/api/integrations/register_sockets_token.py +41 -0
- uncountable/types/api/listing/__init__.py +1 -0
- uncountable/types/api/listing/fetch_listing.py +58 -0
- uncountable/types/api/material_families/__init__.py +1 -0
- uncountable/types/api/material_families/update_entity_material_families.py +47 -0
- uncountable/types/api/notebooks/__init__.py +1 -0
- uncountable/types/api/notebooks/add_notebook_content.py +119 -0
- uncountable/types/api/outputs/get_output_data.py +32 -17
- uncountable/types/api/outputs/get_output_names.py +18 -9
- uncountable/types/api/outputs/get_output_organization.py +173 -0
- uncountable/types/api/outputs/resolve_output_conditions.py +23 -11
- uncountable/types/api/permissions/set_core_permissions.py +31 -15
- uncountable/types/api/project/get_projects.py +20 -11
- uncountable/types/api/project/get_projects_data.py +23 -14
- uncountable/types/api/recipe_links/create_recipe_link.py +17 -10
- uncountable/types/api/recipe_links/remove_recipe_link.py +45 -0
- uncountable/types/api/recipe_metadata/get_recipe_metadata_data.py +19 -10
- uncountable/types/api/recipes/add_recipe_to_project.py +42 -0
- uncountable/types/api/recipes/add_time_series_data.py +64 -0
- uncountable/types/api/recipes/archive_recipes.py +14 -7
- uncountable/types/api/recipes/associate_recipe_as_input.py +16 -8
- uncountable/types/api/recipes/associate_recipe_as_lot.py +14 -7
- uncountable/types/api/recipes/clear_recipe_outputs.py +42 -0
- uncountable/types/api/recipes/create_mix_order.py +44 -0
- uncountable/types/api/recipes/create_recipe.py +21 -14
- uncountable/types/api/recipes/create_recipes.py +25 -13
- uncountable/types/api/recipes/disassociate_recipe_as_input.py +14 -7
- uncountable/types/api/recipes/edit_recipe_inputs.py +208 -19
- uncountable/types/api/recipes/get_column_calculation_values.py +57 -0
- uncountable/types/api/recipes/get_curve.py +15 -9
- uncountable/types/api/recipes/get_recipe_calculations.py +17 -11
- uncountable/types/api/recipes/get_recipe_links.py +14 -8
- uncountable/types/api/recipes/get_recipe_names.py +16 -7
- uncountable/types/api/recipes/get_recipe_output_metadata.py +16 -10
- uncountable/types/api/recipes/get_recipes_data.py +96 -45
- uncountable/types/api/recipes/lock_recipes.py +64 -0
- uncountable/types/api/recipes/remove_recipe_from_project.py +42 -0
- uncountable/types/api/recipes/set_recipe_inputs.py +19 -13
- uncountable/types/api/recipes/set_recipe_metadata.py +14 -7
- uncountable/types/api/recipes/set_recipe_output_annotations.py +114 -0
- uncountable/types/api/recipes/set_recipe_output_file.py +55 -0
- uncountable/types/api/recipes/set_recipe_outputs.py +40 -15
- uncountable/types/api/recipes/set_recipe_tags.py +30 -13
- uncountable/types/api/recipes/set_recipe_total.py +59 -0
- uncountable/types/api/recipes/unarchive_recipes.py +41 -0
- uncountable/types/api/recipes/unlock_recipes.py +51 -0
- uncountable/types/api/runsheet/__init__.py +1 -0
- uncountable/types/api/runsheet/complete_async_upload.py +41 -0
- uncountable/types/api/triggers/run_trigger.py +15 -8
- uncountable/types/api/uploader/__init__.py +1 -0
- uncountable/types/api/uploader/complete_async_parse.py +46 -0
- uncountable/types/api/uploader/invoke_uploader.py +46 -0
- uncountable/types/api/user/__init__.py +1 -0
- uncountable/types/api/user/get_current_user_info.py +40 -0
- uncountable/types/async_batch.py +8 -52
- uncountable/types/async_batch_processor.py +694 -18
- uncountable/types/async_batch_t.py +108 -0
- uncountable/types/async_jobs.py +8 -0
- uncountable/types/async_jobs_t.py +52 -0
- uncountable/types/auth_retrieval.py +11 -0
- uncountable/types/auth_retrieval_t.py +75 -0
- uncountable/types/base.py +5 -80
- uncountable/types/base_t.py +87 -0
- uncountable/types/calculations.py +3 -19
- uncountable/types/calculations_t.py +26 -0
- uncountable/types/chemical_structure.py +3 -23
- uncountable/types/chemical_structure_t.py +28 -0
- uncountable/types/client_base.py +1170 -88
- uncountable/types/client_config.py +8 -0
- uncountable/types/client_config_t.py +36 -0
- uncountable/types/curves.py +5 -43
- uncountable/types/curves_t.py +50 -0
- uncountable/types/data.py +12 -0
- uncountable/types/data_t.py +103 -0
- uncountable/types/entity.py +8 -270
- uncountable/types/entity_t.py +446 -0
- uncountable/types/experiment_groups.py +3 -19
- uncountable/types/experiment_groups_t.py +26 -0
- uncountable/types/exports.py +8 -0
- uncountable/types/exports_t.py +34 -0
- uncountable/types/field_values.py +25 -61
- uncountable/types/field_values_t.py +302 -0
- uncountable/types/fields.py +3 -20
- uncountable/types/fields_t.py +27 -0
- uncountable/types/generic_upload.py +14 -0
- uncountable/types/generic_upload_t.py +119 -0
- uncountable/types/id_source.py +7 -45
- uncountable/types/id_source_t.py +68 -0
- uncountable/types/identifier.py +6 -50
- uncountable/types/identifier_t.py +62 -0
- uncountable/types/input_attributes.py +3 -25
- uncountable/types/input_attributes_t.py +29 -0
- uncountable/types/inputs.py +6 -57
- uncountable/types/inputs_t.py +82 -0
- uncountable/types/integration_server.py +8 -0
- uncountable/types/integration_server_t.py +46 -0
- uncountable/types/integration_session.py +10 -0
- uncountable/types/integration_session_t.py +60 -0
- uncountable/types/integrations.py +10 -0
- uncountable/types/integrations_t.py +62 -0
- uncountable/types/job_definition.py +28 -0
- uncountable/types/job_definition_t.py +285 -0
- uncountable/types/listing.py +9 -0
- uncountable/types/listing_t.py +51 -0
- uncountable/types/notices.py +8 -0
- uncountable/types/notices_t.py +37 -0
- uncountable/types/notifications.py +11 -0
- uncountable/types/notifications_t.py +74 -0
- uncountable/types/outputs.py +3 -22
- uncountable/types/outputs_t.py +29 -0
- uncountable/types/overrides.py +9 -0
- uncountable/types/overrides_t.py +49 -0
- uncountable/types/permissions.py +3 -42
- uncountable/types/permissions_t.py +45 -0
- uncountable/types/phases.py +3 -19
- uncountable/types/phases_t.py +26 -0
- uncountable/types/post_base.py +3 -26
- uncountable/types/post_base_t.py +29 -0
- uncountable/types/queued_job.py +17 -0
- uncountable/types/queued_job_t.py +140 -0
- uncountable/types/recipe_identifiers.py +7 -58
- uncountable/types/recipe_identifiers_t.py +75 -0
- uncountable/types/recipe_inputs.py +4 -26
- uncountable/types/recipe_inputs_t.py +29 -0
- uncountable/types/recipe_links.py +4 -46
- uncountable/types/recipe_links_t.py +53 -0
- uncountable/types/recipe_metadata.py +5 -48
- uncountable/types/recipe_metadata_t.py +57 -0
- uncountable/types/recipe_output_metadata.py +3 -20
- uncountable/types/recipe_output_metadata_t.py +27 -0
- uncountable/types/recipe_tags.py +3 -19
- uncountable/types/recipe_tags_t.py +26 -0
- uncountable/types/recipe_workflow_steps.py +9 -73
- uncountable/types/recipe_workflow_steps_t.py +95 -0
- uncountable/types/recipes.py +7 -0
- uncountable/types/recipes_t.py +25 -0
- uncountable/types/response.py +3 -21
- uncountable/types/response_t.py +26 -0
- uncountable/types/secret_retrieval.py +11 -0
- uncountable/types/secret_retrieval_t.py +75 -0
- uncountable/types/sockets.py +20 -0
- uncountable/types/sockets_t.py +169 -0
- uncountable/types/structured_filters.py +25 -0
- uncountable/types/structured_filters_t.py +248 -0
- uncountable/types/units.py +3 -19
- uncountable/types/units_t.py +26 -0
- uncountable/types/uploader.py +24 -0
- uncountable/types/uploader_t.py +222 -0
- uncountable/types/users.py +3 -20
- uncountable/types/users_t.py +27 -0
- uncountable/types/webhook_job.py +9 -0
- uncountable/types/webhook_job_t.py +48 -0
- uncountable/types/workflows.py +4 -28
- uncountable/types/workflows_t.py +38 -0
- uncountablepythonsdk-0.0.131.dist-info/METADATA +64 -0
- uncountablepythonsdk-0.0.131.dist-info/RECORD +363 -0
- {UncountablePythonSDK-0.0.24.dist-info → uncountablepythonsdk-0.0.131.dist-info}/WHEEL +1 -1
- {UncountablePythonSDK-0.0.24.dist-info → uncountablepythonsdk-0.0.131.dist-info}/top_level.txt +0 -1
- UncountablePythonSDK-0.0.24.dist-info/METADATA +0 -47
- UncountablePythonSDK-0.0.24.dist-info/RECORD +0 -216
- docs/quickstart.md +0 -19
- examples/recipe-import/importer.py +0 -39
- type_spec/external/api/batch/execute_batch.yaml +0 -56
- type_spec/external/api/batch/execute_batch_load_async.yaml +0 -18
- type_spec/external/api/chemical/convert_chemical_formats.yaml +0 -33
- type_spec/external/api/entity/create_entities.yaml +0 -45
- type_spec/external/api/entity/create_entity.yaml +0 -51
- type_spec/external/api/entity/get_entities_data.yaml +0 -29
- type_spec/external/api/entity/list_entities.yaml +0 -52
- type_spec/external/api/entity/resolve_entity_ids.yaml +0 -29
- type_spec/external/api/entity/set_values.yaml +0 -18
- type_spec/external/api/entity/transition_entity_phase.yaml +0 -44
- type_spec/external/api/id_source/list_id_source.yaml +0 -35
- type_spec/external/api/id_source/match_id_source.yaml +0 -32
- type_spec/external/api/input_groups/get_input_group_names.yaml +0 -29
- type_spec/external/api/inputs/create_inputs.yaml +0 -48
- type_spec/external/api/inputs/get_input_data.yaml +0 -95
- type_spec/external/api/inputs/get_input_names.yaml +0 -38
- type_spec/external/api/inputs/get_inputs_data.yaml +0 -82
- type_spec/external/api/inputs/set_input_attribute_values.yaml +0 -33
- type_spec/external/api/outputs/get_output_data.yaml +0 -92
- type_spec/external/api/outputs/get_output_names.yaml +0 -35
- type_spec/external/api/outputs/resolve_output_conditions.yaml +0 -50
- type_spec/external/api/permissions/set_core_permissions.yaml +0 -69
- type_spec/external/api/project/get_projects.yaml +0 -42
- type_spec/external/api/project/get_projects_data.yaml +0 -50
- type_spec/external/api/recipe_links/create_recipe_link.yaml +0 -25
- type_spec/external/api/recipe_metadata/get_recipe_metadata_data.yaml +0 -41
- type_spec/external/api/recipes/archive_recipes.yaml +0 -20
- type_spec/external/api/recipes/associate_recipe_as_input.yaml +0 -19
- type_spec/external/api/recipes/associate_recipe_as_lot.yaml +0 -19
- type_spec/external/api/recipes/create_recipe.yaml +0 -39
- type_spec/external/api/recipes/create_recipes.yaml +0 -47
- type_spec/external/api/recipes/disassociate_recipe_as_input.yaml +0 -16
- type_spec/external/api/recipes/edit_recipe_inputs.yaml +0 -85
- type_spec/external/api/recipes/get_curve.yaml +0 -21
- type_spec/external/api/recipes/get_recipe_calculations.yaml +0 -39
- type_spec/external/api/recipes/get_recipe_links.yaml +0 -26
- type_spec/external/api/recipes/get_recipe_names.yaml +0 -29
- type_spec/external/api/recipes/get_recipe_output_metadata.yaml +0 -36
- type_spec/external/api/recipes/get_recipes_data.yaml +0 -244
- type_spec/external/api/recipes/set_recipe_inputs.yaml +0 -42
- type_spec/external/api/recipes/set_recipe_metadata.yaml +0 -20
- type_spec/external/api/recipes/set_recipe_outputs.yaml +0 -52
- type_spec/external/api/recipes/set_recipe_tags.yaml +0 -62
- type_spec/external/api/triggers/run_trigger.yaml +0 -18
- uncountable/integration/types.py +0 -89
|
@@ -4,9 +4,10 @@ import re
|
|
|
4
4
|
|
|
5
5
|
@functools.lru_cache(maxsize=500000)
def snake_to_camel_case(o: str) -> str:
    """Convert a snake_case identifier to camelCase.

    The text before the first underscore is kept verbatim; every following
    underscore-separated piece is passed through ``str.title()`` and the
    pieces are concatenated. A string without any underscore is returned
    unchanged. Results are memoized by the surrounding ``lru_cache``.
    """
    head, *tail = o.split("_")
    if not tail:
        # No underscore present: nothing to convert.
        return o
    return head + "".join(piece.title() for piece in tail)
|
|
10
11
|
|
|
11
12
|
|
|
12
13
|
def kebab_to_pascal_case(o: str) -> str:
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
from ._gdrive_session import GDriveSession as GDriveSession
|
|
2
|
+
from ._gdrive_session import delete_gdrive_file as delete_gdrive_file
|
|
3
|
+
from ._gdrive_session import download_gdrive_file as download_gdrive_file
|
|
4
|
+
from ._gdrive_session import list_gdrive_files as list_gdrive_files
|
|
5
|
+
from ._gdrive_session import move_gdrive_file as move_gdrive_file
|
|
6
|
+
from ._gdrive_session import upload_file_gdrive as upload_file_gdrive
|
|
7
|
+
from ._local_session import LocalSession as LocalSession
|
|
8
|
+
from ._s3_session import S3Session as S3Session
|
|
9
|
+
from ._sftp_session import SFTPSession as SFTPSession
|
|
10
|
+
from ._sftp_session import list_sftp_files as list_sftp_files
|
|
11
|
+
from ._sftp_session import move_sftp_files as move_sftp_files
|
|
12
|
+
from .file_type_utils import FileObjectData as FileObjectData
|
|
13
|
+
from .file_type_utils import FileSystemFileReference as FileSystemFileReference
|
|
14
|
+
from .file_type_utils import FileSystemObject as FileSystemObject
|
|
15
|
+
from .file_type_utils import FileSystemS3Config as FileSystemS3Config
|
|
16
|
+
from .file_type_utils import FileSystemSFTPConfig as FileSystemSFTPConfig
|
|
17
|
+
from .file_type_utils import FileTransfer as FileTransfer
|
|
18
|
+
from .file_type_utils import IncompatibleFileReference as IncompatibleFileReference
|
|
19
|
+
from .file_type_utils import RemoteObjectReference as RemoteObjectReference
|
|
20
|
+
from .filesystem_session import FileSystemSession as FileSystemSession
|
|
@@ -0,0 +1,144 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from azure.core.credentials import AzureSasCredential
|
|
4
|
+
from azure.storage.blob import BlobServiceClient, ContainerClient
|
|
5
|
+
|
|
6
|
+
from pkgs.filesystem_utils.file_type_utils import (
|
|
7
|
+
FileObjectData,
|
|
8
|
+
FileSystemBlobConfig,
|
|
9
|
+
FileSystemFileReference,
|
|
10
|
+
FileSystemObject,
|
|
11
|
+
FileTransfer,
|
|
12
|
+
IncompatibleFileReference,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from .filesystem_session import FileSystemSession
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _add_slash(prefix: str) -> str:
    """Return ``prefix`` with a trailing ``/`` appended when it lacks one.

    An empty prefix is returned unchanged.
    """
    if prefix and not prefix.endswith("/"):
        return prefix + "/"
    return prefix
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class BlobSession(FileSystemSession):
    """File system session that operates on one Azure Blob Storage container.

    The clients are created by ``start()`` (which ``__enter__`` calls) from
    ``config.account_url``, ``config.credential`` and ``config.container``,
    and are dropped again in ``__exit__``. Every file operation accepts only
    ``FileSystemFileReference`` objects and raises
    ``IncompatibleFileReference`` for anything else.
    """

    config: FileSystemBlobConfig
    # Both clients default to None so that using the session before start()
    # trips the "uninitialized blob session" assertion instead of raising an
    # AttributeError for a missing attribute.
    service_client: BlobServiceClient | None = None
    container_client: ContainerClient | None = None

    # Seconds to sleep between polls while a server-side copy is pending.
    _COPY_POLL_INTERVAL_SECONDS = 0.5

    def __init__(self, blob_config: FileSystemBlobConfig) -> None:
        super().__init__()
        self.config = blob_config

    def start(self) -> None:
        """Create the service client and the container client."""
        self.service_client = BlobServiceClient(
            self.config.account_url, credential=self.config.credential
        )
        self.container_client = self.service_client.get_container_client(
            self.config.container
        )

    def __enter__(self) -> "BlobSession":
        self.start()
        return self

    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        self.service_client = None
        self.container_client = None

    def _require_container_client(self, operation: str) -> ContainerClient:
        """Return the container client, asserting the session was started."""
        assert self.service_client is not None and self.container_client is not None, (
            f"call to {operation} on uninitialized blob session"
        )
        return self.container_client

    def list_files(
        self,
        dir_path: FileSystemObject,
        *,
        recursive: bool = False,
        valid_extensions: list[str] | None = None,
    ) -> list[FileSystemObject]:
        """List blobs whose name starts with ``dir_path.filepath`` + ``/``.

        Args:
            dir_path: reference whose ``filepath`` is used as the name prefix.
            recursive: when False, the prefix entry itself and any blob with
                a further ``/`` after the prefix are skipped.
            valid_extensions: when given, only blobs whose name ends with one
                of these suffixes are returned.

        Raises:
            IncompatibleFileReference: ``dir_path`` is not a
                ``FileSystemFileReference``.
        """
        if not isinstance(dir_path, FileSystemFileReference):
            raise IncompatibleFileReference()

        container_client = self._require_container_client("list_files")

        # str.endswith accepts a tuple; an empty tuple matches nothing, which
        # is the same result any() gives over an empty list.
        suffixes = None if valid_extensions is None else tuple(valid_extensions)
        prefix = _add_slash(dir_path.filepath)

        filesystem_file_references: list[FileSystemObject] = []
        for blob in container_client.list_blobs(name_starts_with=prefix):
            if not recursive and (
                blob.name == prefix or "/" in blob.name[len(prefix) :]
            ):
                continue
            if suffixes is None or blob.name.endswith(suffixes):
                filesystem_file_references.append(
                    FileSystemFileReference(filepath=blob.name)
                )

        return filesystem_file_references

    def download_files(
        self,
        filepaths: list[FileSystemObject],
    ) -> list[FileObjectData]:
        """Download every referenced blob fully into memory.

        Raises:
            IncompatibleFileReference: an entry is not a
                ``FileSystemFileReference`` or its ``filename`` is None.
        """
        container_client = self._require_container_client("download_files")

        downloaded_files: list[FileObjectData] = []
        for file_object in filepaths:
            if (
                not isinstance(file_object, FileSystemFileReference)
                or file_object.filename is None
            ):
                raise IncompatibleFileReference()

            blob_client = container_client.get_blob_client(file_object.filepath)
            file_data = blob_client.download_blob().readall()
            downloaded_files.append(
                FileObjectData(
                    file_data=file_data,
                    file_IO=BytesIO(file_data),
                    filename=file_object.filename,
                    filepath=file_object.filepath,
                )
            )

        return downloaded_files

    def move_files(self, file_mappings: list[FileTransfer]) -> None:
        """Move blobs by copying each source to its destination, then deleting it.

        The source is deleted only after the service reports the copy as
        successful, so a pending or failed copy can no longer lose data.

        Raises:
            IncompatibleFileReference: a source or destination is not a
                ``FileSystemFileReference``.
            RuntimeError: the copy finished with a status other than
                ``success``; the source blob is left in place.
        """
        import time

        container_client = self._require_container_client("move_files")

        for src_file, dest_file in file_mappings:
            if not isinstance(src_file, FileSystemFileReference) or not isinstance(
                dest_file, FileSystemFileReference
            ):
                raise IncompatibleFileReference()

            source_blob_client = container_client.get_blob_client(src_file.filepath)
            dest_blob_client = container_client.get_blob_client(dest_file.filepath)

            # With SAS auth the signature is appended to the source URL that
            # is handed to the copy operation.
            source_url = (
                f"{source_blob_client.url}?{self.config.credential.signature}"
                if isinstance(self.config.credential, AzureSasCredential)
                else source_blob_client.url
            )

            # start_copy_from_url only *starts* the copy; poll until the
            # service reports a final status before touching the source.
            copy_status = dest_blob_client.start_copy_from_url(source_url).get(
                "copy_status"
            )
            while copy_status == "pending":
                time.sleep(self._COPY_POLL_INTERVAL_SECONDS)
                copy_status = dest_blob_client.get_blob_properties().copy.status

            if copy_status != "success":
                raise RuntimeError(
                    f"copy of blob {src_file.filepath!r} to {dest_file.filepath!r} "
                    f"ended with status {copy_status!r}; source was not deleted"
                )

            source_blob_client.delete_blob()

    def delete_files(self, filepaths: list[FileSystemObject]) -> None:
        """Delete every referenced blob.

        Raises:
            IncompatibleFileReference: an entry is not a
                ``FileSystemFileReference``.
        """
        container_client = self._require_container_client("delete_files")

        for file_object in filepaths:
            if not isinstance(file_object, FileSystemFileReference):
                raise IncompatibleFileReference()

            container_client.get_blob_client(file_object.filepath).delete_blob()
|
|
@@ -0,0 +1,309 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from io import BytesIO
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
from google.oauth2 import service_account
|
|
6
|
+
from googleapiclient.discovery import build as build_gdrive_connection
|
|
7
|
+
from googleapiclient.errors import HttpError
|
|
8
|
+
from googleapiclient.http import MediaIoBaseDownload, MediaIoBaseUpload
|
|
9
|
+
from tqdm import tqdm
|
|
10
|
+
|
|
11
|
+
from pkgs.filesystem_utils.file_type_utils import (
|
|
12
|
+
FileObjectData,
|
|
13
|
+
FileSystemFileReference,
|
|
14
|
+
FileSystemObject,
|
|
15
|
+
FileTransfer,
|
|
16
|
+
IncompatibleFileReference,
|
|
17
|
+
RemoteObjectReference,
|
|
18
|
+
)
|
|
19
|
+
|
|
20
|
+
from .filesystem_session import FileSystemSession
|
|
21
|
+
|
|
22
|
+
# NOTE: google apis do not have static types
|
|
23
|
+
GDriveResource = Any
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def download_gdrive_file(
    gdrive_connection: GDriveResource,
    file_id: str,
    filename: str,
    mime_type: str,
    *,
    verbose: bool = False,
) -> FileObjectData | None:
    """Download one Drive file into memory.

    Folders and unsupported Google Workspace types are skipped and yield
    ``None``. Google Sheets are exported as ``text/csv`` (``.csv`` is appended
    to the filename) and Google Docs as ``application/msword`` (``.doc`` is
    appended). Any other file is fetched as-is with ``get_media``.

    Args:
        gdrive_connection: Drive API resource exposing ``files()``.
        file_id: id of the file to fetch; also stored as the result's
            ``filepath`` and in its ``metadata``.
        filename: name to give the downloaded file.
        mime_type: MIME type used to pick the download strategy.
        verbose: print a message describing skips and exports.
    """
    if "folder" in mime_type:
        if verbose:
            print(f"{filename} is a folder and will not be downloaded.")
        return None

    if "google-apps" not in mime_type:
        # Regular binary content: download directly.
        file_request = gdrive_connection.files().get_media(fileId=file_id)
    elif "spreadsheet" in mime_type:
        if verbose:
            print(f"{filename} is a Google Sheet, exporting.")
        file_request = gdrive_connection.files().export_media(
            fileId=file_id, mimeType="text/csv"
        )
        filename += ".csv"
    elif "document" in mime_type:
        if verbose:
            print(f"{filename} is a Google Doc, exporting.")
        file_request = gdrive_connection.files().export_media(
            fileId=file_id, mimeType="application/msword"
        )
        filename += ".doc"
    else:
        if verbose:
            print(f"{filename} is an unsupported google workspace filetype.")
            print(f"Skipping. mimeType: {mime_type}.")
        return None

    buffer = BytesIO()
    downloader = MediaIoBaseDownload(buffer, file_request)
    finished = False
    while not finished:
        _progress, finished = downloader.next_chunk()

    payload = buffer.getvalue()
    return FileObjectData(
        file_data=payload,
        file_IO=BytesIO(payload),
        filename=filename,
        filepath=file_id,
        metadata={"id": file_id},
        mime_type=mime_type,
    )
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
def list_gdrive_files(
    gdrive_connection: GDriveResource, gdrive_folder_id: str, *, recurse: bool = False
) -> list[dict[str, str]]:
    """Return the file entries of a Drive folder, following pagination.

    Progress is printed to stdout: ``Listing files`` followed by one ``.`` per
    additional page and a final newline.

    Args:
        gdrive_connection: Drive API resource exposing ``files()``.
        gdrive_folder_id: id of the folder to list.
        recurse: also list every entry whose ``mimeType`` is
            ``application/vnd.google-apps.folder``; the nested entries are
            appended after this folder's own entries.
    """
    request_kwargs = {
        "q": f"parents = '{gdrive_folder_id}'",
        "corpora": "allDrives",
        "includeItemsFromAllDrives": True,
        "supportsAllDrives": True,
    }

    print("Listing files", end="", flush=True)
    page = gdrive_connection.files().list(**request_kwargs).execute()
    # Get available files: https://developers.google.com/drive/api/v3/manage-downloads#python
    entries: list[dict[str, str]] = list(page.get("files", []))
    while (next_token := page.get("nextPageToken")) is not None:
        print(".", end="", flush=True)
        page = (
            gdrive_connection.files()
            .list(**request_kwargs, pageToken=next_token)
            .execute()
        )
        entries.extend(page.get("files", []))
    print()

    nested_entries: list[dict[str, str]] = []
    if recurse:
        folder_ids = [
            entry["id"]
            for entry in entries
            if entry["mimeType"] == "application/vnd.google-apps.folder"
        ]
        for folder_id in folder_ids:
            nested_entries.extend(
                list_gdrive_files(
                    gdrive_connection=gdrive_connection,
                    gdrive_folder_id=folder_id,
                    recurse=True,
                )
            )
    return entries + nested_entries
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def upload_file_gdrive(
    gdrive_connection: GDriveResource,
    src_file: BytesIO,
    mime_type: str,
    dest_folder_id: str,
    dest_filename: str,
) -> None:
    """Upload an in-memory file into a Drive folder (best effort).

    Args:
        gdrive_connection: Drive client used to issue ``files().create``.
        src_file: Stream holding the content to upload.
        mime_type: MIME type passed to the media upload.
        dest_folder_id: ID of the folder that becomes the new file's parent.
        dest_filename: Name given to the created file.

    An ``HttpError`` raised by the request is not propagated: a message is
    printed instead so that callers processing several files keep going.
    """
    file_metadata = {"name": dest_filename, "parents": [dest_folder_id]}
    media = MediaIoBaseUpload(src_file, mimetype=mime_type)
    try:
        gdrive_connection.files().create(
            body=file_metadata, media_body=media, fields="id", supportsAllDrives=True
        ).execute()
    except HttpError as err:
        # Keep the upload best-effort, but say what went wrong: the bare
        # message alone gives no way to diagnose a failed upload.
        print(f"FileSystemObject Upload to GDrive Unsuccessful: {err}")
|
|
144
|
+
|
|
145
|
+
|
|
146
|
+
def move_gdrive_file(
    gdrive_connection: GDriveResource,
    src_file_id: str,
    dest_folder_id: str,
    *,
    dest_filename: str | None = None,
) -> None:
    """Move a Drive file into another folder, optionally renaming it.

    The file's current parents are looked up and replaced by
    ``dest_folder_id``. Rename and re-parenting are sent as one
    ``files().update`` request, so a failed move cannot leave the file
    renamed but still in its old folder.

    Args:
        gdrive_connection: Drive client used to issue the requests.
        src_file_id: ID of the file to move.
        dest_folder_id: ID of the folder the file is moved into.
        dest_filename: New name for the file; ``None`` keeps the current name.
    """
    # Retrieve the existing parents to remove
    current = (
        gdrive_connection.files()
        .get(fileId=src_file_id, fields="parents", supportsAllDrives=True)
        .execute()
    )

    update_args = {
        "fileId": src_file_id,
        "addParents": dest_folder_id,
        "fields": "id, parents",
        # supportsTeamDrives is deprecated in favour of supportsAllDrives,
        # which is also what the other helpers in this module send.
        "supportsAllDrives": True,
    }
    # "parents" can be missing from the response; joining None would raise.
    previous_parents = ",".join(current.get("parents") or [])
    if previous_parents:
        update_args["removeParents"] = previous_parents
    if dest_filename is not None:
        update_args["body"] = {"name": dest_filename}

    gdrive_connection.files().update(**update_args).execute()
|
|
175
|
+
|
|
176
|
+
|
|
177
|
+
def delete_gdrive_file(gdrive_connection: GDriveResource, file_id: str) -> None:
    """Issue a ``files().delete`` request for ``file_id`` and execute it."""
    delete_request = gdrive_connection.files().delete(
        fileId=file_id, supportsAllDrives=True
    )
    delete_request.execute()
|
|
179
|
+
|
|
180
|
+
|
|
181
|
+
class GDriveSession(FileSystemSession):
    """File-system session backed by Google Drive.

    The connection is built from a service-account JSON key file when the
    session is started (directly via ``start`` or by entering the context
    manager) and closed when the context manager exits.
    """

    def __init__(self, service_account_json_path: str) -> None:
        """Remember the path of the service-account key; no I/O happens here."""
        super().__init__()
        self.service_account_json_path = service_account_json_path

    def start(self) -> None:
        """Load the service-account credentials and open the Drive v3 connection."""
        credentials = service_account.Credentials.from_service_account_file(  # type: ignore[no-untyped-call]
            self.service_account_json_path
        )
        self.connection = build_gdrive_connection(
            "drive", "v3", credentials=credentials
        )

    def list_files(
        self,
        dir_path: FileSystemObject,
        *,
        recursive: bool = False,
        valid_file_extensions: tuple[str, ...] | None = None,
    ) -> list[FileSystemObject]:
        """List the entries of a Drive folder as ``RemoteObjectReference`` objects.

        Args:
            dir_path: Reference to the folder; must be a ``RemoteObjectReference``
                whose ``is_dir`` is true.
            recursive: Forwarded to ``list_gdrive_files`` as ``recurse``.
            valid_file_extensions: When given, only entries whose name extension
                (as returned by ``os.path.splitext``, i.e. including the dot)
                is in this tuple are kept.

        Raises:
            IncompatibleFileReference: ``dir_path`` is not a remote reference or
                does not reference a directory.
        """
        if not isinstance(dir_path, RemoteObjectReference):
            raise IncompatibleFileReference(
                "Incompatible FileSystemObject to GDriveSession.list_files"
            )
        if not dir_path.is_dir:
            raise IncompatibleFileReference(
                "FileSystemObject does not reference a directory"
            )

        listing = list_gdrive_files(
            self.connection, dir_path.file_id, recurse=recursive
        )
        return [
            RemoteObjectReference(
                file_id=entry["id"],
                mime_type=entry["mimeType"],
                filename=entry["name"],
            )
            for entry in listing
            if valid_file_extensions is None
            or os.path.splitext(entry["name"])[1] in valid_file_extensions
        ]

    def delete_files(self, filepaths: list[FileSystemObject]) -> None:
        """Delete every referenced Drive file, in order.

        Warning:
            Security account must have sufficient permissions to perform delete!
            https://developers.google.com/drive/api/v3/reference/files/delete?hl=en
            https://developers.google.com/drive/api/v3/ref-roles

        Raises:
            IncompatibleFileReference: an entry is not a ``RemoteObjectReference``.
                Entries before it have already been deleted at that point.
        """
        for target in filepaths:
            if not isinstance(target, RemoteObjectReference):
                raise IncompatibleFileReference(
                    "Incompatible FileSystemObject provided to GDriveSession.delete_files"
                )
            delete_gdrive_file(self.connection, target.file_id)

    def move_files(self, file_mappings: list[FileTransfer]) -> None:
        """Move Drive files, or upload in-memory data, into Drive folders.

        Each mapping is a ``(source, destination)`` pair. Pairs are silently
        skipped when the source is a ``FileSystemFileReference``, when the
        destination is not a remote directory reference, or when the source
        is itself a remote directory. A remote source is moved (and renamed
        when the destination carries a filename); a ``FileObjectData`` source
        is uploaded under the destination's filename, falling back to its own.

        Raises:
            IncompatibleFileReference: a ``FileObjectData`` source has no
                ``mime_type``, or the source is of an unrecognized kind.
        """
        for src_file, dest_file in file_mappings:
            if isinstance(src_file, FileSystemFileReference):
                continue
            if not isinstance(dest_file, RemoteObjectReference) or not dest_file.is_dir:
                continue

            if isinstance(src_file, RemoteObjectReference):
                if src_file.is_dir:
                    continue
                # dest_filename=None means "keep the current name", so the
                # destination filename can be forwarded unconditionally.
                move_gdrive_file(
                    self.connection,
                    src_file.file_id,
                    dest_file.file_id,
                    dest_filename=dest_file.filename,
                )
            elif isinstance(src_file, FileObjectData):
                if src_file.mime_type is None:
                    raise IncompatibleFileReference(
                        "No mime_type present on source file data."
                    )
                upload_name = (
                    dest_file.filename
                    if dest_file.filename is not None
                    else src_file.filename
                )
                upload_file_gdrive(
                    self.connection,
                    src_file.file_IO,
                    src_file.mime_type,
                    dest_file.file_id,
                    upload_name,
                )
            else:
                raise IncompatibleFileReference(
                    "Unrecognized file reference in FileTransfer object"
                )

    def download_files(self, filepaths: list[FileSystemObject]) -> list[FileObjectData]:
        """Download the referenced Drive files, showing a progress bar.

        Downloads for which ``download_gdrive_file`` returns ``None`` are left
        out of the result.

        Raises:
            IncompatibleFileReference: an entry is not a ``RemoteObjectReference``
                or has no filename.
        """
        print(f"Downloading {len(filepaths)} files")
        fetched: list[FileObjectData] = []
        for remote_ref in tqdm(filepaths):
            if not (
                isinstance(remote_ref, RemoteObjectReference)
                and remote_ref.filename is not None
            ):
                raise IncompatibleFileReference(
                    "Incompatible FileSystemObject included in filepaths"
                )
            file_data = download_gdrive_file(
                self.connection,
                remote_ref.file_id,
                remote_ref.filename,
                remote_ref.mime_type,
            )
            if file_data is None:
                continue
            fetched.append(file_data)
        return fetched

    def __enter__(self) -> "GDriveSession":
        """Start the session and return it for use in a ``with`` block."""
        self.start()
        return self

    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        """Close the Drive connection; exceptions are not suppressed."""
        self.connection.close()
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
import os
|
|
2
|
+
from io import BytesIO
|
|
3
|
+
|
|
4
|
+
from pkgs.filesystem_utils.file_type_utils import (
|
|
5
|
+
FileObjectData,
|
|
6
|
+
FileSystemFileReference,
|
|
7
|
+
FileSystemObject,
|
|
8
|
+
FileTransfer,
|
|
9
|
+
IncompatibleFileReference,
|
|
10
|
+
)
|
|
11
|
+
|
|
12
|
+
from .filesystem_session import FileSystemSession
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
class LocalSession(FileSystemSession):
    """File-system session operating on the local disk.

    There is no connection to manage, so ``start`` and the context-manager
    hooks do nothing. All methods accept only ``FileSystemFileReference``
    objects and raise ``IncompatibleFileReference`` for anything else.
    """

    def __init__(self) -> None:
        super().__init__()

    def start(self) -> None:
        """No-op: a local session needs no setup."""
        return None

    def __enter__(self) -> "LocalSession":
        return self

    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        return None

    def move_files(self, file_mappings: list[FileTransfer]) -> None:
        """Rename each source path to its destination path with ``os.rename``.

        Raises:
            IncompatibleFileReference: source or destination of a mapping is
                not a ``FileSystemFileReference``. Mappings before it have
                already been moved at that point.
        """
        for src_file, dest_file in file_mappings:
            if not (
                isinstance(src_file, FileSystemFileReference)
                and isinstance(dest_file, FileSystemFileReference)
            ):
                raise IncompatibleFileReference(
                    "Incompatible FileSystemObject provided to LocalSession.move_files"
                )
            os.rename(src_file.filepath, dest_file.filepath)

    def download_files(self, filepaths: list[FileSystemObject]) -> list[FileObjectData]:
        """Read each referenced file fully into memory.

        Returns:
            One ``FileObjectData`` per input, carrying the raw bytes, a
            ``BytesIO`` over the same bytes, the filename and the filepath.

        Raises:
            IncompatibleFileReference: an entry is not a
                ``FileSystemFileReference`` or has no filename.
        """
        downloaded_files: list[FileObjectData] = []
        for file_object in filepaths:
            if (
                not isinstance(file_object, FileSystemFileReference)
                or file_object.filename is None
            ):
                raise IncompatibleFileReference(
                    "Incompatible FileSystemObject provided to LocalSession.download_files"
                )
            with open(file_object.filepath, "rb") as file_data:
                file_bytes = file_data.read()
            downloaded_files.append(
                FileObjectData(
                    file_bytes,
                    BytesIO(file_bytes),
                    file_object.filename,
                    filepath=file_object.filepath,
                )
            )
        return downloaded_files

    def list_files(
        self, dir_path: FileSystemObject, *, recursive: bool = False
    ) -> list[FileSystemObject]:
        """List the direct entries of a local directory.

        Every name returned by ``os.listdir`` is joined onto the directory
        path, so sub-directories are included alongside files.

        Raises:
            IncompatibleFileReference: ``dir_path`` is not a
                ``FileSystemFileReference`` or its path is not a directory.
            NotImplementedError: ``recursive`` is true.
        """
        if not isinstance(dir_path, FileSystemFileReference) or not os.path.isdir(
            dir_path.filepath
        ):
            raise IncompatibleFileReference(
                "FileSystemObject provided to LocalSession.list_files does not "
                "reference a local directory"
            )
        if recursive:
            raise NotImplementedError("recursive not implemented for local session")
        return [
            FileSystemFileReference(os.path.join(dir_path.filepath, filename))
            for filename in os.listdir(dir_path.filepath)
        ]
|
|
@@ -0,0 +1,118 @@
|
|
|
1
|
+
from io import BytesIO
|
|
2
|
+
|
|
3
|
+
from boto3.session import Session
|
|
4
|
+
from mypy_boto3_s3.service_resource import Bucket
|
|
5
|
+
|
|
6
|
+
from pkgs.filesystem_utils.file_type_utils import (
|
|
7
|
+
FileObjectData,
|
|
8
|
+
FileSystemFileReference,
|
|
9
|
+
FileSystemObject,
|
|
10
|
+
FileSystemS3Config,
|
|
11
|
+
FileTransfer,
|
|
12
|
+
IncompatibleFileReference,
|
|
13
|
+
)
|
|
14
|
+
|
|
15
|
+
from .filesystem_session import FileSystemSession
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def _add_slash(prefix: str) -> str:
|
|
19
|
+
if len(prefix) > 0 and prefix[-1] != "/":
|
|
20
|
+
prefix = prefix + "/"
|
|
21
|
+
return prefix
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
class S3Session(FileSystemSession):
    """File-system session backed by a single S3 bucket.

    The bucket handle is created by ``start`` (or on entering the context
    manager) from the supplied ``FileSystemS3Config`` and dropped again when
    the context manager exits. Paths in ``FileSystemFileReference`` objects
    are used as object keys inside that bucket.
    """

    config: FileSystemS3Config

    def __init__(self, s3_config: FileSystemS3Config) -> None:
        super().__init__()
        self.config = s3_config
        # Defined up front so that using the session before start() trips the
        # "uninitialized s3 session" checks instead of an AttributeError.
        self.bucket: Bucket | None = None

    def start(self) -> None:
        """Create the boto3 session/resource and bind the configured bucket."""
        session = Session(region_name=self.config.region_name)
        s3_resource = session.resource(
            service_name="s3",
            endpoint_url=self.config.endpoint_url,
            aws_access_key_id=self.config.access_key_id,
            aws_secret_access_key=self.config.secret_access_key,
            aws_session_token=self.config.session_token,
        )

        self.bucket = s3_resource.Bucket(self.config.bucket_name)

    def __enter__(self) -> "S3Session":
        self.start()
        return self

    def __exit__(self, exc_type: object, exc_val: object, exc_tb: object) -> None:
        self.bucket = None

    def list_files(
        self,
        dir_path: FileSystemObject,
        *,
        recursive: bool = False,
        valid_extensions: list[str] | None = None,
    ) -> list[FileSystemObject]:
        """List object keys under a prefix.

        Args:
            dir_path: Reference whose ``filepath`` is the key prefix; a
                trailing "/" is appended when missing.
            recursive: When false, the prefix key itself and any key with a
                further "/" after the prefix are left out.
            valid_extensions: When given, only keys ending with one of these
                suffixes are returned.

        Raises:
            IncompatibleFileReference: ``dir_path`` is not a
                ``FileSystemFileReference``.
        """
        if not isinstance(dir_path, FileSystemFileReference):
            raise IncompatibleFileReference(
                "Incompatible FileSystemObject provided to S3Session.list_files"
            )

        assert self.bucket is not None, "call to list_files on uninitialized s3 session"

        # str.endswith takes a tuple of suffixes; build it once, not per key.
        suffixes = None if valid_extensions is None else tuple(valid_extensions)

        filesystem_references: list[FileSystemObject] = []
        prefix = _add_slash(dir_path.filepath)
        for obj in self.bucket.objects.filter(Prefix=prefix):
            if not recursive and (obj.key == prefix or "/" in obj.key[len(prefix) :]):
                continue
            if suffixes is None or obj.key.endswith(suffixes):
                filesystem_references.append(FileSystemFileReference(obj.key))

        return filesystem_references

    def download_files(
        self,
        filepaths: list[FileSystemObject],
    ) -> list[FileObjectData]:
        """Fetch each referenced object and return its content in memory.

        Raises:
            IncompatibleFileReference: an entry is not a
                ``FileSystemFileReference`` or has no filename.
        """
        downloaded_files: list[FileObjectData] = []
        assert self.bucket is not None, (
            "call to download_files on uninitialized s3 session"
        )

        for file_object in filepaths:
            if (
                not isinstance(file_object, FileSystemFileReference)
                or file_object.filename is None
            ):
                raise IncompatibleFileReference(
                    "Incompatible FileSystemObject provided to S3Session.download_files"
                )
            s3_file_obj = self.bucket.Object(file_object.filepath)
            response = s3_file_obj.get()
            file_obj_bytes = response["Body"].read()
            downloaded_files.append(
                FileObjectData(
                    file_data=file_obj_bytes,
                    file_IO=BytesIO(file_obj_bytes),
                    filename=file_object.filename,
                    filepath=file_object.filepath,
                )
            )

        return downloaded_files

    def move_files(self, file_mappings: list[FileTransfer]) -> None:
        """Move objects within the bucket by copying, then deleting the source.

        Raises:
            IncompatibleFileReference: source or destination of a mapping is
                not a ``FileSystemFileReference``.
        """
        assert self.bucket is not None, "call to move_files on uninitialized s3 session"

        for src_file, dest_file in file_mappings:
            if not isinstance(src_file, FileSystemFileReference) or not isinstance(
                dest_file, FileSystemFileReference
            ):
                raise IncompatibleFileReference(
                    "Incompatible FileSystemObject provided to S3Session.move_files"
                )
            if src_file.filepath == dest_file.filepath:
                # Moving a key onto itself is a no-op. Copy-then-delete here
                # would remove the only copy if the store accepts a self-copy.
                continue
            self.bucket.Object(dest_file.filepath).copy_from(
                CopySource={
                    "Bucket": self.bucket.name,
                    "Key": src_file.filepath,
                }
            )
            self.bucket.Object(src_file.filepath).delete()
|