hid_data_transfer_lib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. hid_data_transfer_lib-0.1.0/PKG-INFO +166 -0
  2. hid_data_transfer_lib-0.1.0/README.md +149 -0
  3. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/.env +1 -0
  4. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/__init__.py +0 -0
  5. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/__init__.py +0 -0
  6. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/__pycache__/__init__.cpython-311.pyc +0 -0
  7. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/__pycache__/__init__.cpython-39.pyc +0 -0
  8. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/__pycache__/cli_configuration.cpython-311.pyc +0 -0
  9. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/__pycache__/cli_configuration.cpython-39.pyc +0 -0
  10. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/__pycache__/conf.cpython-39.pyc +0 -0
  11. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/__pycache__/hid_dt_configuration.cpython-311.pyc +0 -0
  12. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/hid_dt.cfg +13 -0
  13. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/hid_dt_configuration.py +395 -0
  14. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/hid_dt_local.cfg +8 -0
  15. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/conf/hid_dt_psnc.cfg +13 -0
  16. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/exceptions/__init__.py +0 -0
  17. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/exceptions/__pycache__/__init__.cpython-311.pyc +0 -0
  18. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/exceptions/__pycache__/cli_exceptions.cpython-311.pyc +0 -0
  19. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/exceptions/__pycache__/hid_dt_exceptions.cpython-311.pyc +0 -0
  20. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/exceptions/hid_dt_exceptions.py +40 -0
  21. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/hid_dt_lib.py +746 -0
  22. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/keycloak/__init__.py +0 -0
  23. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/keycloak/__pycache__/__init__.cpython-311.pyc +0 -0
  24. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/keycloak/__pycache__/keycloak_api.cpython-311.pyc +0 -0
  25. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/keycloak/__pycache__/keycloak_rest.cpython-311.pyc +0 -0
  26. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/keycloak/keycloak_rest.py +236 -0
  27. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/__init__.py +0 -0
  28. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/__pycache__/__init__.cpython-311.pyc +0 -0
  29. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/__pycache__/__init__.cpython-39.pyc +0 -0
  30. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/__pycache__/client.cpython-311.pyc +0 -0
  31. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/__pycache__/client.cpython-39.pyc +0 -0
  32. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/__pycache__/nifi_client.cpython-311.pyc +0 -0
  33. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/__pycache__/nifi_rest.cpython-311.pyc +0 -0
  34. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/nifi_client.py +899 -0
  35. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/nifi/nifi_rest.py +644 -0
  36. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/py.typed +0 -0
  37. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/util/__init__.py +1 -0
  38. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/util/__pycache__/__init__.cpython-311.pyc +0 -0
  39. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/util/__pycache__/__init__.cpython-39.pyc +0 -0
  40. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/util/__pycache__/util.cpython-311.pyc +0 -0
  41. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/util/__pycache__/util.cpython-39.pyc +0 -0
  42. hid_data_transfer_lib-0.1.0/hid_data_transfer_lib/util/util.py +199 -0
  43. hid_data_transfer_lib-0.1.0/pyproject.toml +42 -0
@@ -0,0 +1,166 @@
1
+ Metadata-Version: 2.1
2
+ Name: hid_data_transfer_lib
3
+ Version: 0.1.0
4
+ Summary: HiDALGO Data Transfer library provides methods to transfer data between different data providers and consumers using NIFI pipelines
5
+ License: APL-2.0
6
+ Author: Jesús Gorroñogoitia
7
+ Author-email: jesus.gorronogoitia@eviden.com
8
+ Requires-Python: >=3.11,<4.0
9
+ Classifier: License :: Other/Proprietary License
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Requires-Dist: paramiko (>=3.3.1,<4.0.0)
14
+ Requires-Dist: requests (>=2.31.0,<3.0.0)
15
+ Description-Content-Type: text/markdown
16
+
17
+ # Hidalgo2 Data Transfer Tool
18
+ This repository contains the implementation of the Hidalgo2 data transfer library. It uses [Apache NIFI](https://nifi.apache.org/) to transfer data from different data sources to specified targets
19
+
20
+ ## Features
21
+ This library is planning to support the following features:
22
+ - transfer datasets from Cloud Providers to HDFS
23
+ - transfer datasets from Cloud Providers to CKAN
24
+ - transfer datasets from/to Hadoop HDFS to/from HPC
25
+ - transfer datasets from/to a CKAN to/from HPC
26
+ - transfer datasets from/to local filesystem to/from HPC
27
+ - transfer datasets from/to local filesystem to/from CKAN
28
+
29
+ ## Prototype
30
+ Current prototype of the library supports the following features:
31
+ - transfer datasets from/to Hadoop HDFS (without Kerberos security) to/from HPC (PoC)
32
+ - transfer datasets from/to a CKAN to/from HPC
33
+ - transfer datasets from/to local filesystem to/from CKAN
34
+
35
+
36
+ ## Implementation
37
+ This is a Python library that offers specialized API methods to transfer data from data sources to targets.
38
+ Each API method launches a NIFI pipeline, by instantiating a NIFI process group out of its workflow definition registered in the NIFI registry.
39
+ It uses the parameters given within the library method invocation to populate a NIFI parameter context that is associated with the process group.
40
+ Then, processors in the process group are executed once (or until the processor's incoming flowfile queue is empty), one after another, following the group sequence flow, until the flow is completed.
41
+ A processor is executed after the previous one has terminated. To check the status of the transfer command, the library offers another check-status command.
42
+ Upon termination, the NIFI environment is cleaned up, by removing the created entities (i.e. the process group and its parameter context).
43
+ The Data Transfer Library sends requests to NIFI through its REST API.
44
+
45
+ ## Requirements
46
+ To use the Data Transfer library, the following requirements must be met:
47
+ - **Python3** execution environment
48
+ - **Poetry** python package management tool (optional)
49
+ - **NIFI** instance, with either a NIFI or a KEYCLOAK user account, plus a NIFI server SSH account
50
+ - **HDFS** instance
51
+ - **CKAN** instance, with a user APIKey
52
+
53
+ Python3 should be installed in the computer where Data Transfer CLI will be used.
54
+ To install Poetry, follow [these instructions](https://python-poetry.org/docs/#installing-with-the-official-installer)
55
+
56
+ ## Data Transfer lib configuration
57
+ ### Configuration file
58
+ Before using the Data Transfer library, you should configure it to point at the target NIFI. The configuration file is located, by default, at *data_transfer_cli/conf/hid_dt.cfg*. Otherwise, its location can be specified in the environment variable *HID_DT_CONFIG_FILE*.
59
+
60
+ ```
61
+ [Nifi]
62
+ nifi_endpoint=https://nifi.hidalgo2.eu:9443
63
+ nifi_upload_folder=/opt/nifi/data/upload
64
+ nifi_download_folder=/opt/nifi/data/download
65
+ nifi_secure_connection=True
66
+
67
+ [Keycloak]
68
+ keycloak_endpoint=https://idm.hidalgo2.eu
69
+ keycloak_client_id=nifi
70
+ keycloak_client_secret=<keycloak_nifi_client_secret>
71
+ ```
72
+ Under the NIFI section,
73
+ - We define the url of the NIFI service (*nifi_endpoint*),
74
+ - We also specify a folder (*nifi_upload_folder*) in NIFI server where to upload files
75
+ - And another folder (*nifi_download_folder*) from which to download files. These folders must be accessible by the NIFI service (ask the NIFI administrator for details).
76
+ - Additionally, you can set whether the NIFI server listens on a secure HTTPS connection (*nifi_secure_connection*=True) or on a non-secure HTTP connection (*nifi_secure_connection*=False)
77
+
78
+ Under the Keycloak section, you can configure the Keycloak integrated with NIFI, specifying:
79
+ - The Keycloak service endpoint (*keycloak_endpoint*)
80
+ - The NIFI client in Keycloak (*keycloak_client_id*)
81
+ - The NIFI secret in Keycloak (*keycloak_client_secret*)
82
+
83
+ HiDALGO2 developers can contact the Keycloak administrator for the *keycloak_client_secret*
84
+
85
+ ### User's accounts in environment variables
86
+
87
+ You must also specify a user account (username, private_key) that is allowed to upload/download files to/from the NIFI server (as required to upload temporary HPC keys or to support local file transfer). This user account is provided by the Hidalgo2 infrastructure provider and is user- or service-specific. This account is set up in the following environment variables:
88
+ - NIFI_SERVER_USERNAME: `export NIFI_SERVER_USERNAME=<nifi_server_username>`
89
+ - NIFI_SERVER_PRIVATE_KEY: `export NIFI_SERVER_PRIVATE_KEY=<path_to_private_key>`
90
+
91
+ Additionally, a user account with access to the NIFI service must be specified: either (A) a NIFI user account or (B) a Keycloak account with access to NIFI.
92
+
93
+ #### A) NIFI User Account
94
+ The NIFI account must be configured in the following environment variables:
95
+ - NIFI_LOGIN: `export NIFI_LOGIN=<nifi_login>`
96
+ - NIFI_PASSWORD: `export NIFI_PASSWORD=<nifi_password>`
97
+
98
+ This NIFI account is provided by the NIFI administrator.
99
+
100
+ #### B) Keycloak Account with access to NIFI
101
+ The Keycloak account must be configured in the following environment variables:
102
+ - KEYCLOAK_LOGIN: `export KEYCLOAK_LOGIN=<keycloak_login>`
103
+ - KEYCLOAK_PASSWORD: `export KEYCLOAK_PASSWORD=<keycloak_password>`
104
+
105
+ For HiDALGO2 developers, NIFI (Service, Server) and Keycloak accounts are provided by the HiDALGO2 administrator.
106
+
107
+
108
+ ## Usage
109
+ The data transfer library can be invoked following this procedure:
110
+
111
+ - Provide NIFI server and Keycloak accounts in environment variables
112
+ ```
113
+ NIFI_SERVER_USERNAME=<nifi_server_username>
114
+ NIFI_SERVER_PRIVATE_KEY=<path_to_nifi_server_user_private_key>
115
+ KEYCLOAK_LOGIN=<keycloak_username>
116
+ KEYCLOAK_PASSWORD=<keycloak_password>
117
+ ```
118
+ - Customize the hid_dt.cfg file described above and specify its path in the environment variable
119
+ `HID_DT_CONFIG_FILE=<path_to_data_transfer_configuration_file>`
120
+
121
+ - In your Python code, instantiate a HidDataTransferConfiguration object and a HIDDataTransfer object
122
+ The HIDDataTransfer object can be created, by default, using the Keycloak account provided in the environment variables,
123
+ or by providing a dictionary with the Keycloak token, the refresh token, and the expiration time
124
+
125
+ ```
126
+ from hid_data_transfer_lib.hid_dt_lib import HIDDataTransfer
127
+ from hid_data_transfer_lib.conf.hid_dt_configuration import (
128
+ HidDataTransferConfiguration
129
+ )
130
+
131
+ config = HidDataTransferConfiguration()
132
+ # Create a HIDDataTransfer object that uses the Keycloak account provided in the environment variables
133
+ dt_client = HIDDataTransfer(conf=config, secure=True)
134
+
135
+ # OR
136
+
137
+ # Create a HIDDataTransfer object that uses the provided Keycloak token dictionary
138
+ keycloak_token = {
139
+ "username": <keycloak_username>,
140
+ "token": <keycloak_token>,
141
+ "expires_in": <keycloak_token_expires_in>,
142
+ "refresh_token": <keycloak_refresh_token>
143
+ }
144
+ dt_client = HIDDataTransfer(
145
+ conf=config,
146
+ secure=True,
147
+ keycloak_token=keycloak_token
148
+ )
149
+ ```
150
+ - Invoke any data transfer library method using the created object to transfer data
151
+ ```
152
+ dt_client.ckan2hpc(
153
+ ckan_host=<ckan_endpoint>,
154
+ ckan_api_key=<ckan_apikey>,
155
+ ckan_organization=<ckan_organization>,
156
+ ckan_dataset=<ckan_dataset>,
157
+ ckan_resource=<ckan_resource>,
158
+ hpc_host=<hpc_endpoint>,
159
+ hpc_username=<hpc_username>,
160
+ hpc_secret_key_path=<hpc_secret_key>,
161
+ data_target=<hpc_target_folder>,
162
+ )
163
+ ```
164
+
165
+
166
+
@@ -0,0 +1,149 @@
1
+ # Hidalgo2 Data Transfer Tool
2
+ This repository contains the implementation of the Hidalgo2 data transfer library. It uses [Apache NIFI](https://nifi.apache.org/) to transfer data from different data sources to specified targets
3
+
4
+ ## Features
5
+ This library is planning to support the following features:
6
+ - transfer datasets from Cloud Providers to HDFS
7
+ - transfer datasets from Cloud Providers to CKAN
8
+ - transfer datasets from/to Hadoop HDFS to/from HPC
9
+ - transfer datasets from/to a CKAN to/from HPC
10
+ - transfer datasets from/to local filesystem to/from HPC
11
+ - transfer datasets from/to local filesystem to/from CKAN
12
+
13
+ ## Prototype
14
+ Current prototype of the library supports the following features:
15
+ - transfer datasets from/to Hadoop HDFS (without Kerberos security) to/from HPC (PoC)
16
+ - transfer datasets from/to a CKAN to/from HPC
17
+ - transfer datasets from/to local filesystem to/from CKAN
18
+
19
+
20
+ ## Implementation
21
+ This is a Python library that offers specialized API methods to transfer data from data sources to targets.
22
+ Each API method launches a NIFI pipeline, by instantiating a NIFI process group out of its workflow definition registered in the NIFI registry.
23
+ It uses the parameters given within the library method invocation to populate a NIFI parameter context that is associated with the process group.
24
+ Then, processors in the process group are executed once (or until the processor's incoming flowfile queue is empty), one after another, following the group sequence flow, until the flow is completed.
25
+ A processor is executed after the previous one has terminated. To check the status of the transfer command, the library offers another check-status command.
26
+ Upon termination, the NIFI environment is cleaned up, by removing the created entities (i.e. the process group and its parameter context).
27
+ The Data Transfer Library sends requests to NIFI through its REST API.
28
+
29
+ ## Requirements
30
+ To use the Data Transfer library, the following requirements must be met:
31
+ - **Python3** execution environment
32
+ - **Poetry** python package management tool (optional)
33
+ - **NIFI** instance, with either a NIFI or a KEYCLOAK user account, plus a NIFI server SSH account
34
+ - **HDFS** instance
35
+ - **CKAN** instance, with a user APIKey
36
+
37
+ Python3 should be installed in the computer where Data Transfer CLI will be used.
38
+ To install Poetry, follow [these instructions](https://python-poetry.org/docs/#installing-with-the-official-installer)
39
+
40
+ ## Data Transfer lib configuration
41
+ ### Configuration file
42
+ Before using the Data Transfer library, you should configure it to point at the target NIFI. The configuration file is located, by default, at *data_transfer_cli/conf/hid_dt.cfg*. Otherwise, its location can be specified in the environment variable *HID_DT_CONFIG_FILE*.
43
+
44
+ ```
45
+ [Nifi]
46
+ nifi_endpoint=https://nifi.hidalgo2.eu:9443
47
+ nifi_upload_folder=/opt/nifi/data/upload
48
+ nifi_download_folder=/opt/nifi/data/download
49
+ nifi_secure_connection=True
50
+
51
+ [Keycloak]
52
+ keycloak_endpoint=https://idm.hidalgo2.eu
53
+ keycloak_client_id=nifi
54
+ keycloak_client_secret=<keycloak_nifi_client_secret>
55
+ ```
56
+ Under the NIFI section,
57
+ - We define the url of the NIFI service (*nifi_endpoint*),
58
+ - We also specify a folder (*nifi_upload_folder*) in NIFI server where to upload files
59
+ - And another folder (*nifi_download_folder*) from which to download files. These folders must be accessible by the NIFI service (ask the NIFI administrator for details).
60
+ - Additionally, you can set whether the NIFI server listens on a secure HTTPS connection (*nifi_secure_connection*=True) or on a non-secure HTTP connection (*nifi_secure_connection*=False)
61
+
62
+ Under the Keycloak section, you can configure the Keycloak integrated with NIFI, specifying:
63
+ - The Keycloak service endpoint (*keycloak_endpoint*)
64
+ - The NIFI client in Keycloak (*keycloak_client_id*)
65
+ - The NIFI secret in Keycloak (*keycloak_client_secret*)
66
+
67
+ HiDALGO2 developers can contact the Keycloak administrator for the *keycloak_client_secret*
68
+
69
+ ### User's accounts in environment variables
70
+
71
+ You must also specify a user account (username, private_key) that is allowed to upload/download files to/from the NIFI server (as required to upload temporary HPC keys or to support local file transfer). This user account is provided by the Hidalgo2 infrastructure provider and is user- or service-specific. This account is set up in the following environment variables:
72
+ - NIFI_SERVER_USERNAME: `export NIFI_SERVER_USERNAME=<nifi_server_username>`
73
+ - NIFI_SERVER_PRIVATE_KEY: `export NIFI_SERVER_PRIVATE_KEY=<path_to_private_key>`
74
+
75
+ Additionally, a user account with access to the NIFI service must be specified: either (A) a NIFI user account or (B) a Keycloak account with access to NIFI.
76
+
77
+ #### A) NIFI User Account
78
+ The NIFI account must be configured in the following environment variables:
79
+ - NIFI_LOGIN: `export NIFI_LOGIN=<nifi_login>`
80
+ - NIFI_PASSWORD: `export NIFI_PASSWORD=<nifi_password>`
81
+
82
+ This NIFI account is provided by the NIFI administrator.
83
+
84
+ #### B) Keycloak Account with access to NIFI
85
+ The Keycloak account must be configured in the following environment variables:
86
+ - KEYCLOAK_LOGIN: `export KEYCLOAK_LOGIN=<keycloak_login>`
87
+ - KEYCLOAK_PASSWORD: `export KEYCLOAK_PASSWORD=<keycloak_password>`
88
+
89
+ For HiDALGO2 developers, NIFI (Service, Server) and Keycloak accounts are provided by the HiDALGO2 administrator.
90
+
91
+
92
+ ## Usage
93
+ The data transfer library can be invoked following this procedure:
94
+
95
+ - Provide NIFI server and Keycloak accounts in environment variables
96
+ ```
97
+ NIFI_SERVER_USERNAME=<nifi_server_username>
98
+ NIFI_SERVER_PRIVATE_KEY=<path_to_nifi_server_user_private_key>
99
+ KEYCLOAK_LOGIN=<keycloak_username>
100
+ KEYCLOAK_PASSWORD=<keycloak_password>
101
+ ```
102
+ - Customize the hid_dt.cfg file described above and specify its path in the environment variable
103
+ `HID_DT_CONFIG_FILE=<path_to_data_transfer_configuration_file>`
104
+
105
+ - In your Python code, instantiate a HidDataTransferConfiguration object and a HIDDataTransfer object
106
+ The HIDDataTransfer object can be created, by default, using the Keycloak account provided in the environment variables,
107
+ or by providing a dictionary with the Keycloak token, the refresh token, and the expiration time
108
+
109
+ ```
110
+ from hid_data_transfer_lib.hid_dt_lib import HIDDataTransfer
111
+ from hid_data_transfer_lib.conf.hid_dt_configuration import (
112
+ HidDataTransferConfiguration
113
+ )
114
+
115
+ config = HidDataTransferConfiguration()
116
+ # Create a HIDDataTransfer object that uses the Keycloak account provided in the environment variables
117
+ dt_client = HIDDataTransfer(conf=config, secure=True)
118
+
119
+ # OR
120
+
121
+ # Create a HIDDataTransfer object that uses the provided Keycloak token dictionary
122
+ keycloak_token = {
123
+ "username": <keycloak_username>,
124
+ "token": <keycloak_token>,
125
+ "expires_in": <keycloak_token_expires_in>,
126
+ "refresh_token": <keycloak_refresh_token>
127
+ }
128
+ dt_client = HIDDataTransfer(
129
+ conf=config,
130
+ secure=True,
131
+ keycloak_token=keycloak_token
132
+ )
133
+ ```
134
+ - Invoke any data transfer library method using the created object to transfer data
135
+ ```
136
+ dt_client.ckan2hpc(
137
+ ckan_host=<ckan_endpoint>,
138
+ ckan_api_key=<ckan_apikey>,
139
+ ckan_organization=<ckan_organization>,
140
+ ckan_dataset=<ckan_dataset>,
141
+ ckan_resource=<ckan_resource>,
142
+ hpc_host=<hpc_endpoint>,
143
+ hpc_username=<hpc_username>,
144
+ hpc_secret_key_path=<hpc_secret_key>,
145
+ data_target=<hpc_target_folder>,
146
+ )
147
+ ```
148
+
149
+
@@ -0,0 +1 @@
1
+ PYTHONPATH=./src
@@ -0,0 +1,13 @@
1
+ [Nifi]
2
+ nifi_endpoint=https://nifi.hidalgo2.eu:9443
3
+ nifi_upload_folder=/opt/nifi/data/upload
4
+ nifi_download_folder=/opt/nifi/data/download
5
+ nifi_secure_connection=True
6
+
7
+ [Keycloak]
8
+ keycloak_endpoint=https://idm.hidalgo2.eu
9
+ keycloak_client_id=nifi
10
+ keycloak_client_secret=<keycloak_client_secret>
11
+
12
+ [Logging]
13
+ logging_level=INFO