altastata 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- altastata-0.1.0/PKG-INFO +114 -0
- altastata-0.1.0/README.md +91 -0
- altastata-0.1.0/altastata/__init__.py +25 -0
- altastata-0.1.0/altastata/altastata_functions.py +199 -0
- altastata-0.1.0/altastata/altastata_pytorch_dataset.py +283 -0
- altastata-0.1.0/altastata/altastata_tensorflow_dataset.py +409 -0
- altastata-0.1.0/altastata/base_gateway.py +76 -0
- altastata-0.1.0/altastata.egg-info/PKG-INFO +114 -0
- altastata-0.1.0/altastata.egg-info/SOURCES.txt +13 -0
- altastata-0.1.0/altastata.egg-info/dependency_links.txt +1 -0
- altastata-0.1.0/altastata.egg-info/requires.txt +1 -0
- altastata-0.1.0/altastata.egg-info/top_level.txt +1 -0
- altastata-0.1.0/pyproject.toml +3 -0
- altastata-0.1.0/setup.cfg +4 -0
- altastata-0.1.0/setup.py +27 -0
altastata-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: altastata
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: A Python package for Altastata data processing and machine learning integration
|
|
5
|
+
Home-page: https://github.com/sergevil/altastata-python-package
|
|
6
|
+
Author: Serge Vilvovsky
|
|
7
|
+
Author-email: serge.vilvovsky@altastata.com
|
|
8
|
+
Classifier: Programming Language :: Python :: 3
|
|
9
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
10
|
+
Classifier: Operating System :: OS Independent
|
|
11
|
+
Requires-Python: >=3.6
|
|
12
|
+
Description-Content-Type: text/markdown
|
|
13
|
+
Requires-Dist: py4j==0.10.9.5
|
|
14
|
+
Dynamic: author
|
|
15
|
+
Dynamic: author-email
|
|
16
|
+
Dynamic: classifier
|
|
17
|
+
Dynamic: description
|
|
18
|
+
Dynamic: description-content-type
|
|
19
|
+
Dynamic: home-page
|
|
20
|
+
Dynamic: requires-dist
|
|
21
|
+
Dynamic: requires-python
|
|
22
|
+
Dynamic: summary
|
|
23
|
+
|
|
24
|
+
# Make sure you have py4j0.10.9.8.jar (or a similar version) in the altastata/lib directory
|
|
25
|
+
|
|
26
|
+
# for example for Windows
|
|
27
|
+
cp /c/Users/serge/AppData/Local/Packages/PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0/LocalCache/local-packages/share/py4j/py4j0.10.9.8.jar altastata/lib/
|
|
28
|
+
|
|
29
|
+
# Make sure you have the altastata-hadoop jar (built without Bouncy Castle) and the separate Bouncy Castle jars
|
|
30
|
+
|
|
31
|
+
# for example
|
|
32
|
+
# go to altastata-hadoop
|
|
33
|
+
|
|
34
|
+
gradle clean build shadowJar -PexcludeBouncyCastle=true copyDeps
|
|
35
|
+
|
|
36
|
+
# to build this one
|
|
37
|
+
|
|
38
|
+
cp ../mycloud/altastata-hadoop/build/libs/altastata-hadoop-all.jar altastata/lib/
|
|
39
|
+
cp ../mycloud/altastata-hadoop/build/libs_dependency/bc*-jdk18on-*.jar altastata/lib/
|
|
40
|
+
|
|
41
|
+
# verify that the jar is OK (it was once corrupted on Linux)
|
|
42
|
+
jar tf altastata/lib/py4j0.10.9.5.jar | grep GatewayServer
|
|
43
|
+
|
|
44
|
+
# if the py4j jar file is corrupted, run
|
|
45
|
+
wget https://repo1.maven.org/maven2/net/sf/py4j/py4j/0.10.9.5/py4j-0.10.9.5.jar -O altastata/lib/py4j0.10.9.5.jar
|
|
46
|
+
|
|
47
|
+
# if you want to change the log level, copy and modify this file
|
|
48
|
+
cp ../mycloud/altastata-hadoop/src/main/resources/logback.xml altastata/lib/
|
|
49
|
+
|
|
50
|
+
# install
|
|
51
|
+
pip install -e .
|
|
52
|
+
|
|
53
|
+
# test
|
|
54
|
+
python test_script.py
|
|
55
|
+
|
|
56
|
+
# build docker
|
|
57
|
+
docker buildx build --platform linux/amd64,linux/arm64 --push -t ghcr.io/sergevil/altastata/jupyter-datascience:2024a_latest -f openshift/Dockerfile .
|
|
58
|
+
|
|
59
|
+
# push to the registry if needed
|
|
60
|
+
docker push ghcr.io/sergevil/altastata/jupyter-datascience:2024a_latest
|
|
61
|
+
|
|
62
|
+
# run docker
|
|
63
|
+
docker run --name altastata-jupyter -d -p 8888:8888 -v /Users/sergevilvovsky/.altastata:/opt/app-root/src/.altastata:rw -v /Users/sergevilvovsky/Desktop:/opt/app-root/src/Desktop:rw ghcr.io/sergevil/altastata/jupyter-datascience:2024a_latest
|
|
64
|
+
|
|
65
|
+
## Usage in Python Code
|
|
66
|
+
|
|
67
|
+
```python
|
|
68
|
+
from altastata import AltaStataFunctions
|
|
69
|
+
from altastata.altastata_pytorch_dataset import register_altastata_functions
|
|
70
|
+
|
|
71
|
+
# Configuration parameters
|
|
72
|
+
user_properties = """#My Properties
|
|
73
|
+
#Sun Jan 05 12:10:23 EST 2025
|
|
74
|
+
AWSSecretKey=vcJXbtg/YGApAUpY9sjsj1xvmpz9MUPTYMxY+hDn5zZ3Fmc1BuVS34zoTRDQJ7XAvu2Z0+piCEN3TA5OArj77FlL4doYDZx7YWXUopwUhMVyBvP+gT4buHc3hkf1FvHYElbUe3yX/57fnaYP1Nwg1zN9fupzEOGtCMjy39e9Xj4vvVgXo/+YW6ogG8uXi5JA9Fm2aG7hEWQstjwu5shcMT+Q6BR2SOtkAB8B9gYlCIt7ciJ4ikkAKqtfQ8TWkOsN
|
|
75
|
+
media-player=vlcj
|
|
76
|
+
myuser=bob123
|
|
77
|
+
accounttype=amazon-s3-secure
|
|
78
|
+
AWSAccessKeyId=ZWnrkxX43me3l1YBCGX42RhdzXmhP4q4rEOcquLZJIFWCEA9+sVA+hnRYTFcJoJ5nn0luDmQJJkYaayvtAP1IG6/0h4d4sWb+1NQ/hVozOdQMezUSp+z2Wruv4WX6TQpmz12N7zqQALMDD6qi5hTiv2QLJY084ufcoMZzmK1E0uw3jTG6Pci03Zy8TFbhhbuag88Stc9thyoN44ou/d5/8Id0AruvE0EK2Q7Jg0AZZI\\=
|
|
79
|
+
region=us-east-1
|
|
80
|
+
kms-region=us-east-2
|
|
81
|
+
metadata-encryption=RSA
|
|
82
|
+
password-timeout-interval=9000000000
|
|
83
|
+
acccontainer-prefix=altastata-myorgrsa444-
|
|
84
|
+
logging.level.root=WARN
|
|
85
|
+
logging.level.com.altastata=WARN
|
|
86
|
+
logging.level.org.apache.http=ERROR
|
|
87
|
+
logging.level.software.amazon=ERROR"""
|
|
88
|
+
|
|
89
|
+
private_key = """-----BEGIN RSA PRIVATE KEY-----
|
|
90
|
+
Proc-Type: 4,ENCRYPTED
|
|
91
|
+
DEK-Info: DES-EDE3,F26EBECE6DDAEC52
|
|
92
|
+
|
|
93
|
+
poe21ejZGZQ0GOe+EJjDdJpNvJcq/Yig9aYXY2rCGyxXLGVFeYJFg7z6gMCjIpSd
|
|
94
|
+
aprW/0R8L1a2TKbs7f4K5LkSAZ98cd7N45DtIR6B4JFrDGK3LI48/XH3GT3c4OfS
|
|
95
|
+
3LYldvy4XeIOAtOTTCoyhN0145ZLSoeEQ7MO3rGK0va3RGLtPWKgeZXH9j5O1Ch4
|
|
96
|
+
BvPGMaKapUcgc1slj1GI4Lr+MDSrJKnUNovnVTIClS2rXTEkTri3cPLwcgWjyQIi
|
|
97
|
+
BKVnobUD8Gm9irtUD6GeHrkz6Z7ELF3ctSBRSYCg+1FCvRBuljmS2C2aIiE1cu0/
|
|
98
|
+
6KcqBnjEPAs250832uhAkZWj5WedIwJv+sJoGJaAUWyOfgG7DHa2HuKeR9KPD2kS
|
|
99
|
+
6EygoQtQlXgSvdgZNALtIEfStmnrblTyP9Bh4JU9UzKnE6Tu5h7CjyuzkE0wgIXB
|
|
100
|
+
RxgfbURfdDWs22ujLBbWPGfdY+KdNrnmSqxYahKtq6B+99+xuI0GMzX3/rLpOdF0
|
|
101
|
+
AGwfa1xNe8/B/Nt+e2FXIhT2xOuH8K3sDn3/FKwy1qIsK+4g5iL6Q0xj07ujkiSI
|
|
102
|
+
wZ0X2gtg3L2DW8Y6B8gBdSmDGH+vNX5/CLNn9Ly1VUoMGgs4fUmd3FFZTxiIbpim
|
|
103
|
+
rQgQBHP4l1NsSqDrEyplKG83ejloLaVG+hUY1MGv5tF7B1Ta7j8bwoMTmyVCtCrC
|
|
104
|
+
P+a7ShdrBUsD2TDhilZhwZcWl0a+FfzR47+faJs/9pSTkyFFp3D4xgKAdME1lvcI
|
|
105
|
+
wV5BUmp5CEmbeB4r/+BlFttRZBLBXT1sq80YyQIVLumq0Livao9mOg==
|
|
106
|
+
-----END RSA PRIVATE KEY-----"""
|
|
107
|
+
|
|
108
|
+
# Create an instance of AltaStataFunctions
|
|
109
|
+
altastata_functions = AltaStataFunctions.from_credentials(user_properties, private_key)
|
|
110
|
+
altastata_functions.set_password("123")
|
|
111
|
+
|
|
112
|
+
# register the altastata functions
|
|
113
|
+
register_altastata_functions(altastata_functions, "bob123_rsa")
|
|
114
|
+
```
|
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
# Make sure you have py4j0.10.9.8.jar (or a similar version) in the altastata/lib directory
|
|
2
|
+
|
|
3
|
+
# for example for Windows
|
|
4
|
+
cp /c/Users/serge/AppData/Local/Packages/PythonSoftwareFoundation.Python.3.13_qbz5n2kfra8p0/LocalCache/local-packages/share/py4j/py4j0.10.9.8.jar altastata/lib/
|
|
5
|
+
|
|
6
|
+
# Make sure you have the altastata-hadoop jar (built without Bouncy Castle) and the separate Bouncy Castle jars
|
|
7
|
+
|
|
8
|
+
# for example
|
|
9
|
+
# go to altastata-hadoop
|
|
10
|
+
|
|
11
|
+
gradle clean build shadowJar -PexcludeBouncyCastle=true copyDeps
|
|
12
|
+
|
|
13
|
+
# to build this one
|
|
14
|
+
|
|
15
|
+
cp ../mycloud/altastata-hadoop/build/libs/altastata-hadoop-all.jar altastata/lib/
|
|
16
|
+
cp ../mycloud/altastata-hadoop/build/libs_dependency/bc*-jdk18on-*.jar altastata/lib/
|
|
17
|
+
|
|
18
|
+
# verify that the jar is OK (it was once corrupted on Linux)
|
|
19
|
+
jar tf altastata/lib/py4j0.10.9.5.jar | grep GatewayServer
|
|
20
|
+
|
|
21
|
+
# if the py4j jar file is corrupted, run
|
|
22
|
+
wget https://repo1.maven.org/maven2/net/sf/py4j/py4j/0.10.9.5/py4j-0.10.9.5.jar -O altastata/lib/py4j0.10.9.5.jar
|
|
23
|
+
|
|
24
|
+
# if you want to change the log level, copy and modify this file
|
|
25
|
+
cp ../mycloud/altastata-hadoop/src/main/resources/logback.xml altastata/lib/
|
|
26
|
+
|
|
27
|
+
# install
|
|
28
|
+
pip install -e .
|
|
29
|
+
|
|
30
|
+
# test
|
|
31
|
+
python test_script.py
|
|
32
|
+
|
|
33
|
+
# build docker
|
|
34
|
+
docker buildx build --platform linux/amd64,linux/arm64 --push -t ghcr.io/sergevil/altastata/jupyter-datascience:2024a_latest -f openshift/Dockerfile .
|
|
35
|
+
|
|
36
|
+
# push to the registry if needed
|
|
37
|
+
docker push ghcr.io/sergevil/altastata/jupyter-datascience:2024a_latest
|
|
38
|
+
|
|
39
|
+
# run docker
|
|
40
|
+
docker run --name altastata-jupyter -d -p 8888:8888 -v /Users/sergevilvovsky/.altastata:/opt/app-root/src/.altastata:rw -v /Users/sergevilvovsky/Desktop:/opt/app-root/src/Desktop:rw ghcr.io/sergevil/altastata/jupyter-datascience:2024a_latest
|
|
41
|
+
|
|
42
|
+
## Usage in Python Code
|
|
43
|
+
|
|
44
|
+
```python
|
|
45
|
+
from altastata import AltaStataFunctions
|
|
46
|
+
from altastata.altastata_pytorch_dataset import register_altastata_functions
|
|
47
|
+
|
|
48
|
+
# Configuration parameters
|
|
49
|
+
user_properties = """#My Properties
|
|
50
|
+
#Sun Jan 05 12:10:23 EST 2025
|
|
51
|
+
AWSSecretKey=vcJXbtg/YGApAUpY9sjsj1xvmpz9MUPTYMxY+hDn5zZ3Fmc1BuVS34zoTRDQJ7XAvu2Z0+piCEN3TA5OArj77FlL4doYDZx7YWXUopwUhMVyBvP+gT4buHc3hkf1FvHYElbUe3yX/57fnaYP1Nwg1zN9fupzEOGtCMjy39e9Xj4vvVgXo/+YW6ogG8uXi5JA9Fm2aG7hEWQstjwu5shcMT+Q6BR2SOtkAB8B9gYlCIt7ciJ4ikkAKqtfQ8TWkOsN
|
|
52
|
+
media-player=vlcj
|
|
53
|
+
myuser=bob123
|
|
54
|
+
accounttype=amazon-s3-secure
|
|
55
|
+
AWSAccessKeyId=ZWnrkxX43me3l1YBCGX42RhdzXmhP4q4rEOcquLZJIFWCEA9+sVA+hnRYTFcJoJ5nn0luDmQJJkYaayvtAP1IG6/0h4d4sWb+1NQ/hVozOdQMezUSp+z2Wruv4WX6TQpmz12N7zqQALMDD6qi5hTiv2QLJY084ufcoMZzmK1E0uw3jTG6Pci03Zy8TFbhhbuag88Stc9thyoN44ou/d5/8Id0AruvE0EK2Q7Jg0AZZI\\=
|
|
56
|
+
region=us-east-1
|
|
57
|
+
kms-region=us-east-2
|
|
58
|
+
metadata-encryption=RSA
|
|
59
|
+
password-timeout-interval=9000000000
|
|
60
|
+
acccontainer-prefix=altastata-myorgrsa444-
|
|
61
|
+
logging.level.root=WARN
|
|
62
|
+
logging.level.com.altastata=WARN
|
|
63
|
+
logging.level.org.apache.http=ERROR
|
|
64
|
+
logging.level.software.amazon=ERROR"""
|
|
65
|
+
|
|
66
|
+
private_key = """-----BEGIN RSA PRIVATE KEY-----
|
|
67
|
+
Proc-Type: 4,ENCRYPTED
|
|
68
|
+
DEK-Info: DES-EDE3,F26EBECE6DDAEC52
|
|
69
|
+
|
|
70
|
+
poe21ejZGZQ0GOe+EJjDdJpNvJcq/Yig9aYXY2rCGyxXLGVFeYJFg7z6gMCjIpSd
|
|
71
|
+
aprW/0R8L1a2TKbs7f4K5LkSAZ98cd7N45DtIR6B4JFrDGK3LI48/XH3GT3c4OfS
|
|
72
|
+
3LYldvy4XeIOAtOTTCoyhN0145ZLSoeEQ7MO3rGK0va3RGLtPWKgeZXH9j5O1Ch4
|
|
73
|
+
BvPGMaKapUcgc1slj1GI4Lr+MDSrJKnUNovnVTIClS2rXTEkTri3cPLwcgWjyQIi
|
|
74
|
+
BKVnobUD8Gm9irtUD6GeHrkz6Z7ELF3ctSBRSYCg+1FCvRBuljmS2C2aIiE1cu0/
|
|
75
|
+
6KcqBnjEPAs250832uhAkZWj5WedIwJv+sJoGJaAUWyOfgG7DHa2HuKeR9KPD2kS
|
|
76
|
+
6EygoQtQlXgSvdgZNALtIEfStmnrblTyP9Bh4JU9UzKnE6Tu5h7CjyuzkE0wgIXB
|
|
77
|
+
RxgfbURfdDWs22ujLBbWPGfdY+KdNrnmSqxYahKtq6B+99+xuI0GMzX3/rLpOdF0
|
|
78
|
+
AGwfa1xNe8/B/Nt+e2FXIhT2xOuH8K3sDn3/FKwy1qIsK+4g5iL6Q0xj07ujkiSI
|
|
79
|
+
wZ0X2gtg3L2DW8Y6B8gBdSmDGH+vNX5/CLNn9Ly1VUoMGgs4fUmd3FFZTxiIbpim
|
|
80
|
+
rQgQBHP4l1NsSqDrEyplKG83ejloLaVG+hUY1MGv5tF7B1Ta7j8bwoMTmyVCtCrC
|
|
81
|
+
P+a7ShdrBUsD2TDhilZhwZcWl0a+FfzR47+faJs/9pSTkyFFp3D4xgKAdME1lvcI
|
|
82
|
+
wV5BUmp5CEmbeB4r/+BlFttRZBLBXT1sq80YyQIVLumq0Livao9mOg==
|
|
83
|
+
-----END RSA PRIVATE KEY-----"""
|
|
84
|
+
|
|
85
|
+
# Create an instance of AltaStataFunctions
|
|
86
|
+
altastata_functions = AltaStataFunctions.from_credentials(user_properties, private_key)
|
|
87
|
+
altastata_functions.set_password("123")
|
|
88
|
+
|
|
89
|
+
# register the altastata functions
|
|
90
|
+
register_altastata_functions(altastata_functions, "bob123_rsa")
|
|
91
|
+
```
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
from .altastata_functions import AltaStataFunctions
|
|
2
|
+
from .altastata_pytorch_dataset import AltaStataPyTorchDataset
|
|
3
|
+
|
|
4
|
+
# Lazy import for TensorFlow
|
|
5
|
+
def _import_tensorflow_dataset():
    """Import the TensorFlow dataset module on demand and return its dataset class.

    The import happens inside the function body, so merely importing this
    package does not import ``altastata_tensorflow_dataset``.

    Returns:
        The ``AltaStataTensorFlowDataset`` class defined in the
        ``altastata_tensorflow_dataset`` submodule.
    """
    from . import altastata_tensorflow_dataset as _tf_dataset_module

    return _tf_dataset_module.AltaStataTensorFlowDataset
|
|
8
|
+
|
|
9
|
+
# Create a lazy loader for TensorFlow dataset
|
|
10
|
+
class _LazyTensorFlowDataset:
    """Callable stand-in that imports the real TensorFlow dataset class on first use.

    Calling an instance constructs the real dataset class with the given
    arguments. Public attribute access (class methods, constants, ...) is
    forwarded to the real class as well, so the proxy can be used in places
    where only calling it was previously supported.
    """

    def __init__(self):
        # Cache for the real class; filled in by the first call/attribute access.
        self._dataset_class = None

    def _resolve(self):
        """Return the real dataset class, importing and caching it if needed."""
        if self._dataset_class is None:
            self._dataset_class = _import_tensorflow_dataset()
        return self._dataset_class

    def __call__(self, *args, **kwargs):
        """Instantiate the real dataset class with the given arguments."""
        return self._resolve()(*args, **kwargs)

    def __getattr__(self, name):
        """Forward public attribute lookups to the real dataset class.

        Only invoked for attributes not found on the proxy itself. Names that
        start with an underscore are not forwarded: this keeps introspection
        (which probes dunder attributes) from triggering the import and avoids
        recursion if ``_dataset_class`` has not been set yet.
        """
        if name.startswith("_"):
            raise AttributeError(name)
        return getattr(self._resolve(), name)
|
|
18
|
+
|
|
19
|
+
# Public name bound to a lazy proxy instance rather than to the class itself;
# the real class is only imported when the proxy is first used.
AltaStataTensorFlowDataset = _LazyTensorFlowDataset()

# Names exported by ``from altastata import *``.
__all__ = [
    "AltaStataFunctions",
    "AltaStataPyTorchDataset",
    "AltaStataTensorFlowDataset",
]
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
from .base_gateway import BaseGateway
|
|
2
|
+
|
|
3
|
+
from typing import List
|
|
4
|
+
from py4j.java_gateway import JavaGateway, JavaObject, GatewayParameters, CallbackServerParameters, java_import
|
|
5
|
+
from py4j.java_collections import JavaList
|
|
6
|
+
|
|
7
|
+
import io
|
|
8
|
+
import os
|
|
9
|
+
import mmap
|
|
10
|
+
|
|
11
|
+
class AltaStataFunctions(BaseGateway):
    """Python wrapper around the Java ``com.altastata.api.AltaStataFileSystem`` object.

    Every method delegates to ``self.altastata_file_system``, a py4j proxy
    created by one of the alternate constructors (``from_account_dir`` or
    ``from_credentials``). ``self.gateway`` is expected to be provided by
    ``BaseGateway`` (defined in another module -- verify there).
    """

    def __init__(self, port=25333):
        """
        Base initialization. This should not be called directly.
        Use from_account_dir or from_credentials instead.

        Args:
            port (int, optional): Port number passed to ``BaseGateway``. Defaults to 25333.
        """
        super().__init__(port)

    @classmethod
    def from_account_dir(cls, account_dir_path, port=25333):
        """
        Create an instance using account directory path.

        Args:
            account_dir_path (str): Path to the account directory
            port (int, optional): Port number for the gateway. Defaults to 25333.

        Returns:
            AltaStataFunctions: New instance initialized with account directory
        """
        instance = cls(port)
        instance.altastata_file_system = instance.gateway.jvm.com.altastata.api.AltaStataFileSystem(account_dir_path)
        return instance

    @classmethod
    def from_credentials(cls, user_properties, private_key_encrypted, port=25333):
        """
        Create an instance using user properties and private key.

        Args:
            user_properties (str): User properties string
            private_key_encrypted (str): Encrypted private key
            port (int, optional): Port number for the gateway. Defaults to 25333.

        Returns:
            AltaStataFunctions: New instance initialized with credentials
        """
        instance = cls(port)
        instance.altastata_file_system = instance.gateway.jvm.com.altastata.api.AltaStataFileSystem(user_properties, private_key_encrypted)
        return instance

    def convert_java_list_to_python(self, java_list):
        """
        Copy the elements of a py4j ``JavaList`` into a new Python list.

        Args:
            java_list (JavaList): List proxy returned by a Java call

        Returns:
            list: The elements of ``java_list`` in iteration order

        Raises:
            TypeError: If ``java_list`` is not a ``JavaList``
        """
        # Ensure the input is a JavaList
        if not isinstance(java_list, JavaList):
            raise TypeError("Expected a JavaList but got something else.")

        # Convert JavaList to Python list
        return list(java_list)

    def python_list_to_java_arraylist(self, python_list: list) -> JavaObject:
        """
        Build a ``java.util.ArrayList`` holding the items of ``python_list``.

        Args:
            python_list (list): Items to copy, in order

        Returns:
            JavaObject: Proxy of the newly created Java ArrayList
        """
        # Create a Java ArrayList instance
        java_arraylist = self.gateway.jvm.java.util.ArrayList()

        # Add each element from the Python list to the Java ArrayList
        for item in python_list:
            java_arraylist.add(item)

        return java_arraylist

    def python_list_to_java_array(self, python_list: list) -> JavaObject:
        """
        Build a Java ``String[]`` holding the items of ``python_list``.

        Args:
            python_list (list): Items to copy, in order; the array element
                type is ``java.lang.String``

        Returns:
            JavaObject: Proxy of the newly created Java array
        """
        string_class = self.gateway.jvm.java.lang.String

        java_array = self.gateway.new_array(string_class, len(python_list))
        for index, item in enumerate(python_list):
            java_array[index] = item

        return java_array

    def set_password(self, account_password: str):
        """
        Pass the account password to the Java file system object.

        Args:
            account_password (str): The account password

        Returns:
            Whatever the Java ``setPassword`` call returns
        """
        return self.altastata_file_system.setPassword(account_password)

    def create_file(self, cloud_file_path, buffer=None):
        """
        Create a new file version on cloud and add the buffer (may be empty).
        This operation is fast but does not guarantee streaming order.

        Args:
            cloud_file_path (str): The file path on the cloud
            buffer (bytes, optional): Initial buffer to store in the file. Defaults to None (empty buffer).

        Returns:
            CloudFileOperationStatus: Status of the file creation operation
        """
        if buffer is None:
            buffer = b""

        return self.altastata_file_system.createFile(cloud_file_path, buffer)

    def append_buffer_to_file(self, cloud_file_path, buffer, snapshot_time=None):
        """
        Append the buffer as an output stream to the File version.

        Args:
            cloud_file_path (str): The file path on the cloud
            buffer (bytes): The buffer to append
            snapshot_time (Long, optional): File version creation time. Defaults to None (current time).

        Raises:
            IOException: If there is an error during the append operation
                (a Java exception; presumably surfaced in Python by py4j as
                ``Py4JJavaError`` -- confirm against the py4j documentation)
        """
        # Note the Java argument order: snapshot time comes before the buffer.
        self.altastata_file_system.appendBufferToFile(cloud_file_path, snapshot_time, buffer)

    def store(self, localFilesOrDirectories: List[str], localFSPrefix: str, cloudPathPrefix: str, waitUntilDone: bool):
        """
        Call the Java ``store`` method and return its result as a Python list.

        Args:
            localFilesOrDirectories (List[str]): Local paths; converted to a ``java.util.ArrayList``
            localFSPrefix (str): Passed through to Java unchanged
            cloudPathPrefix (str): Passed through to Java unchanged
            waitUntilDone (bool): Passed through to Java unchanged

        Returns:
            list: Elements of the Java list returned by ``store``
        """
        # Call the Java method
        java_list = self.altastata_file_system.store(self.python_list_to_java_arraylist(localFilesOrDirectories), localFSPrefix, cloudPathPrefix, waitUntilDone)

        # Convert the result to a Python list
        return self.convert_java_list_to_python(java_list)

    def retrieve_files(self, output_dir, cloud_path_prefix, including_subdirectories, snapshot_time, is_streaming, wait_until_done):
        """
        Call the Java ``retrieve`` method and return its result as a Python list.

        All arguments are passed through to Java unchanged, in the order given.

        Returns:
            list: Elements of the Java list returned by ``retrieve``
        """
        # Call the Java method
        java_list = self.altastata_file_system.retrieve(output_dir, cloud_path_prefix, including_subdirectories, snapshot_time, is_streaming, wait_until_done)

        # Convert the Java List to Python List
        return self.convert_java_list_to_python(java_list)

    def delete_files(self, cloud_path_prefix, including_subdirectories, time_interval_start, time_interval_end):
        """
        Call the Java ``delete`` method and return its result as a Python list.

        All arguments are passed through to Java unchanged, in the order given.

        Returns:
            list: Elements of the Java list returned by ``delete``
        """
        # Call the Java method
        java_list = self.altastata_file_system.delete(cloud_path_prefix, including_subdirectories, time_interval_start, time_interval_end)

        # Convert the Java List to Python List
        return self.convert_java_list_to_python(java_list)

    def share_files(self, cloud_path_prefix: str, including_subdirectories: bool, time_interval_start: str, time_interval_end: str, users: list) -> list:
        """
        Call the Java ``share`` method and return its result as a Python list.

        Args:
            cloud_path_prefix (str): Passed through to Java unchanged
            including_subdirectories (bool): Passed through to Java unchanged
            time_interval_start (str): Passed through to Java unchanged
            time_interval_end (str): Passed through to Java unchanged
            users (list): User names; converted to a Java ``String[]``

        Returns:
            list: Elements of the Java list returned by ``share``
        """
        # Call the Java method
        java_list = self.altastata_file_system.share(cloud_path_prefix, including_subdirectories, time_interval_start, time_interval_end, self.python_list_to_java_array(users))

        # Convert the Java List to Python List
        return self.convert_java_list_to_python(java_list)

    def list_cloud_files_versions(self, cloudPathPrefix, includingSubdirectories, timeIntervalStart, timeIntervalEnd):
        """
        Call the Java ``listCloudFilesVersions`` method and return its result unconverted.

        All arguments are passed through to Java unchanged, in the order given.

        Returns:
            The Java iterator proxy returned by ``listCloudFilesVersions``
        """
        # Call the Java method and return the iterator
        return self.altastata_file_system.listCloudFilesVersions(cloudPathPrefix, includingSubdirectories, timeIntervalStart, timeIntervalEnd)

    def get_buffer(self, cloudFilePath, snapshotTime, startPosition, howManyChunksInParallel, size):
        """
        Calls the Java method to get a byte array buffer and returns it as a Python bytes object.

        All arguments are passed through to the Java ``getBuffer`` call
        unchanged, in the order given.

        Returns:
            bytes: The content of the Java byte array
        """
        java_byte_array = self.altastata_file_system.getBuffer(cloudFilePath, snapshotTime, startPosition, howManyChunksInParallel, size)

        # Convert Java byte array to Python bytes
        return bytes(java_byte_array)

    def get_buffer_via_mapped_file(self, mappedFilePath, cloudFilePath, snapshotTime, startPosition, howManyChunksInParallel, size):
        """
        Calls the Java method to fill a mapped file, then reads it back and deletes it.

        Args:
            mappedFilePath (str): Local path of the temporary file that Java
                fills; it is removed before this method returns
            cloudFilePath: Passed through to Java unchanged
            snapshotTime: Passed through to Java unchanged
            startPosition: Passed through to Java unchanged
            howManyChunksInParallel: Passed through to Java unchanged
            size: Passed through to Java unchanged

        Returns:
            bytes: Full content of the mapped file (``b""`` if the file is empty)

        Raises:
            FileNotFoundError: If the mapped file does not exist after the Java call
        """
        self.altastata_file_system.fillMappedFile(mappedFilePath, cloudFilePath, snapshotTime, startPosition, howManyChunksInParallel,
                                                  size)

        try:
            # Open the file and create a memory map
            with open(mappedFilePath, "r+b") as f:
                # mmap refuses to map a zero-length file, so handle it explicitly.
                if os.fstat(f.fileno()).st_size == 0:
                    return b""

                # The context manager closes the map even if the copy fails.
                with mmap.mmap(f.fileno(), 0) as mmapped_file:
                    # Copy the data out of the map into a Python bytes object
                    return mmapped_file[:]
        finally:
            # Remove the temporary file only after both handles are closed
            # (deleting an open file fails on Windows). If the file was never
            # created there is nothing to clean up, and the original
            # FileNotFoundError from open() propagates unchanged.
            try:
                os.remove(mappedFilePath)
            except FileNotFoundError:
                pass

    def get_java_input_stream(self, cloud_file_path, snapshot_time, start_position, how_many_chunks_in_parallel):
        """
        Call the Java ``getFileInputStream`` method and return the stream proxy.

        All arguments are passed through to Java unchanged, in the order given.

        Returns:
            Reference to the Java InputStream
        """
        # Call the Java method to get the InputStream
        java_input_stream = self.altastata_file_system.getFileInputStream(cloud_file_path, snapshot_time, start_position, how_many_chunks_in_parallel)

        # Return the reference to the Java InputStream; it is not read here
        # (it can be consumed e.g. via get_buffer_from_input_stream)
        return java_input_stream

    def get_buffer_from_input_stream(self, java_input_stream, buffer_size):
        """
        Call the Java ``readBufferFromInputStream`` method and return its result.

        Args:
            java_input_stream: Java InputStream reference, e.g. from ``get_java_input_stream``
            buffer_size: Passed through to Java unchanged

        Returns:
            Whatever the Java ``readBufferFromInputStream`` call returns
        """
        return self.altastata_file_system.readBufferFromInputStream(java_input_stream, buffer_size)

    def get_file_attribute(self, cloud_file_path, snapshot_time, name):
        """
        Call the Java ``getFileAttribute`` method and return its result.

        All arguments are passed through to Java unchanged, in the order given.

        Returns:
            Whatever the Java ``getFileAttribute`` call returns
        """
        return self.altastata_file_system.getFileAttribute(cloud_file_path, snapshot_time, name)
|
|
197
|
+
|
|
198
|
+
|
|
199
|
+
|