ppathlib 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. ppathlib-0.1.0/LICENSE +21 -0
  2. ppathlib-0.1.0/PKG-INFO +160 -0
  3. ppathlib-0.1.0/README.md +128 -0
  4. ppathlib-0.1.0/ppathlib/__init__.py +64 -0
  5. ppathlib-0.1.0/ppathlib/anypath.py +92 -0
  6. ppathlib-0.1.0/ppathlib/azure/__init__.py +7 -0
  7. ppathlib-0.1.0/ppathlib/azure/azblobclient.py +513 -0
  8. ppathlib-0.1.0/ppathlib/azure/azblobpath.py +113 -0
  9. ppathlib-0.1.0/ppathlib/client.py +187 -0
  10. ppathlib-0.1.0/ppathlib/cloudpath.py +1721 -0
  11. ppathlib-0.1.0/ppathlib/cloudpath_info.py +31 -0
  12. ppathlib-0.1.0/ppathlib/enums.py +44 -0
  13. ppathlib-0.1.0/ppathlib/exceptions.py +87 -0
  14. ppathlib-0.1.0/ppathlib/gs/__init__.py +7 -0
  15. ppathlib-0.1.0/ppathlib/gs/gsclient.py +314 -0
  16. ppathlib-0.1.0/ppathlib/gs/gspath.py +104 -0
  17. ppathlib-0.1.0/ppathlib/http/__init__.py +9 -0
  18. ppathlib-0.1.0/ppathlib/http/httpclient.py +215 -0
  19. ppathlib-0.1.0/ppathlib/http/httppath.py +166 -0
  20. ppathlib-0.1.0/ppathlib/legacy/glob.py +205 -0
  21. ppathlib-0.1.0/ppathlib/local/__init__.py +34 -0
  22. ppathlib-0.1.0/ppathlib/local/implementations/__init__.py +15 -0
  23. ppathlib-0.1.0/ppathlib/local/implementations/azure.py +84 -0
  24. ppathlib-0.1.0/ppathlib/local/implementations/gs.py +67 -0
  25. ppathlib-0.1.0/ppathlib/local/implementations/s3.py +63 -0
  26. ppathlib-0.1.0/ppathlib/local/localclient.py +219 -0
  27. ppathlib-0.1.0/ppathlib/local/localpath.py +32 -0
  28. ppathlib-0.1.0/ppathlib/patches.py +388 -0
  29. ppathlib-0.1.0/ppathlib/ppath.py +339 -0
  30. ppathlib-0.1.0/ppathlib/py.typed +0 -0
  31. ppathlib-0.1.0/ppathlib/s3/__init__.py +7 -0
  32. ppathlib-0.1.0/ppathlib/s3/s3client.py +387 -0
  33. ppathlib-0.1.0/ppathlib/s3/s3path.py +93 -0
  34. ppathlib-0.1.0/ppathlib/url_utils.py +31 -0
  35. ppathlib-0.1.0/pyproject.toml +111 -0
ppathlib-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 husgbb
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,160 @@
1
+ Metadata-Version: 2.4
2
+ Name: ppathlib
3
+ Version: 0.1.0
4
+ Summary: Pathlib-style classes for local files and named remote storage.
5
+ Keywords: pathlib,remote storage,s3,azure blob storage,google cloud storage
6
+ Author: hus
7
+ Requires-Python: >=3.9
8
+ Description-Content-Type: text/markdown
9
+ Classifier: Intended Audience :: Developers
10
+ Classifier: License :: OSI Approved :: MIT License
11
+ Classifier: Operating System :: OS Independent
12
+ Classifier: Programming Language :: Python :: 3
13
+ Classifier: Programming Language :: Python :: 3.9
14
+ Classifier: Programming Language :: Python :: 3.10
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ License-File: LICENSE
19
+ Requires-Dist: typing-extensions>4 ; python_version < '3.11'
20
+ Requires-Dist: ppathlib[azure] ; extra == "all"
21
+ Requires-Dist: ppathlib[gs] ; extra == "all"
22
+ Requires-Dist: ppathlib[s3] ; extra == "all"
23
+ Requires-Dist: azure-storage-blob>=12 ; extra == "azure"
24
+ Requires-Dist: azure-storage-file-datalake>=12 ; extra == "azure"
25
+ Requires-Dist: google-cloud-storage ; extra == "gs"
26
+ Requires-Dist: boto3>=1.34.0 ; extra == "s3"
27
+ Provides-Extra: all
28
+ Provides-Extra: azure
29
+ Provides-Extra: gs
30
+ Provides-Extra: s3
31
+
32
+ # ppathlib
33
+
34
+ `ppathlib` is a path interface for local files and named remote storage.
35
+
36
+ It behaves like `pathlib.Path` when no `profile` is provided, and it switches to remote mode when a `profile` is given. A profile maps to environment variables that define a backend such as S3, GCS, or Azure Blob Storage, plus an optional default `ROOT`. SFTP and WebDAV support is planned but not yet implemented.
37
+
38
+ The main examples in this README use S3 because that is the most familiar starting point for most users. The same model also works for other supported remotes.
39
+
40
+ ## Why ppathlib
41
+
42
+ - Use one path API for both local and remote storage
43
+ - Keep remote access explicit with named profiles
44
+ - Avoid process-global provider credential variables
45
+ - Support relative paths against a configured remote `ROOT`
46
+ - Reuse the same profile across pandas and pyarrow workflows
47
+
48
+ ## Installation
49
+
50
+ ```bash
51
+ pip install ppathlib
52
+ ```
53
+
54
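+ The remote backends are optional extras, so also install the one you need, for example `pip install "ppathlib[s3]"` (or `"ppathlib[all]"` for every backend). For parquet workflows you will typically also want: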
55
+
56
+ ```bash
57
+ pip install pandas pyarrow
58
+ ```
59
+
60
+ ## Quick Start
61
+
62
+ ### Local Mode
63
+
64
+ If `profile` is omitted, `PPath(...)` behaves like `pathlib.Path(...)`.
65
+
66
+ ```python
67
+ from ppathlib import PPath
68
+
69
+ path = PPath("data/local-report.parquet")
70
+ ```
71
+
72
+ ### Remote Mode
73
+
74
+ Define a named remote with a stable profile name:
75
+
76
+ ```bash
77
+ export MY_RESEARCH_BUCKET_STORAGE_TYPE=s3
78
+ export MY_RESEARCH_BUCKET_ENDPOINT_URL=https://s3.ap-northeast-2.amazonaws.com
79
+ export MY_RESEARCH_BUCKET_ACCESS_KEY_ID=xxx
80
+ export MY_RESEARCH_BUCKET_SECRET_ACCESS_KEY=yyy
81
+ export MY_RESEARCH_BUCKET_REGION=ap-northeast-2
82
+ export MY_RESEARCH_BUCKET_ROOT=s3://analytics-bucket
83
+ ```
84
+
85
+ Use a relative path against that remote root:
86
+
87
+ ```python
88
+ from ppathlib import PPath
89
+
90
+ path = PPath("daily/report.parquet", profile="MY_RESEARCH_BUCKET")
91
+
92
+ with path.open("rb") as f:
93
+ payload = f.read()
94
+ ```
95
+
96
+ You can also pass a full remote URI:
97
+
98
+ ```python
99
+ from ppathlib import PPath
100
+
101
+ path = PPath(
102
+ "s3://analytics-bucket/daily/report.parquet",
103
+ profile="MY_RESEARCH_BUCKET",
104
+ )
105
+ ```
106
+
107
+ ## Core Behavior
108
+
109
+ ```python
110
+ from ppathlib import PPath
111
+ ```
112
+
113
+ - `PPath("data/file.parquet")`
114
+ - local mode
115
+ - `PPath("daily/report.parquet", profile="MY_RESEARCH_BUCKET")`
116
+ - remote mode with `<PROFILE>_ROOT`
117
+ - `PPath("s3://bucket/file.parquet", profile="MY_RESEARCH_BUCKET")`
118
+ - remote mode with an explicit URI
119
+ - `PPath("s3://bucket/file.parquet")`
120
+ - error, because remote URIs require a profile
121
+
122
+ ## Example Usage
123
+
124
+ ```python
125
+ import pandas as pd
126
+ from ppathlib import PPath
127
+
128
+ src = PPath("in.parquet", profile="MY_RESEARCH_BUCKET")
129
+ dst = PPath("out.parquet", profile="MY_RESEARCH_BUCKET")
130
+
131
+ df = pd.read_parquet(src)
132
+ df.to_parquet(dst)
133
+ ```
134
+
135
+ ## API
136
+
137
+ ### `PPath(path, profile=None)`
138
+
139
+ Creates either:
140
+
141
+ - a local path when `profile` is omitted
142
+ - a remote path when `profile` is provided
143
+
144
+ ### `get_client(profile)`
145
+
146
+ Returns the cached client for a named remote.
147
+
148
+ ### `clear_client_cache()`
149
+
150
+ Clears the internal client registry.
151
+
152
+ ## Documentation
153
+
154
+ - [Configuration](docs/configuration.md)
155
+ - [Backends and Examples](docs/backends.md)
156
+
157
+ ## License
158
+
159
+ MIT
160
+
@@ -0,0 +1,128 @@
1
+ # ppathlib
2
+
3
+ `ppathlib` is a path interface for local files and named remote storage.
4
+
5
+ It behaves like `pathlib.Path` when no `profile` is provided, and it switches to remote mode when a `profile` is given. A profile maps to environment variables that define a backend such as S3, GCS, or Azure Blob Storage, plus an optional default `ROOT`. SFTP and WebDAV support is planned but not yet implemented.
6
+
7
+ The main examples in this README use S3 because that is the most familiar starting point for most users. The same model also works for other supported remotes.
8
+
9
+ ## Why ppathlib
10
+
11
+ - Use one path API for both local and remote storage
12
+ - Keep remote access explicit with named profiles
13
+ - Avoid process-global provider credential variables
14
+ - Support relative paths against a configured remote `ROOT`
15
+ - Reuse the same profile across pandas and pyarrow workflows
16
+
17
+ ## Installation
18
+
19
+ ```bash
20
+ pip install ppathlib
21
+ ```
22
+
23
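+ The remote backends are optional extras, so also install the one you need, for example `pip install "ppathlib[s3]"` (or `"ppathlib[all]"` for every backend). For parquet workflows you will typically also want: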
24
+
25
+ ```bash
26
+ pip install pandas pyarrow
27
+ ```
28
+
29
+ ## Quick Start
30
+
31
+ ### Local Mode
32
+
33
+ If `profile` is omitted, `PPath(...)` behaves like `pathlib.Path(...)`.
34
+
35
+ ```python
36
+ from ppathlib import PPath
37
+
38
+ path = PPath("data/local-report.parquet")
39
+ ```
40
+
41
+ ### Remote Mode
42
+
43
+ Define a named remote with a stable profile name:
44
+
45
+ ```bash
46
+ export MY_RESEARCH_BUCKET_STORAGE_TYPE=s3
47
+ export MY_RESEARCH_BUCKET_ENDPOINT_URL=https://s3.ap-northeast-2.amazonaws.com
48
+ export MY_RESEARCH_BUCKET_ACCESS_KEY_ID=xxx
49
+ export MY_RESEARCH_BUCKET_SECRET_ACCESS_KEY=yyy
50
+ export MY_RESEARCH_BUCKET_REGION=ap-northeast-2
51
+ export MY_RESEARCH_BUCKET_ROOT=s3://analytics-bucket
52
+ ```
53
+
54
+ Use a relative path against that remote root:
55
+
56
+ ```python
57
+ from ppathlib import PPath
58
+
59
+ path = PPath("daily/report.parquet", profile="MY_RESEARCH_BUCKET")
60
+
61
+ with path.open("rb") as f:
62
+ payload = f.read()
63
+ ```
64
+
65
+ You can also pass a full remote URI:
66
+
67
+ ```python
68
+ from ppathlib import PPath
69
+
70
+ path = PPath(
71
+ "s3://analytics-bucket/daily/report.parquet",
72
+ profile="MY_RESEARCH_BUCKET",
73
+ )
74
+ ```
75
+
76
+ ## Core Behavior
77
+
78
+ ```python
79
+ from ppathlib import PPath
80
+ ```
81
+
82
+ - `PPath("data/file.parquet")`
83
+ - local mode
84
+ - `PPath("daily/report.parquet", profile="MY_RESEARCH_BUCKET")`
85
+ - remote mode with `<PROFILE>_ROOT`
86
+ - `PPath("s3://bucket/file.parquet", profile="MY_RESEARCH_BUCKET")`
87
+ - remote mode with an explicit URI
88
+ - `PPath("s3://bucket/file.parquet")`
89
+ - error, because remote URIs require a profile
90
+
91
+ ## Example Usage
92
+
93
+ ```python
94
+ import pandas as pd
95
+ from ppathlib import PPath
96
+
97
+ src = PPath("in.parquet", profile="MY_RESEARCH_BUCKET")
98
+ dst = PPath("out.parquet", profile="MY_RESEARCH_BUCKET")
99
+
100
+ df = pd.read_parquet(src)
101
+ df.to_parquet(dst)
102
+ ```
103
+
104
+ ## API
105
+
106
+ ### `PPath(path, profile=None)`
107
+
108
+ Creates either:
109
+
110
+ - a local path when `profile` is omitted
111
+ - a remote path when `profile` is provided
112
+
113
+ ### `get_client(profile)`
114
+
115
+ Returns the cached client for a named remote.
116
+
117
+ ### `clear_client_cache()`
118
+
119
+ Clears the internal client registry.
120
+
121
+ ## Documentation
122
+
123
+ - [Configuration](docs/configuration.md)
124
+ - [Backends and Examples](docs/backends.md)
125
+
126
+ ## License
127
+
128
+ MIT
@@ -0,0 +1,64 @@
1
+ import os
2
+ import sys
3
+
4
+ from .anypath import AnyPath
5
+ from .azure.azblobclient import AzureBlobClient
6
+ from .azure.azblobpath import AzureBlobPath
7
+ from .cloudpath import CloudPath, implementation_registry
8
+ from .patches import patch_open, patch_os_functions, patch_glob, patch_all_builtins
9
+ from .gs.gsclient import GSClient
10
+ from .gs.gspath import GSPath
11
+ from .http.httpclient import HttpClient, HttpsClient
12
+ from .http.httppath import HttpPath, HttpsPath
13
+ from .ppath import clear_client_cache, get_client, PPath
14
+ from .s3.s3client import S3Client
15
+ from .s3.s3path import S3Path
16
+
17
+
18
# Select the metadata API: the `importlib_metadata` backport on interpreters
# older than Python 3.8, the standard-library module everywhere else.
if sys.version_info[:2] < (3, 8):
    import importlib_metadata
else:
    import importlib.metadata as importlib_metadata


# Look up the version of the distribution named after this module's top-level
# package; fall back to the hard-coded release number when no such
# distribution is found.
try:
    __version__ = importlib_metadata.version(__name__.partition(".")[0])
except importlib_metadata.PackageNotFoundError:
    __version__ = "0.1.0"


# Names exported by a star-import of this package.
__all__ = [
    "AnyPath",
    "AzureBlobClient",
    "AzureBlobPath",
    "CloudPath",
    "implementation_registry",
    "GSClient",
    "GSPath",
    "HttpClient",
    "HttpsClient",
    "HttpPath",
    "HttpsPath",
    "PPath",
    "get_client",
    "clear_client_cache",
    "patch_open",
    "patch_glob",
    "patch_os_functions",
    "patch_all_builtins",
    "S3Client",
    "S3Path",
]


# Opt-in patching at import time. Each hook runs when its environment variable
# is set to any non-empty string; the value itself is not interpreted, so "0"
# or "false" enable the hook as well.
if os.getenv("PPATHLIB_PATCH_OPEN"):
    patch_open()

if os.getenv("PPATHLIB_PATCH_OS"):
    patch_os_functions()

if os.getenv("PPATHLIB_PATCH_GLOB"):
    patch_glob()

if os.getenv("PPATHLIB_PATCH_ALL"):
    patch_all_builtins()
@@ -0,0 +1,92 @@
1
+ import os
2
+ from abc import ABC
3
+ from pathlib import Path
4
+ from typing import Any, Union
5
+
6
+ from .cloudpath import InvalidPrefixError, CloudPath
7
+ from .exceptions import AnyPathTypeError
8
+ from .url_utils import path_from_fileurl
9
+
10
+
11
class AnyPath(ABC):
    """Polymorphic virtual superclass for CloudPath and pathlib.Path.

    Constructing an instance never yields an ``AnyPath`` object: ``__new__``
    first tries ``CloudPath`` and, if that raises ``InvalidPrefixError``, falls
    back to a local ``pathlib.Path`` (with special handling for ``file:`` URLs).
    ``CloudPath`` and ``Path`` are registered as virtual subclasses right after
    the class body, so ``isinstance`` and ``issubclass`` checks against
    ``AnyPath`` accept both.

    The class also integrates with Pydantic: used as a field type, validation
    runs the input through this class' constructor, so the field value ends up
    as a ``CloudPath`` or ``Path``.
    """

    def __new__(cls, *args, **kwargs) -> Union[CloudPath, Path]:  # type: ignore
        """Build a ``CloudPath`` if possible, otherwise a local ``Path``.

        Args:
            *args: Positional arguments forwarded to ``CloudPath`` and, on
                fallback, to ``Path``. For a ``file:`` URL the first argument is
                converted with ``path_from_fileurl`` and the remaining
                arguments are joined onto it with ``/``.
            **kwargs: Keyword arguments forwarded unchanged to whichever
                constructor is tried.

        Returns:
            The object produced by ``CloudPath(...)``, ``path_from_fileurl(...)``
            or ``Path(...)``.

        Raises:
            AnyPathTypeError: If ``CloudPath`` rejected the input with
                ``InvalidPrefixError`` and the local fallback raised
                ``TypeError``.
        """
        try:
            return CloudPath(*args, **kwargs)  # type: ignore
        except InvalidPrefixError as cloudpath_exception:
            try:
                if isinstance(args[0], str) and args[0].lower().startswith("file:"):
                    path = path_from_fileurl(args[0], **kwargs)
                    for part in args[1:]:
                        path /= part
                    return path

                return Path(*args, **kwargs)
            except TypeError as path_exception:
                # Chain explicitly so the traceback marks the Path failure as the
                # direct cause; the CloudPath failure is quoted in the message.
                raise AnyPathTypeError(
                    "Invalid input for both CloudPath and Path. "
                    f"CloudPath exception: {repr(cloudpath_exception)} "
                    f"Path exception: {repr(path_exception)}"
                ) from path_exception

    # =========== pydantic integration special methods ===============
    @classmethod
    def __get_pydantic_core_schema__(cls, _source_type: Any, _handler):
        """Pydantic special method. See
        https://docs.pydantic.dev/2.0/usage/types/custom/

        Returns a schema that accepts any input and then runs ``cls.validate``
        on it, or ``None`` when ``pydantic_core`` cannot be imported.
        """
        # Only the optional import is guarded; an ImportError raised while
        # building the schema would be a real error and should not be hidden.
        try:
            from pydantic_core import core_schema
        except ImportError:
            return None

        return core_schema.no_info_after_validator_function(
            cls.validate,
            core_schema.any_schema(),
        )

    @classmethod
    def validate(cls, v: str) -> Union[CloudPath, Path]:
        """Pydantic special method. See
        https://docs.pydantic.dev/2.0/usage/types/custom/

        Raises:
            ValueError: If ``v`` is rejected by both CloudPath and Path.
        """
        try:
            return cls.__new__(cls, v)
        except AnyPathTypeError as e:
            # type errors no longer converted to validation errors
            # https://docs.pydantic.dev/2.0/migration/#typeerror-is-no-longer-converted-to-validationerror-in-validators
            raise ValueError(e) from e

    @classmethod
    def __get_validators__(cls):
        """Pydantic special method. See
        https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
        yield cls._validate

    @classmethod
    def _validate(cls, value) -> Union[CloudPath, Path]:
        """Used as a Pydantic validator. See
        https://pydantic-docs.helpmanual.io/usage/types/#custom-data-types"""
        # Note __new__ is static method and not a class method
        return cls.__new__(cls, value)


# Register both concrete families as virtual subclasses so isinstance and
# issubclass checks against AnyPath succeed for them.
AnyPath.register(CloudPath)  # type: ignore
AnyPath.register(Path)
82
+
83
+
84
def to_anypath(s: Union[str, os.PathLike]) -> Union[CloudPath, Path]:
    """Return ``s`` as a Path or CloudPath object.

    Objects that are already ``CloudPath`` or ``Path`` instances are returned
    unchanged; anything else is passed to ``AnyPath`` for dispatch.
    """
    is_path_object = isinstance(s, (CloudPath, Path))
    return s if is_path_object else AnyPath(s)  # type: ignore
@@ -0,0 +1,7 @@
1
+ from .azblobclient import AzureBlobClient
2
+ from .azblobpath import AzureBlobPath
3
+
4
# Names exported by a star-import of this subpackage.
__all__ = ["AzureBlobClient", "AzureBlobPath"]