permafrost-framework 0.6.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- permafrost_framework-0.6.0/LICENSE +121 -0
- permafrost_framework-0.6.0/PKG-INFO +97 -0
- permafrost_framework-0.6.0/README.md +49 -0
- permafrost_framework-0.6.0/pyproject.toml +45 -0
- permafrost_framework-0.6.0/setup.cfg +4 -0
- permafrost_framework-0.6.0/src/permafrost/__init__.py +114 -0
- permafrost_framework-0.6.0/src/permafrost/__main__.py +64 -0
- permafrost_framework-0.6.0/src/permafrost/catalog.py +441 -0
- permafrost_framework-0.6.0/src/permafrost/chunk_mode.py +225 -0
- permafrost_framework-0.6.0/src/permafrost/cli.py +419 -0
- permafrost_framework-0.6.0/src/permafrost/cluster.py +621 -0
- permafrost_framework-0.6.0/src/permafrost/codec.py +535 -0
- permafrost_framework-0.6.0/src/permafrost/schema_detector.py +250 -0
- permafrost_framework-0.6.0/src/permafrost/spark.py +464 -0
- permafrost_framework-0.6.0/src/permafrost/storage.py +620 -0
- permafrost_framework-0.6.0/src/permafrost_framework.egg-info/PKG-INFO +97 -0
- permafrost_framework-0.6.0/src/permafrost_framework.egg-info/SOURCES.txt +32 -0
- permafrost_framework-0.6.0/src/permafrost_framework.egg-info/dependency_links.txt +1 -0
- permafrost_framework-0.6.0/src/permafrost_framework.egg-info/entry_points.txt +2 -0
- permafrost_framework-0.6.0/src/permafrost_framework.egg-info/requires.txt +34 -0
- permafrost_framework-0.6.0/src/permafrost_framework.egg-info/top_level.txt +1 -0
- permafrost_framework-0.6.0/tests/test_catalog.py +180 -0
- permafrost_framework-0.6.0/tests/test_cli_cobertura.py +237 -0
- permafrost_framework-0.6.0/tests/test_cluster.py +158 -0
- permafrost_framework-0.6.0/tests/test_cluster_fault_tolerance.py +364 -0
- permafrost_framework-0.6.0/tests/test_comprehensive.py +1048 -0
- permafrost_framework-0.6.0/tests/test_concorrencia.py +415 -0
- permafrost_framework-0.6.0/tests/test_fidelidade_total.py +506 -0
- permafrost_framework-0.6.0/tests/test_formato_binario_spec.py +376 -0
- permafrost_framework-0.6.0/tests/test_freeze_thaw.py +298 -0
- permafrost_framework-0.6.0/tests/test_performance_regression.py +344 -0
- permafrost_framework-0.6.0/tests/test_predictor_edge_cases.py +487 -0
- permafrost_framework-0.6.0/tests/test_schema_detector_stress.py +355 -0
- permafrost_framework-0.6.0/tests/test_sparse_index.py +174 -0
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
Apache License
|
|
2
|
+
Version 2.0, January 2004
|
|
3
|
+
http://www.apache.org/licenses/
|
|
4
|
+
|
|
5
|
+
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
|
|
6
|
+
|
|
7
|
+
1. Definitions.
|
|
8
|
+
|
|
9
|
+
"License" shall mean the terms and conditions for use, reproduction,
|
|
10
|
+
and distribution as defined by Sections 1 through 9 of this document.
|
|
11
|
+
|
|
12
|
+
"Licensor" shall mean the copyright owner or entity authorized by
|
|
13
|
+
the copyright owner that is granting the License.
|
|
14
|
+
|
|
15
|
+
"Legal Entity" shall mean the union of the acting entity and all
|
|
16
|
+
other entities that control, are controlled by, or are under common
|
|
17
|
+
control with that entity. For the purposes of this definition,
|
|
18
|
+
"control" means (i) the power, direct or indirect, to cause the
|
|
19
|
+
direction or management of such entity, whether by contract or
|
|
20
|
+
otherwise, or (ii) ownership of fifty percent (50%) or more of the
|
|
21
|
+
outstanding shares, or (iii) beneficial ownership of such entity.
|
|
22
|
+
|
|
23
|
+
"You" (or "Your") shall mean an individual or Legal Entity
|
|
24
|
+
exercising permissions granted by this License.
|
|
25
|
+
|
|
26
|
+
"Source" form shall mean the preferred form for making modifications,
|
|
27
|
+
including but not limited to software source code, documentation
|
|
28
|
+
source, and configuration files.
|
|
29
|
+
|
|
30
|
+
"Object" form shall mean any form resulting from mechanical
|
|
31
|
+
transformation or translation of a Source form, including but
|
|
32
|
+
not limited to compiled object code, generated documentation,
|
|
33
|
+
and conversions to other media types.
|
|
34
|
+
|
|
35
|
+
"Work" shall mean the work of authorship made available under
|
|
36
|
+
the License, as indicated by a copyright notice that is included in
|
|
37
|
+
or attached to the work (an example is provided in the Appendix below).
|
|
38
|
+
|
|
39
|
+
"Derivative Works" shall mean any work, whether in Source or Object
|
|
40
|
+
form, that is based on (or derived from) the Work and for which the
|
|
41
|
+
editorial revisions, annotations, elaborations, or other transformations
|
|
42
|
+
represent, as a whole, an original work of authorship. For the purposes
|
|
43
|
+
of this License, Derivative Works shall not include works that remain
|
|
44
|
+
separable from, or merely link (or bind by name) to the interfaces of,
|
|
45
|
+
the Work and Derivative Works thereof.
|
|
46
|
+
|
|
47
|
+
"Contribution" shall mean, as submitted to the Licensor for inclusion
|
|
48
|
+
in the Work by the copyright owner or by an individual or Legal Entity
|
|
49
|
+
authorized to submit on behalf of the copyright owner.
|
|
50
|
+
|
|
51
|
+
"Contributor" shall mean Licensor and any Legal Entity on behalf of
|
|
52
|
+
whom a Contribution has been received by the Licensor and included
|
|
53
|
+
within the Work.
|
|
54
|
+
|
|
55
|
+
2. Grant of Copyright License. Subject to the terms and conditions of
|
|
56
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
57
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
58
|
+
copyright license to reproduce, prepare Derivative Works of,
|
|
59
|
+
publicly display, publicly perform, sublicense, and distribute the
|
|
60
|
+
Work and such Derivative Works in Source or Object form.
|
|
61
|
+
|
|
62
|
+
3. Grant of Patent License. Subject to the terms and conditions of
|
|
63
|
+
this License, each Contributor hereby grants to You a perpetual,
|
|
64
|
+
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
|
|
65
|
+
(except as stated in this section) patent license to make, have made,
|
|
66
|
+
use, offer to sell, sell, import, and otherwise transfer the Work.
|
|
67
|
+
|
|
68
|
+
4. Redistribution. You may reproduce and distribute copies of the
|
|
69
|
+
Work or Derivative Works thereof in any medium, with or without
|
|
70
|
+
modifications, and in Source or Object form, provided that You
|
|
71
|
+
meet the following conditions:
|
|
72
|
+
|
|
73
|
+
(a) You must give any other recipients of the Work or
|
|
74
|
+
Derivative Works a copy of this License; and
|
|
75
|
+
|
|
76
|
+
(b) You must cause any modified files to carry prominent notices
|
|
77
|
+
stating that You changed the files; and
|
|
78
|
+
|
|
79
|
+
(c) You must retain, in the Source form of any Derivative Works
|
|
80
|
+
that You distribute, all copyright, patent, trademark, and
|
|
81
|
+
attribution notices from the Source form of the Work; and
|
|
82
|
+
|
|
83
|
+
(d) If the Work includes a "NOTICE" text file, you must include a
|
|
84
|
+
readable copy of the attribution notices contained within such
|
|
85
|
+
NOTICE file.
|
|
86
|
+
|
|
87
|
+
5. Submission of Contributions. Unless You explicitly state otherwise,
|
|
88
|
+
any Contribution submitted for inclusion in the Work shall be under
|
|
89
|
+
the terms and conditions of this License, without any additional terms.
|
|
90
|
+
|
|
91
|
+
6. Trademarks. This License does not grant permission to use the trade
|
|
92
|
+
names, trademarks, service marks, or product names of the Licensor.
|
|
93
|
+
|
|
94
|
+
7. Disclaimer of Warranty. Unless required by applicable law or agreed
|
|
95
|
+
to in writing, Licensor provides the Work on an "AS IS" BASIS,
|
|
96
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND.
|
|
97
|
+
|
|
98
|
+
8. Limitation of Liability. In no event and under no legal theory shall
|
|
99
|
+
any Contributor be liable for damages arising from this License.
|
|
100
|
+
|
|
101
|
+
9. Accepting Warranty or Additional Liability. While redistributing the
|
|
102
|
+
Work, You may offer acceptance of support, warranty, indemnity, or
|
|
103
|
+
other liability obligations. However, in accepting such obligations,
|
|
104
|
+
You may charge a fee, and You must make it clear that any warranties
|
|
105
|
+
are Your own, and not on behalf of the other Contributors.
|
|
106
|
+
|
|
107
|
+
END OF TERMS AND CONDITIONS
|
|
108
|
+
|
|
109
|
+
Copyright 2026 Permafrost Contributors
|
|
110
|
+
|
|
111
|
+
Licensed under the Apache License, Version 2.0 (the "License");
|
|
112
|
+
you may not use this file except in compliance with the License.
|
|
113
|
+
You may obtain a copy of the License at
|
|
114
|
+
|
|
115
|
+
http://www.apache.org/licenses/LICENSE-2.0
|
|
116
|
+
|
|
117
|
+
Unless required by applicable law or agreed to in writing, software
|
|
118
|
+
distributed under the License is distributed on an "AS IS" BASIS,
|
|
119
|
+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
120
|
+
See the License for the specific language governing permissions and
|
|
121
|
+
limitations under the License.
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: permafrost-framework
|
|
3
|
+
Version: 0.6.0
|
|
4
|
+
Summary: Distributed intelligent compression for long-term data archival
|
|
5
|
+
Author: Permafrost Contributors
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/caua-ferreira/permafrost-framework
|
|
8
|
+
Project-URL: Repository, https://github.com/caua-ferreira/permafrost-framework
|
|
9
|
+
Keywords: compression,archival,lzma,cold-storage,permafrost,duckdb,distributed
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
14
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
15
|
+
Classifier: Topic :: System :: Archiving :: Compression
|
|
16
|
+
Requires-Python: >=3.10
|
|
17
|
+
Description-Content-Type: text/markdown
|
|
18
|
+
License-File: LICENSE
|
|
19
|
+
Requires-Dist: numpy>=1.24.0
|
|
20
|
+
Requires-Dist: pandas>=2.0.0
|
|
21
|
+
Requires-Dist: pyarrow>=12.0.0
|
|
22
|
+
Requires-Dist: zstandard>=0.21.0
|
|
23
|
+
Requires-Dist: duckdb>=0.9.0
|
|
24
|
+
Requires-Dist: typer>=0.9.0
|
|
25
|
+
Requires-Dist: rich>=13.0.0
|
|
26
|
+
Requires-Dist: fastapi>=0.100.0
|
|
27
|
+
Requires-Dist: uvicorn>=0.23.0
|
|
28
|
+
Requires-Dist: httpx>=0.24.0
|
|
29
|
+
Provides-Extra: s3
|
|
30
|
+
Requires-Dist: boto3>=1.26.0; extra == "s3"
|
|
31
|
+
Provides-Extra: gcs
|
|
32
|
+
Requires-Dist: google-cloud-storage>=2.10.0; extra == "gcs"
|
|
33
|
+
Provides-Extra: azure
|
|
34
|
+
Requires-Dist: azure-storage-blob>=12.17.0; extra == "azure"
|
|
35
|
+
Provides-Extra: all-cloud
|
|
36
|
+
Requires-Dist: boto3>=1.26.0; extra == "all-cloud"
|
|
37
|
+
Requires-Dist: google-cloud-storage>=2.10.0; extra == "all-cloud"
|
|
38
|
+
Requires-Dist: azure-storage-blob>=12.17.0; extra == "all-cloud"
|
|
39
|
+
Provides-Extra: spark
|
|
40
|
+
Requires-Dist: pyspark>=4.0; extra == "spark"
|
|
41
|
+
Provides-Extra: dev
|
|
42
|
+
Requires-Dist: pytest>=7.0; extra == "dev"
|
|
43
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
44
|
+
Requires-Dist: build; extra == "dev"
|
|
45
|
+
Requires-Dist: twine; extra == "dev"
|
|
46
|
+
Requires-Dist: pyspark>=4.0; extra == "dev"
|
|
47
|
+
Dynamic: license-file
|
|
48
|
+
|
|
49
|
+
# ❄️ Permafrost Data Framework
|
|
50
|
+
|
|
51
|
+
<div align="center">
|
|
52
|
+
|
|
53
|
+
[PyPI](https://pypi.org/project/permafrost-framework/)
|
|
54
|
+
[CI](https://github.com/caua-ferreira/permafrost-framework/actions)
|
|
55
|
+
[Licença: Apache-2.0](LICENSE)
|
|
56
|
+
[Versões Python](https://pypi.org/project/permafrost-framework/)
|
|
57
|
+
[Docs](https://caua-ferreira.github.io/permafrost-framework)
|
|
58
|
+
|
|
59
|
+
**Plataforma distribuída de compressão inteligente para arquivamento digital de longo prazo.**
|
|
60
|
+
|
|
61
|
+
</div>
|
|
62
|
+
|
|
63
|
+
## Instalação
|
|
64
|
+
|
|
65
|
+
```bash
|
|
66
|
+
pip install permafrost-framework
|
|
67
|
+
```
|
|
68
|
+
|
|
69
|
+
## Quick Start
|
|
70
|
+
|
|
71
|
+
```python
|
|
72
|
+
import permafrost as pf
|
|
73
|
+
|
|
74
|
+
metrics = pf.freeze(df, "vendas.permafrost", codec=pf.CODEC_LZMA2, partition_by="ano")
|
|
75
|
+
print(f"Ratio: {metrics['ratio']:.2f}x")
|
|
76
|
+
|
|
77
|
+
df_back = pf.thaw("vendas.permafrost", verify=True)
|
|
78
|
+
df_2023 = pf.thaw("vendas.permafrost", filter={"ano": 2023}) # sparse index
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
## Benchmarks (medidos)
|
|
82
|
+
|
|
83
|
+
| Dado | Original | .permafrost | Ratio |
|
|
84
|
+
|------|----------|-------------|-------|
|
|
85
|
+
| CSV corporativo 80k linhas | 5.85 MB | **0.678 MB** | **8.37x** |
|
|
86
|
+
| JSONL social media 5k posts | 1.44 MB | **0.043 MB** | **33x** |
|
|
87
|
+
| 1 TB no Glacier Deep Archive | $0.99/mês | **$0.12/mês** | **-88%** |
|
|
88
|
+
|
|
89
|
+
91/91 testes passando — ver [EVIDENCE.md](https://github.com/caua-ferreira/permafrost-framework/blob/main/EVIDENCE.md).
|
|
90
|
+
|
|
91
|
+
## Docs
|
|
92
|
+
|
|
93
|
+
[caua-ferreira.github.io/permafrost-framework](https://caua-ferreira.github.io/permafrost-framework)
|
|
94
|
+
|
|
95
|
+
## Licença
|
|
96
|
+
|
|
97
|
+
Apache License 2.0
|
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
# ❄️ Permafrost Data Framework
|
|
2
|
+
|
|
3
|
+
<div align="center">
|
|
4
|
+
|
|
5
|
+
[PyPI](https://pypi.org/project/permafrost-framework/)
|
|
6
|
+
[CI](https://github.com/caua-ferreira/permafrost-framework/actions)
|
|
7
|
+
[Licença: Apache-2.0](LICENSE)
|
|
8
|
+
[Versões Python](https://pypi.org/project/permafrost-framework/)
|
|
9
|
+
[Docs](https://caua-ferreira.github.io/permafrost-framework)
|
|
10
|
+
|
|
11
|
+
**Plataforma distribuída de compressão inteligente para arquivamento digital de longo prazo.**
|
|
12
|
+
|
|
13
|
+
</div>
|
|
14
|
+
|
|
15
|
+
## Instalação
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
pip install permafrost-framework
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
## Quick Start
|
|
22
|
+
|
|
23
|
+
```python
|
|
24
|
+
import permafrost as pf
|
|
25
|
+
|
|
26
|
+
metrics = pf.freeze(df, "vendas.permafrost", codec=pf.CODEC_LZMA2, partition_by="ano")
|
|
27
|
+
print(f"Ratio: {metrics['ratio']:.2f}x")
|
|
28
|
+
|
|
29
|
+
df_back = pf.thaw("vendas.permafrost", verify=True)
|
|
30
|
+
df_2023 = pf.thaw("vendas.permafrost", filter={"ano": 2023}) # sparse index
|
|
31
|
+
```
|
|
32
|
+
|
|
33
|
+
## Benchmarks (medidos)
|
|
34
|
+
|
|
35
|
+
| Dado | Original | .permafrost | Ratio |
|
|
36
|
+
|------|----------|-------------|-------|
|
|
37
|
+
| CSV corporativo 80k linhas | 5.85 MB | **0.678 MB** | **8.37x** |
|
|
38
|
+
| JSONL social media 5k posts | 1.44 MB | **0.043 MB** | **33x** |
|
|
39
|
+
| 1 TB no Glacier Deep Archive | $0.99/mês | **$0.12/mês** | **-88%** |
|
|
40
|
+
|
|
41
|
+
91/91 testes passando — ver [EVIDENCE.md](https://github.com/caua-ferreira/permafrost-framework/blob/main/EVIDENCE.md).
|
|
42
|
+
|
|
43
|
+
## Docs
|
|
44
|
+
|
|
45
|
+
[caua-ferreira.github.io/permafrost-framework](https://caua-ferreira.github.io/permafrost-framework)
|
|
46
|
+
|
|
47
|
+
## Licença
|
|
48
|
+
|
|
49
|
+
Apache License 2.0
|
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=77.0.3", "wheel"]  # the SPDX string form of `license` (PEP 639) is rejected by older setuptools
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "permafrost-framework"
|
|
7
|
+
version = "0.6.0"
|
|
8
|
+
description = "Distributed intelligent compression for long-term data archival"
|
|
9
|
+
readme = "README.md"
|
|
10
|
+
license = "Apache-2.0"
|
|
11
|
+
requires-python = ">=3.10"
|
|
12
|
+
authors = [{ name = "Permafrost Contributors" }]
|
|
13
|
+
keywords = ["compression","archival","lzma","cold-storage","permafrost","duckdb","distributed"]
|
|
14
|
+
classifiers = [
|
|
15
|
+
"Development Status :: 3 - Alpha",
|
|
16
|
+
"Intended Audience :: Developers",
|
|
17
|
+
"Programming Language :: Python :: 3.10",
|
|
18
|
+
"Programming Language :: Python :: 3.11",
|
|
19
|
+
"Programming Language :: Python :: 3.12",
|
|
20
|
+
"Topic :: System :: Archiving :: Compression",
|
|
21
|
+
]
|
|
22
|
+
dependencies = [
|
|
23
|
+
"numpy>=1.24.0", "pandas>=2.0.0", "pyarrow>=12.0.0",
|
|
24
|
+
"zstandard>=0.21.0", "duckdb>=0.9.0",
|
|
25
|
+
"typer>=0.9.0", "rich>=13.0.0",
|
|
26
|
+
"fastapi>=0.100.0", "uvicorn>=0.23.0", "httpx>=0.24.0",
|
|
27
|
+
]
|
|
28
|
+
|
|
29
|
+
[project.optional-dependencies]
|
|
30
|
+
s3 = ["boto3>=1.26.0"]
|
|
31
|
+
gcs = ["google-cloud-storage>=2.10.0"]
|
|
32
|
+
azure = ["azure-storage-blob>=12.17.0"]
|
|
33
|
+
all-cloud = ["boto3>=1.26.0","google-cloud-storage>=2.10.0","azure-storage-blob>=12.17.0"]
|
|
34
|
+
spark = ["pyspark>=4.0"]
|
|
35
|
+
dev = ["pytest>=7.0","pytest-cov","build","twine","pyspark>=4.0"]
|
|
36
|
+
|
|
37
|
+
[project.urls]
|
|
38
|
+
Homepage = "https://github.com/caua-ferreira/permafrost-framework"
|
|
39
|
+
Repository = "https://github.com/caua-ferreira/permafrost-framework"
|
|
40
|
+
|
|
41
|
+
[project.scripts]
|
|
42
|
+
permafrost = "permafrost.cli:app"
|
|
43
|
+
|
|
44
|
+
[tool.setuptools.packages.find]
|
|
45
|
+
where = ["src"]
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Permafrost Data Framework
|
|
3
|
+
=========================
|
|
4
|
+
Plataforma distribuída de compressão inteligente para arquivamento digital de longo prazo.
|
|
5
|
+
|
|
6
|
+
Uso rápido:
|
|
7
|
+
from permafrost import freeze, thaw, audit
|
|
8
|
+
from permafrost import PermafrostCatalog, SchemaDetector
|
|
9
|
+
from permafrost import freeze_to, thaw_from # cloud
|
|
10
|
+
from permafrost import PermafrostMaster, PermafrostWorker, PermafrostClient # cluster
|
|
11
|
+
|
|
12
|
+
Formatos suportados:
|
|
13
|
+
freeze(df, "arquivo.permafrost") # DataFrame tabular
|
|
14
|
+
freeze(detector.detect("dados.jsonl")[0], ...) # JSONL / NoSQL
|
|
15
|
+
freeze_file("dados.csv", "saida.permafrost") # streaming, sem carregar tudo na RAM
|
|
16
|
+
freeze_to(df, "s3://bucket/dados.permafrost") # direto para cloud
|
|
17
|
+
|
|
18
|
+
Exemplos:
|
|
19
|
+
>>> import permafrost as pf
|
|
20
|
+
>>> metrics = pf.freeze(df, "vendas.permafrost", codec=pf.CODEC_LZMA2)
|
|
21
|
+
>>> print(f"Ratio: {metrics['ratio']:.2f}x")
|
|
22
|
+
>>> df_back = pf.thaw("vendas.permafrost")
|
|
23
|
+
>>> info = pf.audit("vendas.permafrost") # sem descomprimir
|
|
24
|
+
|
|
25
|
+
Links:
|
|
26
|
+
GitHub: https://github.com/caua-ferreira/permafrost-framework
|
|
27
|
+
Docs: https://github.com/caua-ferreira/permafrost-framework/tree/main/docs
|
|
28
|
+
"""
|
|
29
|
+
|
|
30
|
+
__version__ = "0.6.0"
|
|
31
|
+
__author__ = "Permafrost Contributors"
|
|
32
|
+
__license__ = "Apache-2.0"
|
|
33
|
+
|
|
34
|
+
# ── Core codec ────────────────────────────────────────────────────────────────
|
|
35
|
+
from permafrost.codec import (
|
|
36
|
+
freeze,
|
|
37
|
+
thaw,
|
|
38
|
+
audit,
|
|
39
|
+
# Codec IDs
|
|
40
|
+
CODEC_ZSTD,
|
|
41
|
+
CODEC_LZMA2,
|
|
42
|
+
CODEC_ZPAQ,
|
|
43
|
+
# Quantization levels
|
|
44
|
+
QUANT_NONE,
|
|
45
|
+
QUANT_HIGH,
|
|
46
|
+
QUANT_MEDIUM,
|
|
47
|
+
QUANT_LOW,
|
|
48
|
+
# Format constants
|
|
49
|
+
MAGIC,
|
|
50
|
+
EOF_MAGIC,
|
|
51
|
+
)
|
|
52
|
+
|
|
53
|
+
# ── Schema detection (SQL + NoSQL + JSONL) ────────────────────────────────────
|
|
54
|
+
from permafrost.schema_detector import (
|
|
55
|
+
SchemaDetector,
|
|
56
|
+
DataType,
|
|
57
|
+
FieldKind,
|
|
58
|
+
)
|
|
59
|
+
|
|
60
|
+
# ── Chunk mode (streaming — datasets > RAM) ───────────────────────────────────
|
|
61
|
+
from permafrost.chunk_mode import (
|
|
62
|
+
freeze_stream,
|
|
63
|
+
freeze_file,
|
|
64
|
+
thaw_iter,
|
|
65
|
+
)
|
|
66
|
+
|
|
67
|
+
# ── Catalog (DuckDB index) ────────────────────────────────────────────────────
|
|
68
|
+
from permafrost.catalog import PermafrostCatalog
|
|
69
|
+
|
|
70
|
+
# ── Cloud storage adapters ────────────────────────────────────────────────────
|
|
71
|
+
from permafrost.storage import (
|
|
72
|
+
LocalAdapter,
|
|
73
|
+
S3Adapter,
|
|
74
|
+
GCSAdapter,
|
|
75
|
+
AzureAdapter,
|
|
76
|
+
storage_from_uri,
|
|
77
|
+
parse_uri,
|
|
78
|
+
freeze_to,
|
|
79
|
+
thaw_from,
|
|
80
|
+
audit_remote,
|
|
81
|
+
)
|
|
82
|
+
|
|
83
|
+
# ── Cluster (distributed processing) ─────────────────────────────────────────
|
|
84
|
+
from permafrost.cluster import (
|
|
85
|
+
PermafrostMaster,
|
|
86
|
+
PermafrostWorker,
|
|
87
|
+
PermafrostClient,
|
|
88
|
+
)
|
|
89
|
+
|
|
90
|
+
# Public API of the package: every name re-exported by the imports above.
__all__ = [
    # Core
    "freeze", "thaw", "audit",
    # Codecs
    "CODEC_ZSTD", "CODEC_LZMA2", "CODEC_ZPAQ",
    # Quant levels
    "QUANT_NONE", "QUANT_HIGH", "QUANT_MEDIUM", "QUANT_LOW",
    # Format constants (imported from permafrost.codec for re-export)
    "MAGIC", "EOF_MAGIC",
    # Schema
    "SchemaDetector", "DataType", "FieldKind",
    # Chunk mode
    "freeze_stream", "freeze_file", "thaw_iter",
    # Catalog
    "PermafrostCatalog",
    # Storage
    "LocalAdapter", "S3Adapter", "GCSAdapter", "AzureAdapter",
    "storage_from_uri", "parse_uri", "freeze_to", "thaw_from", "audit_remote",
    # Cluster
    "PermafrostMaster", "PermafrostWorker", "PermafrostClient",
]

# ── Spark DataSource API v2 ───────────────────────────────────────────────────
# Optional integration: the names are exported only when the import succeeds.
try:
    from permafrost.spark import PermafrostDataSource, register as spark_register
except ImportError:
    pass  # PySpark not installed — that is fine, the integration is optional
else:
    __all__ += ["PermafrostDataSource", "spark_register"]
|
|
@@ -0,0 +1,64 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Entrypoint para execução via `python -m permafrost`.
|
|
3
|
+
|
|
4
|
+
Uso:
|
|
5
|
+
python -m permafrost master [--host HOST] [--port PORT]
|
|
6
|
+
python -m permafrost worker --master URL [--host HOST] [--port PORT] [--id ID]
|
|
7
|
+
python -m permafrost freeze arquivo.csv
|
|
8
|
+
python -m permafrost thaw arquivo.permafrost
|
|
9
|
+
"""
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
def main():
    """Dispatch ``python -m permafrost <command>`` to the matching entry point.

    ``master`` and ``worker`` parse their own options with argparse and start
    the corresponding node from ``permafrost.cluster``.  ``freeze``, ``thaw``,
    ``audit``, ``verify`` and ``catalog`` are forwarded unchanged to the Typer
    application in ``permafrost.cli``.

    Raises:
        SystemExit: with status 1 when no command is given or the command is
            not recognised (after printing the list of accepted commands).
    """
    # Single source of truth for the commands handed to the Typer CLI, so the
    # help text below cannot drift from what the dispatcher actually accepts.
    # NOTE(review): `verify` is accepted here; confirm permafrost.cli defines it.
    cli_commands = ("freeze", "thaw", "audit", "verify", "catalog")
    command_list = " | ".join(("master", "worker") + cli_commands)

    if len(sys.argv) < 2:
        print("Uso: python -m permafrost <comando>")
        print(f"Comandos: {command_list}")
        sys.exit(1)

    cmd = sys.argv[1]

    if cmd == "master":
        # Imported lazily so the other commands do not pay for these imports.
        import argparse

        import uvicorn

        from permafrost.cluster import PermafrostMaster

        p = argparse.ArgumentParser(description="Permafrost Master node")
        p.add_argument("--host", default="0.0.0.0")
        p.add_argument("--port", type=int, default=8700)
        p.add_argument("--max-retries", type=int, default=3)
        args = p.parse_args(sys.argv[2:])
        master = PermafrostMaster(host=args.host, port=args.port)
        # Set on the instance only, after construction.
        master.MAX_RETRIES = args.max_retries
        print(f"❄ Permafrost Master iniciando em {args.host}:{args.port}")
        uvicorn.run(master.app, host=args.host, port=args.port, log_level="info")

    elif cmd == "worker":
        import argparse

        import uvicorn

        from permafrost.cluster import PermafrostWorker

        p = argparse.ArgumentParser(description="Permafrost Worker node")
        p.add_argument("--master", required=True, help="URL do master (ex: http://master:8700)")
        p.add_argument("--host", default="0.0.0.0")
        p.add_argument("--port", type=int, default=8801)
        p.add_argument("--id", default=None, help="ID único do worker")
        args = p.parse_args(sys.argv[2:])
        worker = PermafrostWorker(
            master_url=args.master,
            host=args.host,
            port=args.port,
            worker_id=args.id,
        )
        print(f"❄ Permafrost Worker {worker.worker_id} → {args.master}")
        worker.run(auto_register=True)

    elif cmd in cli_commands:
        # Delegate to the Typer CLI, presenting it with a normalised argv[0].
        from permafrost.cli import app

        sys.argv = ["permafrost"] + sys.argv[1:]
        app()

    else:
        print(f"Comando desconhecido: {cmd}")
        print(f"Comandos disponíveis: {command_list}")
        sys.exit(1)


if __name__ == "__main__":
    main()
|