limulus 0.1.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- limulus-0.1.0/PKG-INFO +133 -0
- limulus-0.1.0/README.md +107 -0
- limulus-0.1.0/limulus/__init__.py +82 -0
- limulus-0.1.0/limulus/backends.py +562 -0
- limulus-0.1.0/limulus/evaluator.py +257 -0
- limulus-0.1.0/limulus/executor.py +1312 -0
- limulus-0.1.0/limulus/executor_python.py +2347 -0
- limulus-0.1.0/limulus/grammar/datastep.lark +91 -0
- limulus-0.1.0/limulus/io.py +415 -0
- limulus-0.1.0/limulus/io_adapters.py +150 -0
- limulus-0.1.0/limulus/models.py +164 -0
- limulus-0.1.0/limulus/native_bridge.py +12 -0
- limulus-0.1.0/limulus/parser.py +1129 -0
- limulus-0.1.0/limulus/runtime.py +1030 -0
- limulus-0.1.0/limulus/session.py +731 -0
- limulus-0.1.0/native/limulus_native/Cargo.lock +469 -0
- limulus-0.1.0/native/limulus_native/Cargo.toml +16 -0
- limulus-0.1.0/native/limulus_native/src/lib.rs +2919 -0
- limulus-0.1.0/pyproject.toml +63 -0
limulus-0.1.0/PKG-INFO
ADDED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: limulus
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Classifier: Development Status :: 3 - Alpha
|
|
5
|
+
Classifier: Intended Audience :: Developers
|
|
6
|
+
Classifier: Intended Audience :: Science/Research
|
|
7
|
+
Classifier: License :: Other/Proprietary License
|
|
8
|
+
Classifier: Programming Language :: Python
|
|
9
|
+
Classifier: Programming Language :: Python :: 3
|
|
10
|
+
Classifier: Programming Language :: Rust
|
|
11
|
+
Classifier: Topic :: Scientific/Engineering
|
|
12
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
|
13
|
+
Requires-Dist: lark>=1.1
|
|
14
|
+
Requires-Dist: pyarrow>=15.0.0
|
|
15
|
+
Requires-Dist: polars>=0.20
|
|
16
|
+
Requires-Dist: pandas>=2.2 ; extra == 'pandas'
|
|
17
|
+
Provides-Extra: pandas
|
|
18
|
+
Summary: DataStep for Your Workspace
|
|
19
|
+
Keywords: datastep,dataframe,arrow,polars
|
|
20
|
+
Author-email: Ken Nakamatsu <ken-nakamatsu@knworx.com>
|
|
21
|
+
License: PolyForm Noncommercial License 1.0.0
|
|
22
|
+
Requires-Python: >=3.10
|
|
23
|
+
Description-Content-Type: text/markdown; charset=UTF-8; variant=GFM
|
|
24
|
+
Project-URL: Documentation, https://k-nkmt.github.io/limulus/
|
|
25
|
+
Project-URL: Repository, https://github.com/k-nkmt/limulus
|
|
26
|
+
|
|
27
|
+
# limulus — Data Step for Your Workspace
|
|
28
|
+
|
|
29
|
+
[日本語](docs/README_ja.md)
|
|
30
|
+
|
|
31
|
+
---
|
|
32
|
+
|
|
33
|
+

|
|
34
|
+
|
|
35
|
+
**limulus** is a Python library for data processing using Data Step syntax.
|
|
36
|
+
Its goal is to bring the simplicity and long-term stability of Data Step into Python workflows.
|
|
37
|
+
|
|
38
|
+
This is currently an alpha release. Please note that breaking changes to the API and other interfaces may occur before the beta release.
|
|
39
|
+
|
|
40
|
+
---
|
|
41
|
+
|
|
42
|
+
## Installation
|
|
43
|
+
|
|
44
|
+
```bash
|
|
45
|
+
pip install limulus
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
---
|
|
49
|
+
|
|
50
|
+
## Usage
|
|
51
|
+
|
|
52
|
+
### 1. Prepare Your Data
|
|
53
|
+
|
|
54
|
+
Load a DataFrame (Arrow / Polars / Pandas) from a CSV or other source.
|
|
55
|
+
|
|
56
|
+
```python
|
|
57
|
+
import pandas as pd
|
|
58
|
+
import limulus
|
|
59
|
+
|
|
60
|
+
health_df = pd.DataFrame({
|
|
61
|
+
"name": ["Alice", "Bob", "Charlie", "David"],
|
|
62
|
+
"age": [25, 30, 35, 40],
|
|
63
|
+
"height": [65, 70, 68, 72], # inches
|
|
64
|
+
"weight": [140, 180, 130, 200] # pounds
|
|
65
|
+
})
|
|
66
|
+
|
|
67
|
+
# Load data into a Session
|
|
68
|
+
session = limulus.Session()
|
|
69
|
+
session.loads({"health": health_df})
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
### 2. Run a Data Step
|
|
73
|
+
|
|
74
|
+
```python
|
|
75
|
+
session.submit("""
|
|
76
|
+
data result;
|
|
77
|
+
set health;
|
|
78
|
+
where age > 25;
|
|
79
|
+
height_m = height * 0.0254;
|
|
80
|
+
weight_kg = weight * 0.454;
|
|
81
|
+
bmi = round(weight_kg / (height_m**2), 0.1);
|
|
82
|
+
keep name age bmi;
|
|
83
|
+
run;
|
|
84
|
+
""")
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
Multiple Data Steps can be submitted at once. Datasets created by earlier steps can be referenced by subsequent steps.
|
|
88
|
+
|
|
89
|
+
### 3. Retrieve Results
|
|
90
|
+
|
|
91
|
+
The simplest way is to retrieve the result from the session as an Arrow table using subscript notation.
|
|
92
|
+
You can convert it to pandas using Arrow's methods.
|
|
93
|
+
|
|
94
|
+
```python
|
|
95
|
+
df_out = session["result"].to_pandas()
|
|
96
|
+
print(df_out)
|
|
97
|
+
```
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
## Documentation
|
|
102
|
+
|
|
103
|
+
https://k-nkmt.github.io/limulus/
|
|
104
|
+
|
|
105
|
+
## License
|
|
106
|
+
|
|
107
|
+
[PolyForm Noncommercial License 1.0.0](LICENSE)
|
|
108
|
+
|
|
109
|
+
This project is distributed under the PolyForm Noncommercial License.
|
|
110
|
+
Creative Commons licenses are not used because they are generally not recommended for software distribution.
|
|
111
|
+
|
|
112
|
+
The software may be used for personal, educational, academic, and noncommercial research purposes.
|
|
113
|
+
Commercial use is not permitted under the current license terms.
|
|
114
|
+
|
|
115
|
+
I may consider adopting a different licensing model in the future as the project evolves.
|
|
116
|
+
|
|
117
|
+
Contact: info@knworx.com
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## Notices
|
|
122
|
+
|
|
123
|
+
**Project Positioning**
|
|
124
|
+
limulus is a modern data-step–inspired data transformation framework implemented independently in Python and Rust. It is not affiliated with or endorsed by SAS Institute Inc.
|
|
125
|
+
|
|
126
|
+
**Trademark Notice**
|
|
127
|
+
SAS® is a registered trademark of SAS Institute Inc. All other trademarks are the property of their respective owners.
|
|
128
|
+
|
|
129
|
+
**Independence Statement**
|
|
130
|
+
This is an independent implementation. No SAS source code or proprietary materials have been used.
|
|
131
|
+
|
|
132
|
+
**Compatibility Disclaimer**
|
|
133
|
+
Compatibility with SAS software is not guaranteed and is not a project goal. Certain behaviors intentionally differ to provide modern semantics.
|
limulus-0.1.0/README.md
ADDED
|
@@ -0,0 +1,107 @@
|
|
|
1
|
+
# limulus — Data Step for Your Workspace
|
|
2
|
+
|
|
3
|
+
[日本語](docs/README_ja.md)
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
**limulus** is a Python library for data processing using Data Step syntax.
|
|
10
|
+
Its goal is to bring the simplicity and long-term stability of Data Step into Python workflows.
|
|
11
|
+
|
|
12
|
+
This is currently an alpha release. Please note that breaking changes to the API and other interfaces may occur before the beta release.
|
|
13
|
+
|
|
14
|
+
---
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
pip install limulus
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
---
|
|
23
|
+
|
|
24
|
+
## Usage
|
|
25
|
+
|
|
26
|
+
### 1. Prepare Your Data
|
|
27
|
+
|
|
28
|
+
Load a DataFrame (Arrow / Polars / Pandas) from a CSV or other source.
|
|
29
|
+
|
|
30
|
+
```python
|
|
31
|
+
import pandas as pd
|
|
32
|
+
import limulus
|
|
33
|
+
|
|
34
|
+
health_df = pd.DataFrame({
|
|
35
|
+
"name": ["Alice", "Bob", "Charlie", "David"],
|
|
36
|
+
"age": [25, 30, 35, 40],
|
|
37
|
+
"height": [65, 70, 68, 72], # inches
|
|
38
|
+
"weight": [140, 180, 130, 200] # pounds
|
|
39
|
+
})
|
|
40
|
+
|
|
41
|
+
# Load data into a Session
|
|
42
|
+
session = limulus.Session()
|
|
43
|
+
session.loads({"health": health_df})
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### 2. Run a Data Step
|
|
47
|
+
|
|
48
|
+
```python
|
|
49
|
+
session.submit("""
|
|
50
|
+
data result;
|
|
51
|
+
set health;
|
|
52
|
+
where age > 25;
|
|
53
|
+
height_m = height * 0.0254;
|
|
54
|
+
weight_kg = weight * 0.454;
|
|
55
|
+
bmi = round(weight_kg / (height_m**2), 0.1);
|
|
56
|
+
keep name age bmi;
|
|
57
|
+
run;
|
|
58
|
+
""")
|
|
59
|
+
```
|
|
60
|
+
|
|
61
|
+
Multiple Data Steps can be submitted at once. Datasets created by earlier steps can be referenced by subsequent steps.
|
|
62
|
+
|
|
63
|
+
### 3. Retrieve Results
|
|
64
|
+
|
|
65
|
+
The simplest way is to retrieve the result from the session as an Arrow table using subscript notation.
|
|
66
|
+
You can convert it to pandas using Arrow's methods.
|
|
67
|
+
|
|
68
|
+
```python
|
|
69
|
+
df_out = session["result"].to_pandas()
|
|
70
|
+
print(df_out)
|
|
71
|
+
```
|
|
72
|
+
---
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
## Documentation
|
|
76
|
+
|
|
77
|
+
https://k-nkmt.github.io/limulus/
|
|
78
|
+
|
|
79
|
+
## License
|
|
80
|
+
|
|
81
|
+
[PolyForm Noncommercial License 1.0.0](LICENSE)
|
|
82
|
+
|
|
83
|
+
This project is distributed under the PolyForm Noncommercial License.
|
|
84
|
+
Creative Commons licenses are not used because they are generally not recommended for software distribution.
|
|
85
|
+
|
|
86
|
+
The software may be used for personal, educational, academic, and noncommercial research purposes.
|
|
87
|
+
Commercial use is not permitted under the current license terms.
|
|
88
|
+
|
|
89
|
+
I may consider adopting a different licensing model in the future as the project evolves.
|
|
90
|
+
|
|
91
|
+
Contact: info@knworx.com
|
|
92
|
+
|
|
93
|
+
---
|
|
94
|
+
|
|
95
|
+
## Notices
|
|
96
|
+
|
|
97
|
+
**Project Positioning**
|
|
98
|
+
limulus is a modern data-step–inspired data transformation framework implemented independently in Python and Rust. It is not affiliated with or endorsed by SAS Institute Inc.
|
|
99
|
+
|
|
100
|
+
**Trademark Notice**
|
|
101
|
+
SAS® is a registered trademark of SAS Institute Inc. All other trademarks are the property of their respective owners.
|
|
102
|
+
|
|
103
|
+
**Independence Statement**
|
|
104
|
+
This is an independent implementation. No SAS source code or proprietary materials have been used.
|
|
105
|
+
|
|
106
|
+
**Compatibility Disclaimer**
|
|
107
|
+
Compatibility with SAS software is not guaranteed and is not a project goal. Certain behaviors intentionally differ to provide modern semantics.
|
limulus-0.1.0/limulus/__init__.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Any
|
|
4
|
+
|
|
5
|
+
# Public API
|
|
6
|
+
from .session import Session
|
|
7
|
+
from .models import DatasetCatalog, LogEntry, SubmitResult
|
|
8
|
+
|
|
9
|
+
# Internal / legacy API (still importable for advanced use and internal benchmarks)
|
|
10
|
+
from .runtime import DataStepExecutor, FormatSupportResult, RuntimeRequirements
|
|
11
|
+
from .io_adapters import (
|
|
12
|
+
DataAdapterError,
|
|
13
|
+
DataFrameAdapterPandas,
|
|
14
|
+
DataInputAdapterArrow,
|
|
15
|
+
DataOutputAdapterArrow,
|
|
16
|
+
InputSpec,
|
|
17
|
+
OutputSpec,
|
|
18
|
+
)
|
|
19
|
+
from .models import (
|
|
20
|
+
CompatibilityNotice,
|
|
21
|
+
DataSetRef,
|
|
22
|
+
Diagnostic,
|
|
23
|
+
ExecuteRequest,
|
|
24
|
+
ExecuteResponse,
|
|
25
|
+
OutputConversionResult,
|
|
26
|
+
)
|
|
27
|
+
from .parser import DataStepAst, ParseResult, ParsedStatement, ParserService
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def submit(code: str, *, backend: str = "auto", **datasets: Any) -> SubmitResult:
    """Run Data Step code once in a throwaway session.

    Convenience wrapper for callers that do not need to keep a
    :class:`Session` around: a new session is created for this call, every
    keyword dataset is loaded into it under its keyword name, and ``code``
    is then submitted to that session.

    Args:
        code: Data Step DSL text to execute.
        backend: Runtime backend to use, one of ``"rust"``, ``"python"``, or
            ``"auto"``. Defaults to ``"auto"``.
        **datasets: Input datasets, keyed by the name under which each one
            is loaded into the session.

    Returns:
        Whatever ``Session.submit`` returns for ``code`` when called with
        ``show_result=True``.

    Examples:
        >>> import limulus, pyarrow as pa
        >>> result = limulus.submit("data out; set inp; run;", inp=pa.table({"x": [1, 2]}))
        >>> result.success
        True
    """
    one_shot = Session(backend=backend)
    # Register inputs in keyword order before any code runs.
    for dataset_name in datasets:
        one_shot.load(dataset_name, datasets[dataset_name])
    return one_shot.submit(code, show_result=True)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
def run(code: str, *, backend: str = "auto", **datasets: Any) -> SubmitResult:
    """Alias for :func:`submit`.

    All arguments are forwarded to :func:`submit` unchanged and its result
    is returned as-is.

    Args:
        code: Data Step DSL text to execute.
        backend: Runtime backend name passed through to :func:`submit`.
        **datasets: Input datasets passed through to :func:`submit`.

    Returns:
        The result of ``submit(code, backend=backend, **datasets)``.
    """
    outcome = submit(code, backend=backend, **datasets)
    return outcome
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
# Public surface
|
|
56
|
+
# Names exported by ``from limulus import *``; grouped by where each name
# comes from, in the same order as before.
__all__ = [
    # Session class and the module-level one-shot helpers
    "Session", "submit", "run",
    # Result / catalog models
    "SubmitResult", "LogEntry", "DatasetCatalog",
    # Names imported from .runtime
    "DataStepExecutor", "FormatSupportResult", "RuntimeRequirements",
    # Names imported from .io_adapters
    "DataAdapterError", "DataFrameAdapterPandas",
    "DataInputAdapterArrow", "DataOutputAdapterArrow",
    "InputSpec", "OutputSpec",
    # Remaining names imported from .models
    "CompatibilityNotice", "DataSetRef", "Diagnostic",
    "ExecuteRequest", "ExecuteResponse", "OutputConversionResult",
    # Names imported from .parser
    "DataStepAst", "ParseResult", "ParsedStatement", "ParserService",
]
|