saslite 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- saslite/__init__.py +7 -0
- saslite/__main__.py +5 -0
- saslite/api/__init__.py +0 -0
- saslite/api/facade.py +208 -0
- saslite/api/results.py +5 -0
- saslite/ast/__init__.py +0 -0
- saslite/ast/base.py +22 -0
- saslite/ast/data_step.py +207 -0
- saslite/ast/expressions.py +105 -0
- saslite/ast/proc.py +92 -0
- saslite/ast/program.py +36 -0
- saslite/ast/sql.py +106 -0
- saslite/cli/__init__.py +0 -0
- saslite/cli/main.py +153 -0
- saslite/diagnostics/__init__.py +0 -0
- saslite/diagnostics/errors.py +50 -0
- saslite/diagnostics/reporter.py +56 -0
- saslite/executor/__init__.py +0 -0
- saslite/executor/data_step/__init__.py +0 -0
- saslite/executor/data_step/executor.py +1130 -0
- saslite/executor/dispatcher.py +89 -0
- saslite/executor/expression_eval.py +385 -0
- saslite/executor/libname.py +78 -0
- saslite/executor/proc/__init__.py +0 -0
- saslite/executor/proc/registry.py +880 -0
- saslite/executor/sql/__init__.py +0 -0
- saslite/executor/sql/executor.py +2150 -0
- saslite/functions/__init__.py +100 -0
- saslite/functions/char_funcs.py +325 -0
- saslite/functions/conditional_funcs.py +36 -0
- saslite/functions/convert_funcs.py +207 -0
- saslite/functions/date_funcs.py +308 -0
- saslite/functions/numeric_funcs.py +213 -0
- saslite/functions/registry.py +31 -0
- saslite/macro/__init__.py +0 -0
- saslite/macro/expander.py +547 -0
- saslite/parser/__init__.py +0 -0
- saslite/parser/grammar/saslite.lark +420 -0
- saslite/parser/program_parser.py +46 -0
- saslite/parser/transformer.py +1912 -0
- saslite/planner/__init__.py +0 -0
- saslite/runtime/__init__.py +0 -0
- saslite/runtime/dataset.py +112 -0
- saslite/runtime/execution_result.py +36 -0
- saslite/runtime/formatting.py +68 -0
- saslite/runtime/metadata.py +76 -0
- saslite/runtime/pdv.py +166 -0
- saslite/runtime/types.py +72 -0
- saslite/session/__init__.py +0 -0
- saslite/session/session.py +100 -0
- saslite/source/__init__.py +0 -0
- saslite/storage/__init__.py +0 -0
- saslite/storage/base.py +32 -0
- saslite/storage/csv_backend.py +91 -0
- saslite/storage/memory.py +31 -0
- saslite/storage/path_resolver.py +32 -0
- saslite/storage/sas_backend.py +280 -0
- saslite/testing/__init__.py +0 -0
- saslite-0.1.0.dist-info/METADATA +244 -0
- saslite-0.1.0.dist-info/RECORD +64 -0
- saslite-0.1.0.dist-info/WHEEL +5 -0
- saslite-0.1.0.dist-info/entry_points.txt +2 -0
- saslite-0.1.0.dist-info/licenses/LICENSE +21 -0
- saslite-0.1.0.dist-info/top_level.txt +1 -0
saslite/__init__.py
ADDED
saslite/__main__.py
ADDED
saslite/api/__init__.py
ADDED
|
File without changes
|
saslite/api/facade.py
ADDED
|
@@ -0,0 +1,208 @@
|
|
|
1
|
+
"""SasInterpreter — the main programming API for SASLite."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import re
|
|
6
|
+
from pathlib import Path
|
|
7
|
+
from typing import Any
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from saslite.macro.expander import MacroExpander
|
|
12
|
+
from saslite.parser.program_parser import ProgramParser
|
|
13
|
+
from saslite.session.session import Session
|
|
14
|
+
from saslite.storage.path_resolver import StorageRouter
|
|
15
|
+
from saslite.executor.dispatcher import Dispatcher
|
|
16
|
+
from saslite.executor.proc.registry import (
|
|
17
|
+
handle_proc_print, handle_proc_sort, handle_proc_contents,
|
|
18
|
+
handle_proc_means, handle_proc_freq, handle_proc_import, handle_proc_export,
|
|
19
|
+
handle_proc_append, handle_proc_datasets,
|
|
20
|
+
)
|
|
21
|
+
from saslite.runtime.execution_result import RunSummary
|
|
22
|
+
from saslite.diagnostics.reporter import Reporter
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class SasInterpreter:
    """Main API for executing SAS code.

    One instance owns a macro expander, a program parser, a session and a
    reporter; the same objects are reused by every :meth:`execute` call made
    on that instance.
    """

    # Stand-in for "" and '' inside raw DATALINES data so that the quote
    # escaping applied afterwards cannot mangle an empty-string token.
    _EMPTY_PLACEHOLDER = "\x01"

    # Whole-line spellings (after strip + upper) that open a DATALINES block.
    _DATALINES_KEYWORD_LINES = ("DATALINES;", "CARDS;", "LINES4;")

    # ``KEYWORD ; data ;`` where the data holds no semicolon.  The leading
    # ``\b`` keeps the keyword from matching as the tail of a longer
    # identifier (``mycards;``) or of ``DATALINES4`` via its ``LINES4``
    # suffix.  ``\s`` also matches newlines, so this pattern captures
    # multi-line blocks as well as single-line ones.
    _INLINE_DATALINES_RE = re.compile(
        r"\b(?:DATALINES|CARDS|LINES4)\s*;\s*([^;]+?)\s*;",
        re.IGNORECASE,
    )

    def __init__(self, work_dir: str | None = None) -> None:
        """Create the interpreter's collaborators.

        Args:
            work_dir: Passed straight to ``StorageRouter``; ``None`` leaves
                the choice of location to the router.
        """
        self._macro = MacroExpander()
        self._parser = ProgramParser()
        self._session = Session(StorageRouter(work_dir))
        self._reporter = Reporter()

    @property
    def session(self) -> Session:
        """The session that holds this interpreter's datasets."""
        return self._session

    @property
    def reporter(self) -> Reporter:
        """The reporter that receives log and error messages."""
        return self._reporter

    def execute(self, source: str, source_name: str = "<input>") -> RunSummary:
        """Execute SAS source code.

        Pipeline: extract DATALINES blocks, expand macros, parse, re-attach
        the DATALINES data to INPUT statements, then dispatch every step.

        Args:
            source: SAS program text.
            source_name: Label for the source.  Accepted for API
                compatibility; this method does not currently use it.

        Returns:
            The dispatcher's ``RunSummary`` on success,
            ``RunSummary(success=True)`` when the expanded program is blank,
            or ``RunSummary(success=False, error=...)`` when any stage raises.
        """
        try:
            # Step 0: pull raw data blocks out before the macro expander and
            # parser see them.
            source, datalines_list = self._preprocess_datalines(source)

            # Step 1: macro expansion.  %PUT output is flushed even when
            # expansion fails, so lines buffered by a failed run are not
            # logged by a later execute() call.
            try:
                expanded = self._macro.expand(source)
            finally:
                self._flush_put_output()

            if not expanded.strip():
                return RunSummary(success=True)

            # Step 2: parse, then move DATALINES data onto the INPUT nodes.
            program = self._parser.parse(expanded)
            self._inject_datalines(program, datalines_list)

            # Step 3: dispatch and execute.
            dispatcher = Dispatcher(self._session, self._reporter)
            proc_handlers = (
                ("PRINT", handle_proc_print),
                ("SORT", handle_proc_sort),
                ("CONTENTS", handle_proc_contents),
                ("MEANS", handle_proc_means),
                ("SUMMARY", handle_proc_means),  # shares the MEANS handler
                ("FREQ", handle_proc_freq),
                ("IMPORT", handle_proc_import),
                ("EXPORT", handle_proc_export),
                ("APPEND", handle_proc_append),
                ("DATASETS", handle_proc_datasets),
            )
            for proc_name, handler in proc_handlers:
                dispatcher.register_proc(proc_name, self._bind_proc_handler(handler))

            return dispatcher.run(program)

        except Exception as exc:
            # API boundary: every failure is reported and returned as a
            # summary instead of propagating to the caller.
            message = str(exc)
            self._reporter.error(message)
            return RunSummary(success=False, error=message)

    def _flush_put_output(self) -> None:
        """Forward buffered %PUT lines to the reporter and empty the buffer."""
        for line in self._macro.put_output:
            self._reporter.log(line)
        self._macro.put_output.clear()

    def _bind_proc_handler(self, handler: Any) -> Any:
        """Wrap a ``handler(proc, session, reporter)`` as a one-argument callable."""
        session = self._session
        reporter = self._reporter
        # A factory gives each handler its own closure; building the lambdas
        # directly in a loop would late-bind them all to the last handler.
        return lambda p: handler(p, session, reporter)

    def execute_file(
        self,
        path: str | Path,
        encoding: str = "utf-8",
        errors: str = "strict",
    ) -> RunSummary:
        """Execute a SAS script file.

        Args:
            path: Location of the script.
            encoding: Text encoding used to read the file.
            errors: Decoding error policy forwarded to ``Path.read_text``.

        Returns:
            The ``RunSummary`` produced by :meth:`execute`.

        Raises:
            OSError, UnicodeDecodeError: Reading happens outside
                :meth:`execute`, so read failures propagate to the caller.
        """
        path = Path(path)
        source = path.read_text(encoding=encoding, errors=errors)
        return self.execute(source, source_name=str(path))

    def create_dataset(self, name: str, df: pd.DataFrame, libref: str = "WORK") -> None:
        """Create a dataset from a pandas DataFrame and store it in the session."""
        from saslite.runtime.dataset import Dataset

        ds = Dataset.from_dataframe(df, name=name, libref=libref)
        self._session.put_dataset(libref, name, ds)

    def get_dataset(self, libref: str, name: str) -> pd.DataFrame:
        """Return the ``data`` attribute of the dataset ``libref.name``."""
        ds = self._session.get_dataset(libref, name)
        return ds.data

    def import_csv(self, filepath: str, dataset_name: str, libref: str = "WORK") -> None:
        """Read a CSV file with ``pandas.read_csv`` and store it as a dataset."""
        df = pd.read_csv(filepath)
        self.create_dataset(dataset_name, df, libref)

    def export_csv(self, dataset_name: str, filepath: str, libref: str = "WORK") -> None:
        """Write the dataset ``libref.dataset_name`` to a CSV file without the index."""
        from saslite.runtime.formatting import csv_dataframe

        ds = self._session.get_dataset(libref, dataset_name)
        csv_dataframe(ds).to_csv(filepath, index=False)

    @staticmethod
    def _preprocess_datalines(source: str) -> tuple[str, list[str]]:
        """Extract DATALINES/CARDS blocks and replace them with assignment markers.

        Each block becomes ``__DATALINES_<n>__ = "<escaped data>";`` so the
        raw data can travel through the later stages as a string literal.

        Args:
            source: SAS program text.

        Returns:
            ``(modified_source, raw_blocks)``.  ``raw_blocks[n]`` is the data
            of marker ``n`` with ``""`` and ``''`` already replaced by the
            empty-string placeholder, but before backslash/quote escaping.
        """
        datalines_list: list[str] = []
        empty = SasInterpreter._EMPTY_PLACEHOLDER

        def _stash(data: str) -> str:
            """Record one data block and return its marker statement."""
            data = data.replace('""', empty).replace("''", empty)
            idx = len(datalines_list)
            datalines_list.append(data)
            escaped = data.replace("\\", "\\\\").replace('"', '\\"')
            return f'__DATALINES_{idx}__ = "{escaped}";'

        def _replace_inline(m: re.Match) -> str:
            return _stash(m.group(1))

        # Pass 1: blocks terminated by a semicolon, matched by regex.
        source = SasInterpreter._INLINE_DATALINES_RE.sub(_replace_inline, source)

        # Pass 2: a keyword alone on its line that pass 1 did not consume
        # (for example a block with no terminating ";").  Data runs up to a
        # line holding only ";" or to the end of the text.
        lines = source.split("\n")
        result_lines: list[str] = []
        i = 0
        while i < len(lines):
            if lines[i].strip().upper() not in SasInterpreter._DATALINES_KEYWORD_LINES:
                result_lines.append(lines[i])
                i += 1
                continue

            i += 1  # skip the keyword line
            data_lines: list[str] = []
            while i < len(lines) and lines[i].strip() != ";":
                data_lines.append(lines[i])
                i += 1
            result_lines.append(" " + _stash("\n".join(data_lines)))
            i += 1  # skip the terminating ";" line, if there was one

        return "\n".join(result_lines), datalines_list

    @staticmethod
    def _inject_datalines(program: Any, datalines_list: list[str]) -> None:
        """Inject DATALINES data into DataStepNodes that have InputNodes.

        For every DATA step that has both an ``InputNode`` and a
        ``__DATALINES_*`` string-literal assignment, the literal's value is
        copied to ``InputNode.datalines_data`` and the assignment is removed.
        If a step has several of either, the last one in statement order is
        used.  Steps missing either part are left untouched.

        Args:
            program: Parsed program exposing a ``steps`` list.
            datalines_list: Raw blocks from :meth:`_preprocess_datalines`.
                Not consulted here: the data is read back from the parsed
                placeholder literal.
        """
        from saslite.ast.data_step import DataStepNode, InputNode, AssignNode
        from saslite.ast.expressions import LiteralNode

        for step in program.steps:
            if not isinstance(step, DataStepNode):
                continue

            input_node = None
            placeholder_idx = None

            for j, stmt in enumerate(step.statements):
                if isinstance(stmt, InputNode):
                    input_node = stmt
                elif (isinstance(stmt, AssignNode)
                        and stmt.target.startswith("__DATALINES_")
                        and isinstance(stmt.expr, LiteralNode)
                        and stmt.expr.literal_type == "string"):
                    placeholder_idx = j

            if input_node is None or placeholder_idx is None:
                continue

            # Hand the data to the INPUT node, then drop the placeholder so it
            # never executes as a real assignment.
            placeholder = step.statements[placeholder_idx]
            input_node.datalines_data = placeholder.expr.value
            step.statements.pop(placeholder_idx)
|
saslite/api/results.py
ADDED
saslite/ast/__init__.py
ADDED
|
File without changes
|
saslite/ast/base.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
"""Base AST node types."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
@dataclass
class Span:
    """Location of a piece of source text.

    All positions default to 0 and ``source`` to the empty string, so a bare
    ``Span()`` is a valid "no location recorded" value.
    """

    # Start position.
    start_line: int = 0
    start_col: int = 0
    # End position.
    end_line: int = 0
    end_col: int = 0
    # Source string associated with the span.
    source: str = ""
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class Node:
    """Common base class of all AST nodes."""

    # Source location; every node gets its own fresh default Span.
    span: Span = field(default_factory=Span)
|
saslite/ast/data_step.py
ADDED
|
@@ -0,0 +1,207 @@
|
|
|
1
|
+
"""DATA step AST nodes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from saslite.ast.base import Node
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class DataStepNode(Node):
    """AST node for a DATA step."""

    # Name of the step's target.
    target: str = ""
    # Options attached to the target.
    target_options: dict[str, Any] = field(default_factory=dict)
    # Statement nodes of the step, in source order.
    statements: list[Any] = field(default_factory=list)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class SetNode(Node):
    """AST node for a SET statement."""

    # Datasets named by the statement.
    datasets: list[Any] = field(default_factory=list)
    # Statement options.
    options: dict[str, Any] = field(default_factory=dict)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class MergeNode(Node):
    """AST node for a MERGE statement."""

    # Datasets named by the statement.
    datasets: list[Any] = field(default_factory=list)
    # BY variable names associated with the merge.
    by_vars: list[str] = field(default_factory=list)
|
|
31
|
+
|
|
32
|
+
|
|
33
|
+
@dataclass
class DatasetRefNode(Node):
    """Reference to a dataset, together with its dataset options."""

    # Dataset name.
    name: str = ""
    # Library reference; "WORK" unless stated otherwise.
    libref: str = "WORK"
    # Dataset options attached to the reference.
    options: list[Any] = field(default_factory=list)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass
class AssignNode(Node):
    """AST node for a variable assignment."""

    # Name of the variable being assigned.
    target: str = ""
    # Expression node on the right-hand side.
    expr: Any = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass
class IfNode(Node):
    """AST node for an IF/THEN/ELSE statement."""

    # Condition expression.
    condition: Any = None
    # Statement for the THEN branch.
    then_stmt: Any = None
    # Statement for the ELSE branch; None by default.
    else_stmt: Any = None
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
@dataclass
class IfBlockNode(Node):
    """Block form of IF: ``IF condition THEN DO; ... END; ELSE DO; ... END;``."""

    # Condition expression.
    condition: Any = None
    # Statements of the THEN block.
    then_body: list[Any] = field(default_factory=list)
    # Statements of the ELSE block; empty by default.
    else_body: list[Any] = field(default_factory=list)
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
@dataclass
class DoNode(Node):
    """AST node for a DO block or an iterative DO loop."""

    # Statements inside the block.
    body: list[Any] = field(default_factory=list)

    # --- iterative DO: loop variable, start, end and BY expressions ---
    var: str = ""
    start: Any = None
    end: Any = None
    by: Any = None

    # --- WHILE / UNTIL conditions ---
    while_cond: Any = None
    until_cond: Any = None
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
@dataclass
class OutputNode(Node):
    """AST node for the DATA step OUTPUT statement."""

    # Target named on the statement; empty string by default.
    target: str = ""
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass
class DeleteNode(Node):
    """AST node for the DELETE statement; adds no fields to ``Node``."""
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
@dataclass
class StopNode(Node):
    """AST node for the STOP statement; adds no fields to ``Node``."""
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
@dataclass
class RetainNode(Node):
    """AST node for a RETAIN statement."""

    # Pairs of (name, value); the second element may be None.
    items: list[tuple[str, Any | None]] = field(default_factory=list)
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
@dataclass
class WhereNode(Node):
    """AST node for a WHERE statement."""

    # Condition expression.
    condition: Any = None
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
@dataclass
class KeepNode(Node):
    """AST node for the in-step KEEP statement (not the KEEP= option)."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
|
|
112
|
+
|
|
113
|
+
|
|
114
|
+
@dataclass
class DropNode(Node):
    """AST node for the in-step DROP statement (not the DROP= option)."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
@dataclass
class RenameNode(Node):
    """AST node for a RENAME statement."""

    # Name-to-name mapping carried by the statement.
    mapping: dict[str, str] = field(default_factory=dict)
|
|
124
|
+
|
|
125
|
+
|
|
126
|
+
@dataclass
class ArrayNode(Node):
    """AST node for an ARRAY statement."""

    # Array name.
    name: str = ""
    # Declared bounds; None by default.
    bounds: Any = None
    # Names of the variables listed for the array.
    variables: list[str] = field(default_factory=list)
    # Character-array flag; False by default.
    is_character: bool = False
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
@dataclass
class FormatNode(Node):
    """AST node for a FORMAT statement."""

    # Pairs of strings, one tuple per entry on the statement.
    items: list[tuple[str, str]] = field(default_factory=list)
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
@dataclass
class LabelNode(Node):
    """AST node for a LABEL statement."""

    # Pairs of strings, one tuple per entry on the statement.
    items: list[tuple[str, str]] = field(default_factory=list)
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
@dataclass
class InfileNode(Node):
    """AST node for an INFILE statement: the input source and its options."""

    # One of 'datalines', 'cards', or a file path.
    source: str = ""
    # Statement options.
    options: dict[str, Any] = field(default_factory=dict)
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
@dataclass
class InputNode(Node):
    """AST node for an INPUT statement: the variable list for reading raw data."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
    # Boolean flag per name.
    is_character: dict[str, bool] = field(default_factory=dict)
    # Format string per name.
    formats: dict[str, str] = field(default_factory=dict)
    # Raw in-stream data for the statement; empty string by default.
    datalines_data: str = ""
|
|
161
|
+
|
|
162
|
+
|
|
163
|
+
@dataclass
class SubstrAssignNode(Node):
    """Assignment through SUBSTR: ``SUBSTR(target, start [, length]) = expression;``."""

    # Name of the variable being modified.
    target: str = ""
    # Start expression.
    start: Any = None
    # Length expression; optional in the statement, None by default.
    length: Any = None
    # Expression on the right-hand side.
    expr: Any = None
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
@dataclass
class LengthNode(Node):
    """AST node for a LENGTH statement, which sets variable lengths."""

    # Pairs of (name, length); the length may be None.
    items: list[tuple[str, int | None]] = field(default_factory=list)
|
|
176
|
+
|
|
177
|
+
|
|
178
|
+
@dataclass
class AttribNode(Node):
    """AST node for an ATTRIB statement, which sets variable attributes."""

    # Triples of strings, one tuple per entry on the statement.
    items: list[tuple[str, str, str]] = field(default_factory=list)
|
|
182
|
+
|
|
183
|
+
|
|
184
|
+
@dataclass
class PutNode(Node):
    """AST node for a PUT statement, which writes to the log."""

    # Items of the PUT list, in source order.
    items: list[Any] = field(default_factory=list)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
@dataclass
class PutItemNode(Node):
    """One entry of a PUT list: a variable, a literal, or formatting."""

    # The item's expression node.
    expr: Any = None
    # Format specification; empty string by default.
    format_spec: str = ""
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
@dataclass
class UpdateDataNode(Node):
    """AST node for the DATA step UPDATE statement (distinct from SQL UPDATE)."""

    # Datasets named by the statement.
    datasets: list[Any] = field(default_factory=list)
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
@dataclass
class CallSymputNode(Node):
    """AST node for ``CALL SYMPUT('macro_var', value)``."""

    # First argument of the call: the macro variable.
    macro_var: Any = None
    # Second argument of the call: the value.
    value: Any = None
|
|
saslite/ast/expressions.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
1
|
+
"""Expression AST nodes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from saslite.ast.base import Node
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class LiteralNode(Node):
    """Expression node holding a literal value."""

    # The literal's value.
    value: Any = None
    # Kind of literal: "string", "number" or "missing".
    literal_type: str = "string"
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@dataclass
class VariableNode(Node):
    """Expression node referring to a variable by name."""

    # Variable name.
    name: str = ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
@dataclass
class BinaryOpNode(Node):
    """Expression node for a binary operation."""

    # Operator as a string.
    op: str = ""
    # Left and right operand expressions.
    left: Any = None
    right: Any = None
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class UnaryOpNode(Node):
    """Expression node for a unary operation."""

    # Operator as a string.
    op: str = ""
    # The operand expression.
    operand: Any = None
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
@dataclass
class FunctionCallNode(Node):
    """Expression node for a function call."""

    # Function name.
    name: str = ""
    # Argument expressions, in call order.
    args: list[Any] = field(default_factory=list)
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@dataclass
class InListNode(Node):
    """Expression node for ``expr IN (list)``."""

    # Expression being tested.
    expr: Any = None
    # Members of the list.
    values: list[Any] = field(default_factory=list)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
@dataclass
class BetweenNode(Node):
    """Expression node for a BETWEEN test."""

    # Expression being tested.
    expr: Any = None
    # Lower and upper bound expressions.
    low: Any = None
    high: Any = None
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
@dataclass
class CaseNode(Node):
    """Expression node for CASE WHEN."""

    # WHEN conditions and the results that go with them.
    conditions: list[Any] = field(default_factory=list)
    results: list[Any] = field(default_factory=list)
    # ELSE result; None by default.
    else_result: Any = None
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
@dataclass
class SubqueryNode(Node):
    """Expression node wrapping a subquery."""

    # The SELECT node of the subquery.
    select_node: Any = None
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@dataclass
class LikeNode(Node):
    """Expression node for a LIKE test."""

    # Expression being tested.
    expr: Any = None
    # Pattern expression.
    pattern: Any = None
    # Negation flag; False by default.
    negated: bool = False
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
@dataclass
class ExistsNode(Node):
    """Expression node for ``EXISTS (subquery)``."""

    # The SELECT node of the subquery.
    select_node: Any = None
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
@dataclass
class CalculatedNode(Node):
    """Expression node for a CALCULATED column reference in PROC SQL."""

    # Name of the referenced column.
    name: str = ""
|
|
93
|
+
|
|
94
|
+
|
|
95
|
+
@dataclass
class ScalarSubqueryNode(Node):
    """Expression node for a scalar subquery: ``(SELECT ...)`` used as a value."""

    # The SELECT node of the subquery.
    select_node: Any = None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
@dataclass
class ArrayRefNode(Node):
    """Expression node for an array subscript reference: ``arr[i]``."""

    # Array name.
    name: str = ""
    # Subscript expression.
    index: Any = None
|
saslite/ast/proc.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
"""PROC (non-SQL) AST nodes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from saslite.ast.base import Node
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ProcNode(Node):
    """Generic AST node for a PROC step."""

    # Name of the procedure.
    proc_name: str = ""
    # Options carried by the node.
    options: dict[str, Any] = field(default_factory=dict)
    # Statement nodes inside the step.
    statements: list[Any] = field(default_factory=list)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@dataclass
class VarListNode(Node):
    """AST node for a VAR statement."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@dataclass
class ByNode(Node):
    """AST node for a BY statement."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
    # Descending flags (presumably one per entry in ``variables`` — confirm
    # against the parser).
    descending: list[bool] = field(default_factory=list)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class IdNode(Node):
    """AST node for an ID statement."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
|
|
36
|
+
|
|
37
|
+
|
|
38
|
+
@dataclass
class SumNode(Node):
    """AST node for a SUM statement (used with PROC PRINT)."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
@dataclass
class ClassNode(Node):
    """AST node for a CLASS statement (used with PROC MEANS/FREQ)."""

    # Variable names listed on the statement.
    variables: list[str] = field(default_factory=list)
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@dataclass
class OutputNode(Node):
    """AST node for an ``OUTPUT OUT=`` statement (used with PROC MEANS/FREQ)."""

    # Output dataset name.
    out: str = ""
    # Library reference of the output dataset; "WORK" unless stated otherwise.
    out_libref: str = "WORK"
    # String-to-string mapping of requested statistics.
    stats: dict[str, str] = field(default_factory=dict)
|
|
56
|
+
|
|
57
|
+
|
|
58
|
+
@dataclass
class TablesNode(Node):
    """AST node for a TABLES statement (used with PROC FREQ)."""

    # Table specifications listed on the statement.
    table_specs: list[str] = field(default_factory=list)
    # Statement options.
    options: dict[str, Any] = field(default_factory=dict)
|
|
63
|
+
|
|
64
|
+
|
|
65
|
+
@dataclass
class FreqTableSpec(Node):
    """One table specification from a PROC FREQ TABLES statement.

    Example: ``a * b / norow nocol`` is a cross-tab of ``a`` and ``b`` with
    row and column percents suppressed.
    """

    # Variable names that make up the table.
    var_names: list[str] = field(default_factory=list)
    # Options attached to this specification.
    options: dict[str, Any] = field(default_factory=dict)
|
|
72
|
+
|
|
73
|
+
|
|
74
|
+
@dataclass
class ProcImportNode(Node):
    """AST node for PROC IMPORT."""

    # Input file.
    datafile: str = ""
    # Output dataset name and library reference.
    out: str = ""
    out_libref: str = "WORK"
    # DBMS value; "csv" by default.
    dbms: str = "csv"
    # GETNAMES setting; True by default.
    getnames: bool = True
    # Field delimiter; a comma by default.
    delimiter: str = ","
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
@dataclass
class ProcExportNode(Node):
    """AST node for PROC EXPORT."""

    # Source dataset name and library reference.
    data: str = ""
    data_libref: str = "WORK"
    # Output file.
    outfile: str = ""
    # DBMS value; "csv" by default.
    dbms: str = "csv"
    # Field delimiter; a comma by default.
    delimiter: str = ","
|
saslite/ast/program.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
"""Top-level program AST nodes."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from dataclasses import dataclass, field
|
|
6
|
+
from typing import Any
|
|
7
|
+
|
|
8
|
+
from saslite.ast.base import Node
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
@dataclass
class ProgramNode(Node):
    """Root AST node; contains every step of the program."""

    # The program's step nodes.
    steps: list[Any] = field(default_factory=list)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
@dataclass
class LibnameNode(Node):
    """AST node for a LIBNAME statement."""

    # Library reference.
    libref: str = ""
    # Engine name; empty string by default.
    engine: str = ""
    # Path given on the statement.
    path: str = ""
    # Statement options.
    options: dict[str, Any] = field(default_factory=dict)
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
@dataclass
class OptionsNode(Node):
    """AST node for an OPTIONS statement."""

    # Options carried by the statement.
    options: dict[str, Any] = field(default_factory=dict)
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
@dataclass
class FilenameNode(Node):
    """AST node for a FILENAME statement."""

    # File reference.
    fileref: str = ""
    # File path.
    filepath: str = ""
|