PyPI - peppermint-lang - Versions diffs - 0.1.0__tar.gz - Mend

peppermint-lang 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27)

peppermint_lang-0.1.0/PKG-INFO +74 -0
peppermint_lang-0.1.0/README.md +45 -0
peppermint_lang-0.1.0/peppermint/__init__.py +0 -0
peppermint_lang-0.1.0/peppermint/__main__.py +162 -0
peppermint_lang-0.1.0/peppermint/ast_nodes.py +207 -0
peppermint_lang-0.1.0/peppermint/bridge.py +220 -0
peppermint_lang-0.1.0/peppermint/diagnostics.py +70 -0
peppermint_lang-0.1.0/peppermint/interpreter.py +532 -0
peppermint_lang-0.1.0/peppermint/libs/__init__.py +0 -0
peppermint_lang-0.1.0/peppermint/libs/math_.py +41 -0
peppermint_lang-0.1.0/peppermint/libs/ml.py +150 -0
peppermint_lang-0.1.0/peppermint/libs/str_.py +72 -0
peppermint_lang-0.1.0/peppermint/libs/viz.py +129 -0
peppermint_lang-0.1.0/peppermint/parser.py +635 -0
peppermint_lang-0.1.0/peppermint/stdlib/__init__.py +28 -0
peppermint_lang-0.1.0/peppermint/stdlib/core.py +288 -0
peppermint_lang-0.1.0/peppermint_lang.egg-info/PKG-INFO +74 -0
peppermint_lang-0.1.0/peppermint_lang.egg-info/SOURCES.txt +25 -0
peppermint_lang-0.1.0/peppermint_lang.egg-info/dependency_links.txt +1 -0
peppermint_lang-0.1.0/peppermint_lang.egg-info/entry_points.txt +2 -0
peppermint_lang-0.1.0/peppermint_lang.egg-info/requires.txt +20 -0
peppermint_lang-0.1.0/peppermint_lang.egg-info/top_level.txt +1 -0
peppermint_lang-0.1.0/pyproject.toml +34 -0
peppermint_lang-0.1.0/setup.cfg +4 -0
peppermint_lang-0.1.0/tests/test_grammar.py +332 -0
peppermint_lang-0.1.0/tests/test_interpreter.py +232 -0
peppermint_lang-0.1.0/tests/test_parser.py +431 -0

peppermint_lang-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,74 @@
+Metadata-Version: 2.4
+Name: peppermint-lang
+Version: 0.1.0
+Summary: A pipe-first DSL for data and ML work
+Author-email: Chayapatr Archiwaranguprok <pub@mit.edu>
+License-Expression: MIT
+Project-URL: Repository, https://github.com/chayapatr/peppermint
+Classifier: Programming Language :: Python :: 3
+Classifier: Operating System :: OS Independent
+Classifier: Topic :: Scientific/Engineering :: Information Analysis
+Requires-Python: >=3.11
+Description-Content-Type: text/markdown
+Provides-Extra: data
+Requires-Dist: pandas; extra == "data"
+Provides-Extra: ml
+Requires-Dist: pandas; extra == "ml"
+Requires-Dist: scikit-learn; extra == "ml"
+Requires-Dist: umap-learn; extra == "ml"
+Provides-Extra: viz
+Requires-Dist: pandas; extra == "viz"
+Requires-Dist: matplotlib; extra == "viz"
+Requires-Dist: seaborn; extra == "viz"
+Provides-Extra: all
+Requires-Dist: pandas; extra == "all"
+Requires-Dist: scikit-learn; extra == "all"
+Requires-Dist: umap-learn; extra == "all"
+Requires-Dist: matplotlib; extra == "all"
+Requires-Dist: seaborn; extra == "all"
+# Peppermint
+A pipe-first DSL for data and ML work. Designed to be lightweight and readable, where every operation is a pipeline step, errors propagate automatically, and the heavy lifting happens internally so you don't have to worry about it.
+## Install
+```sh
+pip install -e .
+```
+## Run
+```sh
+pep file.pep  # run a file
+pep           # interactive REPL
+```
+## Example
+```
+load("survey.csv")
+  |> filter(it.age > 18)
+  |> add(score: it.income / it.age)
+  |> sort(by: "score", dir: "desc")
+  |> print()
+```
+```
+load("survey.csv")
+  |> group(by: "region") {
+      |> agg(avg_score: mean(it.score), n: count())
+  }
+  |> sort(by: "avg_score", dir: "desc")
+  |> print()
+```
+Each step prints a summary as it runs:
+```
+|> filter    → List  843 rows × 5 cols  (157 dropped)
+|> add       → List  843 rows × 6 cols  (+score)
+|> sort      → List  843 rows × 6 cols
+```
+See [docs/language.md](docs/language.md) for the full language reference and [examples/](examples/) for more.

peppermint_lang-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,45 @@
+# Peppermint
+A pipe-first DSL for data and ML work. Designed to be lightweight and readable, where every operation is a pipeline step, errors propagate automatically, and the heavy lifting happens internally so you don't have to worry about it.
+## Install
+```sh
+pip install -e .
+```
+## Run
+```sh
+pep file.pep  # run a file
+pep           # interactive REPL
+```
+## Example
+```
+load("survey.csv")
+  |> filter(it.age > 18)
+  |> add(score: it.income / it.age)
+  |> sort(by: "score", dir: "desc")
+  |> print()
+```
+```
+load("survey.csv")
+  |> group(by: "region") {
+      |> agg(avg_score: mean(it.score), n: count())
+  }
+  |> sort(by: "avg_score", dir: "desc")
+  |> print()
+```
+Each step prints a summary as it runs:
+```
+|> filter    → List  843 rows × 5 cols  (157 dropped)
+|> add       → List  843 rows × 6 cols  (+score)
+|> sort      → List  843 rows × 6 cols
+```
+See [docs/language.md](docs/language.md) for the full language reference and [examples/](examples/) for more.

peppermint_lang-0.1.0/peppermint/__init__.py ADDED Viewed

File without changes

peppermint_lang-0.1.0/peppermint/__main__.py ADDED Viewed

@@ -0,0 +1,162 @@
+import sys
+import argparse
+sys.setrecursionlimit(50000)
+from .parser import parse, ParseError
+from .interpreter import Interpreter, Err, Ok, PepError
+from .stdlib import build_global_env
+from .diagnostics import report_parse_error, report_pep_error, report_err
+from .interpreter import ListValue, PmFunction, PmRange
+def _repl_display(value):
+    """Print a REPL result as ``<<< value`` followed by an aligned ``← type`` tag.
+    ``None`` prints nothing. A ListValue is shown through its repr without a
+    type tag; any value not recognised here falls back to ``str(value)`` and
+    the name of its Python class.
+    """
+    if value is None:
+        return
+    # bool must be tested before the numeric branch: bool is an int subclass.
+    if isinstance(value, bool):
+        val_str = "true" if value else "false"
+        type_str = "bool"
+    elif isinstance(value, (int, float)):
+        # ints and floats are both reported as the single "num" type
+        val_str = str(value)
+        type_str = "num"
+    elif isinstance(value, str):
+        val_str = repr(value)
+        type_str = "str"
+    elif isinstance(value, ListValue):
+        val_str = repr(value)
+        type_str = ""
+    elif isinstance(value, PmFunction):
+        val_str = repr(value)
+        type_str = "fn"
+    elif isinstance(value, PmRange):
+        val_str = f"{value.start}..{value.end}"
+        type_str = "range"
+    elif isinstance(value, dict):
+        pairs = ", ".join(f"{k}: {v!r}" for k, v in value.items())
+        val_str = "{ " + pairs + " }"
+        type_str = "obj"
+    elif isinstance(value, list):
+        val_str = repr(value)
+        type_str = "list"
+    else:
+        val_str = str(value)
+        type_str = type(value).__name__
+    prefix = "\033[32m<<<\033[0m"
+    # "<<< " is 4 chars; pad so arrow aligns at col 34 (matching pipe step)
+    if type_str:
+        pad = max(0, 34 - 4 - len(val_str))
+        print(f"{prefix} {val_str}{' ' * pad}\033[33m←\033[0m \033[33m{type_str}\033[0m")
+    else:
+        print(f"{prefix} {val_str}")
+def run_file(args):
+    """Parse and run the program stored in ``args.file``.
+    Every failure (unreadable file, parse error, runtime error, or a final Err
+    result) is reported on stderr or through the diagnostics helpers and ends
+    the process with exit status 1. ``args.quiet`` is forwarded to the
+    interpreter.
+    """
+    try:
+        # The context manager closes the handle even when read() fails.
+        with open(args.file) as fh:
+            src = fh.read()
+    except FileNotFoundError:
+        print(f"\033[1;31mError:\033[0m × file not found: {args.file}", file=sys.stderr)
+        sys.exit(1)
+    except (OSError, UnicodeDecodeError) as e:
+        # Directories, permission problems and undecodable bytes get the same
+        # one-line report as every other failure instead of a raw traceback.
+        print(f"\033[1;31mError:\033[0m × {e}", file=sys.stderr)
+        sys.exit(1)
+    try:
+        program = parse(src)
+    except ParseError as e:
+        report_parse_error(e, src, args.file)
+        sys.exit(1)
+    except Exception as e:
+        print(f"\033[1;31mError:\033[0m × {e}", file=sys.stderr)
+        sys.exit(1)
+    env = build_global_env()
+    interp = Interpreter(env, quiet=args.quiet)
+    try:
+        result = interp.run(program)
+    except PepError as e:
+        report_pep_error(e, src, args.file)
+        sys.exit(1)
+    except Exception as e:
+        print(f"\033[1;31mError:\033[0m × {e}", file=sys.stderr)
+        sys.exit(1)
+    # A program whose final value is an Err counts as a failed run.
+    if isinstance(result, Err):
+        report_err(result, src, args.file)
+        sys.exit(1)
+def run_repl(args):
+    """Run the interactive read-eval-print loop until EOF (Ctrl+D).
+    Input lines accumulate in a buffer until the buffer parses; Ctrl+C discards
+    the buffer. ``args`` is accepted for symmetry with run_file but is not
+    consulted: the REPL interpreter is always created with ``quiet=False``.
+    """
+    try:
+        import readline  # noqa: F401 -- imported only for its side effect: arrow keys and history
+    except ImportError:
+        # readline is an optional module and is missing on some platforms;
+        # input() keeps working without line editing.
+        pass
+    env = build_global_env()
+    interp = Interpreter(env, quiet=False)
+    print("Peppermint REPL  (Ctrl+D to exit)")
+    buf = []
+    while True:
+        prompt = "\033[2m...\033[0m " if buf else "\033[32m>>>\033[0m "
+        try:
+            line = input(prompt)
+        except EOFError:
+            print()
+            break
+        except KeyboardInterrupt:
+            print()
+            buf = []
+            continue
+        stripped = line.rstrip()
+        buf.append(line)
+        # Try joining with newlines first, then with spaces (handles `1 +\n2` style)
+        program = None
+        for src in ("\n".join(buf), " ".join(buf)):
+            try:
+                program = parse(src)
+                break
+            except Exception:
+                # A parse failure is treated as "input not finished yet".
+                # NOTE(review): a line that can never parse is therefore never
+                # reported; the user has to press Ctrl+C to reset the buffer.
+                pass
+        if program is None:
+            # A lone blank line is dropped rather than starting a continuation.
+            if stripped == "" and len(buf) == 1:
+                buf = []
+            continue
+        buf = []
+        try:
+            result = interp.run(program)
+        except PepError as e:
+            report_pep_error(e, src, "<repl>")  # src is still in scope from the loop above
+            continue
+        except Exception as e:
+            print(f"\033[1;31mError:\033[0m × {e}", file=sys.stderr)
+            continue
+        if result is None:
+            continue
+        if isinstance(result, Err):
+            report_err(result, src, "<repl>")
+        elif isinstance(result, Ok):
+            _repl_display(result.value)
+        else:
+            _repl_display(result)
+def main():
+    """Command-line entry point: run the given file, or start the REPL when none is given."""
+    cli = argparse.ArgumentParser(prog="pep", description="Peppermint language")
+    cli.add_argument("file", nargs="?", help="Path to .pep file (omit to start REPL)")
+    cli.add_argument("--quiet", action="store_true", help="Suppress pipe step summaries")
+    opts = cli.parse_args()
+    # An omitted (or empty) file argument selects the interactive mode.
+    runner = run_file if opts.file else run_repl
+    runner(opts)
+if __name__ == "__main__":
+    main()

peppermint_lang-0.1.0/peppermint/ast_nodes.py ADDED Viewed

@@ -0,0 +1,207 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Any
+@dataclass
+class Loc:
+    """Source position as a line/column pair; its repr renders as ``line:col``."""
+    line: int
+    col: int
+    def __repr__(self):
+        return f"{self.line}:{self.col}"
+# Shared placeholder position (0:0); the node classes below return this same
+# instance from their ``loc`` default factory.
+NO_LOC = Loc(0, 0)
+# --- Patterns (used in match arms) ---
+@dataclass
+class PatComparison:
+    """Match-arm pattern made of a comparison operator and the value to compare against."""
+    op: str        # >, <, >=, <=, ==, !=
+    value: Any     # int | float | str | Expr (variable)
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class PatOk:
+    """Match-arm pattern carrying one name (presumably bound to an Ok payload; confirm in the interpreter)."""
+    name: str
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class PatErr:
+    """Match-arm pattern carrying one name (presumably bound to an Err payload; confirm in the interpreter)."""
+    name: str
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class PatTuple:
+    """Match-arm pattern composed of a list of sub-patterns."""
+    patterns: list
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class PatWildcard:
+    """Match-arm wildcard pattern; it carries no data besides its location."""
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+# Union of the pattern node classes; used as the annotation of MatchArm.pattern.
+Pattern = PatComparison | PatOk | PatErr | PatTuple | PatWildcard
+# --- Expressions ---
+@dataclass
+class IntLit:
+    """Integer literal expression node."""
+    value: int
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class FloatLit:
+    """Floating-point literal expression node."""
+    value: float
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class StrLit:
+    """String literal expression node."""
+    value: str
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class BoolLit:
+    """Boolean literal expression node."""
+    value: bool
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class NoneLit:
+    """None literal expression node; it has no payload besides its location."""
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Ident:
+    """Identifier expression node holding the referenced name."""
+    name: str
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class FieldAccess:
+    """Field access expression: the object expression plus the field name."""
+    obj: Any       # Expr
+    # This attribute is annotation-only, so ``field(...)`` on the next line
+    # still resolves to dataclasses.field.
+    field: str
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Neg:
+    """Negation expression node wrapping a single operand."""
+    operand: Any   # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class BinOp:
+    """Binary operation node: an operator symbol with left and right operand expressions."""
+    op: str        # +, -, *, /, >, <, >=, <=, ==, !=
+    left: Any      # Expr
+    right: Any     # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Call:
+    """Call expression: callee, positional args, keyword args and an optional trailing block."""
+    func: Any      # Expr — already resolved to FieldAccess or Ident by postfix rule
+    args: list     # list[Expr]
+    kwargs: dict   # str -> Expr
+    block: list | None  # list[PipeStep] | None
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Lambda:
+    """Lambda expression node: parameter names and a body expression."""
+    params: list[str]
+    body: Any      # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Pipe:
+    """Pipeline expression node holding its ordered steps."""
+    steps: list    # list[Expr] — first is the source, rest are calls
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class PipeStep:
+    """One pipeline step: the step expression plus a ``quiet`` flag."""
+    expr: Any      # Call
+    quiet: bool    # presumably suppresses this step's summary output; confirm in the interpreter
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Match:
+    """Match expression node: the subject expression and its arms."""
+    subject: Any   # Expr
+    arms: list     # list[MatchArm]
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class MatchArm:
+    """One arm of a match expression: a pattern and the body expression paired with it."""
+    pattern: Pattern
+    body: Any      # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class ListLit:
+    """List literal expression node holding its item expressions."""
+    items: list    # list[Expr]
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class ObjField:
+    """``key: value`` entry of an object literal."""
+    key: str
+    value: Any     # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class ObjSpread:
+    """Spread entry of an object literal, wrapping the expression being spread."""
+    obj: Any       # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class ObjShorthand:
+    """Shorthand entry of an object literal, where the key doubles as the variable name."""
+    key: str       # { x } = { x: x }
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class ObjLit:
+    """Object literal expression node holding its entries in source order."""
+    # NOTE(review): ObjShorthand is declared as an entry kind too but is not
+    # listed in the comment below; confirm against the parser.
+    entries: list  # list[ObjField | ObjSpread]
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class TupleLit:
+    """Tuple literal expression node holding its item expressions."""
+    items: list    # list[Expr]
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Spread:
+    """Spread expression node wrapping the expression being spread."""
+    obj: Any       # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Range:
+    """Range expression node with integer ``start`` and ``end`` bounds."""
+    start: int
+    end: int       # NOTE(review): whether this bound is inclusive is not visible here
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Block:
+    """Sequence of statements treated as a single expression."""
+    stmts: list   # list[Expr] — evaluates each, returns last
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Literal:
+    """Node that wraps a value which has already been evaluated."""
+    value: Any    # already-evaluated runtime value, passes through eval unchanged
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+# --- Statements ---
+@dataclass
+class Assign:
+    """Assignment statement: a target name and the value expression."""
+    name: str
+    value: Any     # Expr
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class UseDecl:
+    """``use`` declaration: the module or file to load and an optional alias."""
+    path: str      # module name or file path string
+    alias: str | None
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class NsDecl:
+    """Namespace declaration: a name and the assignments it contains."""
+    name: str
+    body: list     # list[Assign]
+    loc: Loc = field(default_factory=lambda: NO_LOC)
+@dataclass
+class Program:
+    """Root node of a parsed program: the ordered list of top-level items."""
+    body: list     # list[Assign | UseDecl | NsDecl | Expr]
+    loc: Loc = field(default_factory=lambda: NO_LOC)

peppermint_lang-0.1.0/peppermint/bridge.py ADDED Viewed

@@ -0,0 +1,220 @@
+"""
+Peppermint ↔ Python bridge.
+Python libraries loaded via `use` should import from here rather than
+touching interpreter internals directly. The bridge is the single place
+that knows about both worlds.
+"""
+from __future__ import annotations
+from typing import Any, Callable
+import functools
+# Lazily filled (Ok, Err, ListValue) triple. It is published with one
+# assignment so a reader never observes a half-initialised cache.
+_types: tuple | None = None
+def _interp_types():
+    """Return the interpreter's (Ok, Err, ListValue) classes, importing them on first use."""
+    global _types
+    cached = _types
+    if cached is not None:
+        return cached
+    # Imported here rather than at module import time.
+    from .interpreter import Ok, Err, ListValue
+    cached = (Ok, Err, ListValue)
+    _types = cached
+    return cached
+# --- Type predicates ---
+def is_ok(val) -> bool:
+    """Report whether ``val`` is an instance of the interpreter's Ok class."""
+    ok_cls = _interp_types()[0]
+    return isinstance(val, ok_cls)
+def is_err(val) -> bool:
+    """Report whether ``val`` is an instance of the interpreter's Err class."""
+    err_cls = _interp_types()[1]
+    return isinstance(val, err_cls)
+def is_list(val) -> bool:
+    """Report whether ``val`` is a ListValue or a plain Python list."""
+    list_value_cls = _interp_types()[2]
+    return isinstance(val, list_value_cls) or isinstance(val, list)
+def is_object_list(val) -> bool:
+    """Report whether ``val`` is a ListValue or a plain list containing only dicts.
+    An empty plain list counts as an object list.
+    """
+    list_value_cls = _interp_types()[2]
+    if isinstance(val, list_value_cls):
+        return True
+    return isinstance(val, list) and all(isinstance(el, dict) for el in val)
+# --- Conversion: Peppermint → Python ---
+def to_python(val) -> Any:
+    """Convert a Peppermint runtime value into plain Python data.
+    Ok is unwrapped recursively; Err collapses to its message string, so the
+    caller receives a value rather than a second failure channel; ListValue
+    and list are converted element by element; numpy and pandas values are
+    coerced to builtins when those packages can be imported. Anything else is
+    returned unchanged (dicts are not descended into).
+    """
+    ok_cls, err_cls, list_value_cls = _interp_types()
+    if isinstance(val, ok_cls):
+        return to_python(val.value)
+    if isinstance(val, err_cls):
+        return val.msg  # handed back as data; the caller decides how to react
+    if isinstance(val, (list_value_cls, list)):
+        items = val.rows if isinstance(val, list_value_cls) else val
+        return [to_python(item) for item in items]
+    # Both third-party packages are optional: a failed import skips the coercion.
+    try:
+        import numpy as np
+        if isinstance(val, np.integer):
+            return int(val)
+        if isinstance(val, np.floating):
+            return float(val)
+        if isinstance(val, np.ndarray):
+            return val.tolist()
+    except ImportError:
+        pass
+    try:
+        import pandas as pd
+        if isinstance(val, pd.DataFrame):
+            return val.to_dict(orient="records")
+        if isinstance(val, pd.Series):
+            return val.tolist()
+    except ImportError:
+        pass
+    return val
+# --- Conversion: Python → Peppermint ---
+def _infer_schema(rows: list[dict]) -> dict:
+    """Map each column name to the type of its first non-None value.
+    A column that is None in every row where it appears maps to NoneType.
+    """
+    schema: dict = {}
+    seen_none: set = set()
+    for row in rows:
+        for key, cell in row.items():
+            if key in schema:
+                continue
+            if cell is None:
+                seen_none.add(key)
+            else:
+                schema[key] = type(cell)
+    # Only columns that never produced a concrete type fall back to NoneType.
+    schema.update(dict.fromkeys(seen_none - schema.keys(), type(None)))
+    return schema
+def make_list(rows: list[dict]):
+    """Build a ListValue from ``rows``, with a schema inferred from those rows."""
+    list_value_cls = _interp_types()[2]
+    inferred = _infer_schema(rows)
+    return list_value_cls(rows=rows, schema=inferred)
+def _normalize(val) -> Any:
+    """Strip Ok wrappers and coerce numpy/pandas values to builtins.
+    A ListValue is returned untouched; no container is descended into.
+    """
+    ok_cls, _, list_value_cls = _interp_types()
+    # Peel nested Ok wrappers one layer at a time.
+    while isinstance(val, ok_cls):
+        val = val.value
+    if isinstance(val, list_value_cls):
+        return val  # rows and schema stay exactly as they are
+    # Both third-party packages are optional: a failed import skips the coercion.
+    try:
+        import numpy as np
+        if isinstance(val, np.integer):
+            return int(val)
+        if isinstance(val, np.floating):
+            return float(val)
+        if isinstance(val, np.ndarray):
+            return val.tolist()
+    except ImportError:
+        pass
+    try:
+        import pandas as pd
+        if isinstance(val, pd.DataFrame):
+            return val.to_dict(orient="records")
+        if isinstance(val, pd.Series):
+            return val.tolist()
+    except ImportError:
+        pass
+    return val
+def from_python(val) -> Any:
+    """Normalise ``val`` and wrap it in Ok.
+    A plain list whose elements are all dicts (including an empty list) is
+    converted to a ListValue first; an existing ListValue and every other
+    value are wrapped as they are.
+    """
+    ok_cls, _, list_value_cls = _interp_types()
+    plain = _normalize(val)
+    is_tabular = (
+        not isinstance(plain, list_value_cls)
+        and isinstance(plain, list)
+        and all(isinstance(el, dict) for el in plain)
+    )
+    if is_tabular:
+        plain = make_list(plain)
+    return ok_cls(plain)
+def err(msg: str):
+    """Build an interpreter Err from ``msg``."""
+    err_cls = _interp_types()[1]
+    return err_cls(msg)
+def ok(val):
+    """Build an interpreter Ok around ``val`` without any conversion."""
+    ok_cls = _interp_types()[0]
+    return ok_cls(val)
+# --- Row utilities ---
+def get_rows(val) -> list[dict]:
+    """Return the rows of a ListValue, or ``val`` itself when it is a plain list.
+    Raises TypeError for any other kind of value.
+    """
+    list_value_cls = _interp_types()[2]
+    if isinstance(val, list_value_cls):
+        return val.rows
+    if not isinstance(val, list):
+        raise TypeError(f"expected a list, got {type(val).__name__}")
+    return val
+def map_rows(val, fn: Callable[[dict], dict]):
+    """Call ``fn`` on every row of ``val`` and return the collected results via from_python."""
+    transformed = []
+    for row in get_rows(val):
+        transformed.append(fn(row))
+    return from_python(transformed)
+def add_column(val, name: str, fn: Callable[[dict], Any]):
+    """Return copies of the rows with field ``name`` set to ``fn(row)``, via from_python."""
+    extended = []
+    for row in get_rows(val):
+        # A fresh dict per row, so the input rows are not modified here.
+        extended.append({**row, name: fn(row)})
+    return from_python(extended)
+def filter_rows(val, fn: Callable[[dict], bool]):
+    """Keep the rows for which ``fn(row)`` is truthy and return them via from_python."""
+    kept = []
+    for row in get_rows(val):
+        if fn(row):
+            kept.append(row)
+    return from_python(kept)
+# --- Library loader ---
+def load_python_file(path: str, alias: str | None = None) -> dict:
+    """Import a .py file and return its public functions wrapped for Peppermint.
+    The file is executed as a module named ``_pep_user_lib``. Every function
+    found on the module whose name does not start with an underscore is
+    returned, keyed by name and passed through ``wrap``. ``alias`` is accepted
+    but not used here.
+    Raises ImportError when no loader can be determined for ``path``.
+    """
+    import importlib.util, inspect
+    spec = importlib.util.spec_from_file_location("_pep_user_lib", path)
+    # No spec (or no loader) is produced when the file suffix is not a
+    # recognised Python module suffix; fail with a clear message instead of an
+    # AttributeError on None.
+    if spec is None or spec.loader is None:
+        raise ImportError(f"cannot load Python library from {path!r}")
+    mod = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(mod)
+    return {
+        name: wrap(obj)
+        for name, obj in inspect.getmembers(mod, inspect.isfunction)
+        if not name.startswith("_")
+    }
+def wrap(fn: Callable) -> Callable:
+    """Adapt a plain Python function so it can be called from Peppermint.
+    - Positional and keyword arguments pass through ``to_python`` first
+    - The result passes through ``from_python`` (wrapped in Ok)
+    - Any Exception raised along the way becomes an Err carrying ``str(exc)``
+    The ``_interp``, ``_env`` and ``_block`` keywords are accepted and ignored.
+    """
+    @functools.wraps(fn)
+    def wrapper(*args, _interp=None, _env=None, _block=None, **kwargs):
+        try:
+            plain_args = [to_python(arg) for arg in args]
+            plain_kwargs = {}
+            for key, value in kwargs.items():
+                plain_kwargs[key] = to_python(value)
+            result = fn(*plain_args, **plain_kwargs)
+            return from_python(result)
+        except Exception as exc:
+            err_cls = _interp_types()[1]
+            return err_cls(str(exc))
+    return wrapper
+def wrap_lib(fns: dict) -> dict:
+    """Return a new dict with every function in ``fns`` passed through ``wrap``."""
+    wrapped = {}
+    for name, fn in fns.items():
+        wrapped[name] = wrap(fn)
+    return wrapped