api-parity-py 0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- api_parity_py/__init__.py +5 -0
- api_parity_py/__main__.py +148 -0
- api_parity_py/parity.py +212 -0
- api_parity_py/walk.py +242 -0
- api_parity_py-0.0.2.dist-info/METADATA +5 -0
- api_parity_py-0.0.2.dist-info/RECORD +8 -0
- api_parity_py-0.0.2.dist-info/WHEEL +4 -0
- api_parity_py-0.0.2.dist-info/entry_points.txt +2 -0
|
@@ -0,0 +1,148 @@
|
|
|
1
|
+
"""CLI: ``api-parity-py <kind> [--mode walker|annotation] <target> [-o PATH]``.
|
|
2
|
+
|
|
3
|
+
Supported combos:
|
|
4
|
+
reference walker walk a package's public API (default for reference)
|
|
5
|
+
reference annotation collect ``@parity_ref`` decorators in a package
|
|
6
|
+
port walker walk a package and synthesize implemented port entries
|
|
7
|
+
port annotation collect ``@parity_impl`` / ``@parity`` decorators
|
|
8
|
+
(default for port)
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
import importlib
|
|
13
|
+
import json
|
|
14
|
+
import sys
|
|
15
|
+
from pathlib import Path
|
|
16
|
+
|
|
17
|
+
from . import walk
|
|
18
|
+
from .parity import collect_port_entries, collect_reference_entries
|
|
19
|
+
|
|
20
|
+
EXIT_USAGE = 64
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def main() -> int:
    """CLI entry point: parse arguments, build the envelope, emit JSON.

    Returns the process exit code: 0 on success, ``EXIT_USAGE`` (64) when
    the requested (kind, mode) combination has no implementation.
    """
    ap = argparse.ArgumentParser(prog="api-parity-py")
    ap.add_argument("kind", choices=("reference", "port"))
    ap.add_argument(
        "--mode",
        choices=("walker", "annotation"),
        default=None,
        help="how entries are produced (defaults: reference→walker, port→annotation)",
    )
    ap.add_argument(
        "target",
        help="package name (or comma-separated names, e.g. "
        "'pyspark.sql.connect,pyspark.sql.session')",
    )
    ap.add_argument(
        "--version-from",
        default=None,
        help="module to read `__version__` from (e.g. `pyspark`)",
    )
    ap.add_argument(
        "-o",
        "--output",
        default="-",
        help="output file path, or `-` for stdout (default)",
    )
    args = ap.parse_args()

    # `--mode` is optional; each kind has a sensible default.
    mode = args.mode or _default_mode(args.kind)
    envelope = _build_envelope(
        kind=args.kind,
        mode=mode,
        target=args.target,
        version_from=args.version_from,
    )
    if envelope is None:
        sys.stderr.write(
            f"api-parity-py: ({args.kind}, mode={mode}) is not yet implemented\n"
        )
        return EXIT_USAGE

    text = json.dumps(envelope, indent=2)
    if args.output == "-":
        sys.stdout.write(text + "\n")
    else:
        # BUG FIX: write JSON as UTF-8 explicitly. `write_text` defaults to
        # the platform preferred encoding (e.g. cp1252 on Windows), which
        # can raise UnicodeEncodeError on non-ASCII API names/docflags.
        Path(args.output).write_text(text + "\n", encoding="utf-8")
    return 0
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _default_mode(kind: str) -> str:
|
|
72
|
+
return "annotation" if kind == "port" else "walker"
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def _build_envelope(
    *,
    kind: str,
    mode: str,
    target: str,
    version_from: str | None,
) -> dict | None:
    """Dispatch on (kind, mode) and return the envelope dict.

    Returns None for combinations that have no implementation; the caller
    turns that into a usage error.
    """
    combo = (kind, mode)
    if combo == ("reference", "walker"):
        return walk.walk_package(target, version_from=version_from)
    if combo == ("reference", "annotation"):
        return _collect_annotations(target, version_from, kind="reference")
    if combo == ("port", "annotation"):
        return _collect_annotations(target, version_from, kind="port")
    if combo == ("port", "walker"):
        return _walker_as_port(target, version_from)
    return None
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def _import_target_packages(target: str) -> list[str]:
    """Split `target` on commas and eagerly import each package's submodules.

    Importing runs the ``@parity*`` decorators, which populate the
    module-level registries the annotation collectors read.
    """
    names: list[str] = []
    for raw in target.split(","):
        cleaned = raw.strip()
        if cleaned:
            names.append(cleaned)
    for name in names:
        walk._preload_submodules(name)
    return names
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _read_version(version_from: str | None) -> str | None:
|
|
102
|
+
if not version_from:
|
|
103
|
+
return None
|
|
104
|
+
try:
|
|
105
|
+
mod = importlib.import_module(version_from)
|
|
106
|
+
return getattr(mod, "__version__", None)
|
|
107
|
+
except Exception:
|
|
108
|
+
return None
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _collect_annotations(
    target: str, version_from: str | None, *, kind: str,
) -> dict:
    """Import the target packages, then dump the decorator registry.

    `kind` selects which registry ("port" or "reference") becomes the
    envelope's entries.
    """
    packages = _import_target_packages(target)
    collector = collect_port_entries if kind == "port" else collect_reference_entries
    return {
        "schema_version": 1,
        "kind": kind,
        "language": "python",
        "version": _read_version(version_from),
        "source": ",".join(packages),
        "entries": collector(),
    }
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _walker_as_port(target: str, version_from: str | None) -> dict:
    """Walk `target` and mark every discovered API as `status=implemented`.

    Useful for py-vs-py comparisons where neither side carries
    annotations: the walked surface is treated as fully implemented.
    """
    reference = walk.walk_package(target, version_from=version_from)
    port_entries = [
        {
            "path": entry["path"],
            "implementation": entry["path"],
            "status": "implemented",
            "since": None,
            "issue": None,
            "comment": None,
        }
        for entry in reference["entries"]
    ]
    return {**reference, "kind": "port", "entries": port_entries}
|
|
145
|
+
|
|
146
|
+
|
|
147
|
+
# Support `python -m api_parity_py`; main()'s return value becomes the
# process exit status.
if __name__ == "__main__":
    sys.exit(main())
|
api_parity_py/parity.py
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
"""Annotation API for Python plugins.
|
|
2
|
+
|
|
3
|
+
Three decorators:
|
|
4
|
+
|
|
5
|
+
- ``@parity_impl(path=..., status=...)`` on a class. Sets a parent path
|
|
6
|
+
for its methods; if both ``path`` and ``status`` are given, also
|
|
7
|
+
registers a class-level port entry.
|
|
8
|
+
- ``@parity(path=..., status=...)`` on a method. A leading ``.`` makes
|
|
9
|
+
the path relative to the enclosing ``@parity_impl``'s path. Free
|
|
10
|
+
functions (no enclosing class) are supported with absolute paths.
|
|
11
|
+
- ``@parity_ref(path=..., kind=...)`` on a class / method / function.
|
|
12
|
+
For declaring a reference inventory in code (rare; usually a walker
|
|
13
|
+
is better).
|
|
14
|
+
|
|
15
|
+
Decorators write to module-level registries that the CLI dumps in
|
|
16
|
+
``port`` / ``reference`` mode with ``--mode annotation``.
|
|
17
|
+
"""
|
|
18
|
+
|
|
19
|
+
from __future__ import annotations
|
|
20
|
+
|
|
21
|
+
from enum import Enum
|
|
22
|
+
from typing import Any, Callable
|
|
23
|
+
|
|
24
|
+
_PORT_ENTRIES: list[dict] = []
|
|
25
|
+
_REFERENCE_ENTRIES: list[dict] = []
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class Status(str, Enum):
    """Port-entry status values.

    Inherits ``str`` so members compare equal to their plain-string form
    and serialize cleanly to JSON.
    """

    IMPLEMENTED = "implemented"
    PARTIAL = "partial"
    UNIMPLEMENTED = "unimplemented"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _normalize_status(status: Any) -> str:
    """Coerce a `Status` member or a valid status string to plain string.

    Raises TypeError for any other value.
    """
    if isinstance(status, Status):
        return status.value
    valid_values = {member.value for member in Status}
    if isinstance(status, str) and status in valid_values:
        return status
    raise TypeError(
        f"status must be a Status enum or one of {{implemented, partial, unimplemented}}, "
        f"got {status!r}"
    )
|
|
43
|
+
|
|
44
|
+
|
|
45
|
+
def _validate_port_args(status: str, comment: str | None) -> None:
    """Reject an `unimplemented` entry that lacks an explanatory comment."""
    needs_comment = status == Status.UNIMPLEMENTED.value
    if needs_comment and not comment:
        raise ValueError(
            "status=Unimplemented requires a comment explaining why"
        )
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def parity(
    path: str,
    status: Status | str,
    *,
    since: str | None = None,
    issue: int | None = None,
    comment: str | None = None,
) -> Callable:
    """Port annotation for a method or free function.

    A ``path`` beginning with ``.`` is relative: the enclosing
    ``@parity_impl`` resolves it against the parent path when the class
    decorator runs. An absolute path registers immediately, which also
    covers free functions that have no enclosing class.
    """
    normalized = _normalize_status(status)
    _validate_port_args(normalized, comment)
    meta = {
        "path": path,
        "status": normalized,
        "since": since,
        "issue": issue,
        "comment": comment,
    }
    is_relative = path.startswith(".")

    def decorate(fn: Callable) -> Callable:
        if is_relative:
            # Defer: the `parity_impl` sweep joins this with the parent
            # path and registers the entry.
            fn._parity_meta = meta  # type: ignore[attr-defined]
        else:
            # Register now and do NOT stash meta — stashing would make an
            # enclosing `parity_impl` sweep register the entry a second time.
            _PORT_ENTRIES.append({
                **meta,
                "implementation": f"{fn.__module__}.{fn.__qualname__}",
            })
        return fn

    return decorate
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
def parity_impl(
    path: Any = None,
    status: Status | str | None = None,
    *,
    since: str | None = None,
    issue: int | None = None,
    comment: str | None = None,
) -> Callable:
    """Class-level port annotation.

    Two call forms:

    - ``@parity_impl(path="...", status=...)`` — with args, registers a
      class-level entry and provides a parent path for relative children.
    - ``@parity_impl`` — bare, no args. Sweeps the class for relative
      ``@parity`` children but registers no class-level entry (and a
      child with a leading-``.`` path is an error in this form).

    Either way, the class's ``__dict__`` is swept and any
    ``@parity``-decorated method with relative path gets joined with the
    parent's ``path`` and registered.
    """
    # Bare-decorator form: Python passes the class as the first arg.
    # `path` is typed Any (not str) precisely to allow this dual use.
    if isinstance(path, type) and status is None:
        return parity_impl()(path)

    parent_path = path
    # Normalize/validate eagerly so a bad status fails at decoration time,
    # not at collection time.
    parent_status = _normalize_status(status) if status is not None else None
    if parent_status is not None:
        _validate_port_args(parent_status, comment)

    def decorate(cls: type) -> type:
        # Class-level entry, if both path and status given.
        # NOTE(review): a status without a path is validated above but
        # silently registers nothing — confirm that is intended.
        if parent_path and parent_status:
            _PORT_ENTRIES.append({
                "path": parent_path,
                "implementation": f"{cls.__module__}.{cls.__qualname__}",
                "status": parent_status,
                "since": since,
                "issue": issue,
                "comment": comment,
            })

        # Sweep methods. We look at __dict__ rather than dir() so we
        # only see things defined on this class (not inherited), and so
        # properties / classmethods come back as their raw descriptor
        # (which is what the @parity decorator attached the meta to).
        for name, value in cls.__dict__.items():
            inner = value
            # Unwrap descriptors to reach the function `@parity` decorated.
            if isinstance(value, (classmethod, staticmethod)):
                inner = value.__func__
            elif isinstance(value, property):
                inner = value.fget
            meta = getattr(inner, "_parity_meta", None)
            if not meta:
                continue
            child_path = meta["path"]
            if child_path.startswith("."):
                if not parent_path:
                    raise ValueError(
                        f"@parity({child_path!r}) on {cls.__qualname__}.{name} "
                        f"requires the enclosing @parity_impl to declare a path"
                    )
                # Relative: simple concatenation — the child already
                # carries its leading dot.
                full_path = f"{parent_path}{child_path}"
            else:
                full_path = child_path
            _PORT_ENTRIES.append({
                "path": full_path,
                "implementation": f"{cls.__module__}.{cls.__qualname__}.{name}",
                "status": meta["status"],
                "since": meta["since"],
                "issue": meta["issue"],
                "comment": meta["comment"],
            })
        return cls

    return decorate
|
|
170
|
+
|
|
171
|
+
|
|
172
|
+
def parity_ref(path: str, kind: str) -> Callable:
    """Declare a reference-inventory entry directly in code.

    Apply to a class / method / function. Rarely needed — a walker
    usually produces the reference side — but supports hand-curated
    inventories.
    """
    allowed_kinds = {"class", "method", "property", "function"}
    if kind not in allowed_kinds:
        raise ValueError(
            f"kind must be one of class/method/property/function, got {kind!r}"
        )

    def decorate(target: Any) -> Any:
        _REFERENCE_ENTRIES.append({"path": path, "kind": kind})
        return target

    return decorate
|
|
185
|
+
|
|
186
|
+
|
|
187
|
+
def collect_port_entries() -> list[dict]:
    """All port entries registered so far, sorted and deduped by path."""
    return _sort_dedup(list(_PORT_ENTRIES), key=("path",))
|
|
190
|
+
|
|
191
|
+
|
|
192
|
+
def collect_reference_entries() -> list[dict]:
    """All reference entries registered so far, sorted and deduped by
    (path, kind)."""
    return _sort_dedup(list(_REFERENCE_ENTRIES), key=("path", "kind"))
|
|
195
|
+
|
|
196
|
+
|
|
197
|
+
def reset_registries() -> None:
    """Empty both module-level registries (test-isolation helper)."""
    del _PORT_ENTRIES[:]
    del _REFERENCE_ENTRIES[:]
|
|
201
|
+
|
|
202
|
+
|
|
203
|
+
def _sort_dedup(entries: list[dict], key: tuple[str, ...]) -> list[dict]:
|
|
204
|
+
seen: set[tuple] = set()
|
|
205
|
+
out: list[dict] = []
|
|
206
|
+
for e in sorted(entries, key=lambda d: tuple(d.get(k) or "" for k in key)):
|
|
207
|
+
k = tuple(e.get(field) for field in key)
|
|
208
|
+
if k in seen:
|
|
209
|
+
continue
|
|
210
|
+
seen.add(k)
|
|
211
|
+
out.append(e)
|
|
212
|
+
return out
|
api_parity_py/walk.py
ADDED
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
"""Walk a Python package and emit a reference-side envelope (see SCHEMA.md).
|
|
2
|
+
|
|
3
|
+
# Algorithm
|
|
4
|
+
|
|
5
|
+
1. Preload every submodule of each requested package, so classes defined
|
|
6
|
+
in lazy-imported modules show up in `sys.modules` and forward-reference
|
|
7
|
+
strings are resolvable.
|
|
8
|
+
2. Walk the loaded modules. For each module under one of the requested
|
|
9
|
+
packages, list its public classes — but only the ones *defined* there
|
|
10
|
+
(i.e. `cls.__module__ == mod_name`), so re-exports don't double-count.
|
|
11
|
+
3. For each class, recurse into nested classes (e.g. `SparkSession.Builder`)
|
|
12
|
+
and emit one entry per public method/property. Class-level data attrs
|
|
13
|
+
(`MAX_MESSAGE_LENGTH = 128`) and nested classes are handled separately.
|
|
14
|
+
4. Module-level free functions are emitted with `kind = "function"`.
|
|
15
|
+
|
|
16
|
+
# Path keying
|
|
17
|
+
|
|
18
|
+
Each entry's `path` is `cls.__module__ + "." + cls.__qualname__` (for a
|
|
19
|
+
class) or `<class path>.<member>` (for a member). `__qualname__` includes
|
|
20
|
+
nesting, so `SparkSession.Builder` keeps its dotted lexical structure.
|
|
21
|
+
|
|
22
|
+
This means `pyspark.sql.session.SparkSession` and
|
|
23
|
+
`pyspark.sql.connect.session.SparkSession` are distinct entries — both
|
|
24
|
+
classes are tracked, no qualname collision, no priority/preference flag.
|
|
25
|
+
"""
|
|
26
|
+
|
|
27
|
+
import importlib
|
|
28
|
+
import inspect
|
|
29
|
+
import pkgutil
|
|
30
|
+
import sys
|
|
31
|
+
from collections import Counter
|
|
32
|
+
|
|
33
|
+
# `inspect.getmembers(object)` brings in `__init__`, `__doc__`, etc.; we
# strip those by name to keep the public API list focused on real surface.
_OBJECT_MEMBERS: frozenset[str] = frozenset(name for name, _ in inspect.getmembers(object))
# Subclasses of these are "data shapes", not API surface. Filtering by
# class hierarchy avoids clutter from `Row` (a tuple subclass), exception
# types, etc.
_DATA_BASES: tuple[type, ...] = (tuple, list, dict, BaseException)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _preload_submodules(package_name: str) -> None:
|
|
43
|
+
"""Import every submodule of `package_name` into `sys.modules`.
|
|
44
|
+
|
|
45
|
+
Without this, classes defined in lazy-loaded submodules would never be
|
|
46
|
+
seen by the discovery walk, and string-form forward-reference type
|
|
47
|
+
annotations (`'SparkConnectClient'`) wouldn't resolve.
|
|
48
|
+
|
|
49
|
+
Submodules whose import raises (e.g. because an optional dep like
|
|
50
|
+
pandas isn't installed) are collected and summarized on stderr so an
|
|
51
|
+
incomplete walk is visibly incomplete instead of silently truncated.
|
|
52
|
+
"""
|
|
53
|
+
try:
|
|
54
|
+
pkg = importlib.import_module(package_name)
|
|
55
|
+
except Exception as e:
|
|
56
|
+
sys.stderr.write(
|
|
57
|
+
f"api-parity-py: failed to import top-level package "
|
|
58
|
+
f"{package_name!r}: {type(e).__name__}: {e}\n"
|
|
59
|
+
)
|
|
60
|
+
return
|
|
61
|
+
pkg_path = getattr(pkg, "__path__", None)
|
|
62
|
+
if pkg_path is None:
|
|
63
|
+
return
|
|
64
|
+
|
|
65
|
+
failures: list[tuple[str, BaseException]] = []
|
|
66
|
+
total = 0
|
|
67
|
+
for _, mod_name, _ in pkgutil.walk_packages(pkg_path, prefix=package_name + "."):
|
|
68
|
+
total += 1
|
|
69
|
+
try:
|
|
70
|
+
importlib.import_module(mod_name)
|
|
71
|
+
except Exception as e:
|
|
72
|
+
failures.append((mod_name, e))
|
|
73
|
+
|
|
74
|
+
if failures:
|
|
75
|
+
_report_skipped(package_name, total, failures)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
def _report_skipped(
|
|
79
|
+
package_name: str,
|
|
80
|
+
total: int,
|
|
81
|
+
failures: list[tuple[str, BaseException]],
|
|
82
|
+
) -> None:
|
|
83
|
+
"""Print a grouped, terse summary of import failures to stderr."""
|
|
84
|
+
reasons: Counter[str] = Counter()
|
|
85
|
+
examples: dict[str, str] = {}
|
|
86
|
+
for mod, err in failures:
|
|
87
|
+
first_line = str(err).splitlines()[0][:120] if str(err) else ""
|
|
88
|
+
key = f"{type(err).__name__}: {first_line}" if first_line else type(err).__name__
|
|
89
|
+
reasons[key] += 1
|
|
90
|
+
examples.setdefault(key, mod)
|
|
91
|
+
|
|
92
|
+
sys.stderr.write(
|
|
93
|
+
f"api-parity-py: {package_name}: "
|
|
94
|
+
f"skipped {len(failures)}/{total} submodule(s) — inventory will be incomplete\n"
|
|
95
|
+
)
|
|
96
|
+
for reason, count in reasons.most_common():
|
|
97
|
+
sys.stderr.write(f" - [{count}x] {reason}\n")
|
|
98
|
+
sys.stderr.write(f" e.g. {examples[reason]}\n")
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _raw_attr(cls: type, name: str) -> object:
|
|
102
|
+
"""Return the raw descriptor for `name` on `cls`, walking the MRO.
|
|
103
|
+
|
|
104
|
+
We can't use `getattr(cls, name)` to detect properties — that triggers
|
|
105
|
+
descriptor invocation and gives us the resolved value (e.g. a `Builder`
|
|
106
|
+
instance) instead of the descriptor itself. Looking up via `__dict__`
|
|
107
|
+
on each MRO class returns the raw `property` / `classmethod` object.
|
|
108
|
+
"""
|
|
109
|
+
for klass in cls.__mro__:
|
|
110
|
+
if name in klass.__dict__:
|
|
111
|
+
return klass.__dict__[name]
|
|
112
|
+
return None
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def _member_kind(cls: type, name: str, value: object) -> str | None:
    """Classify one class member for the inventory; None means "drop it".

    The raw-descriptor check via `isinstance(..., property)` also catches
    `property` subclasses such as pyspark's `classproperty`. Nested
    classes yield None here — `_collect_class` recurses into them and
    emits them as their own top-level entries. Remaining callables count
    as methods; plain data attributes (constants, defaults) are not API
    surface.
    """
    if isinstance(_raw_attr(cls, name), property):
        return "property"
    if isinstance(value, type):
        return None
    return "method" if callable(value) else None
|
|
133
|
+
|
|
134
|
+
|
|
135
|
+
def _is_api_class(obj: object) -> bool:
    """True for classes that count as API surface (not data-shape types)."""
    if not isinstance(obj, type):
        return False
    return not issubclass(obj, _DATA_BASES)
|
|
137
|
+
|
|
138
|
+
|
|
139
|
+
def _collect_class(cls: type, entries: list[dict]) -> None:
    """Append entries for `cls`, its public members, then nested classes.

    Re-exports can route the walk to the same class twice; the upfront
    scan bails out before repeating the member sweep.
    """
    class_path = f"{cls.__module__}.{cls.__qualname__}"
    already_seen = any(
        e["kind"] == "class" and e["path"] == class_path for e in entries
    )
    if already_seen:
        return
    entries.append({"path": class_path, "kind": "class"})

    # Public members, classified as method/property by `_member_kind`.
    for member_name, member in inspect.getmembers(cls):
        if member_name.startswith("_") or member_name in _OBJECT_MEMBERS:
            continue
        member_kind = _member_kind(cls, member_name, member)
        if member_kind is not None:
            entries.append(
                {"path": f"{class_path}.{member_name}", "kind": member_kind}
            )

    # Recurse into genuinely nested classes: defined in this class's
    # module AND lexically inside it (qualname prefix). Both checks are
    # required to exclude unrelated classes merely exposed as attributes
    # (e.g. types pulled in for type hints).
    for member_name, attr in inspect.getmembers(cls):
        if member_name.startswith("_") or not _is_api_class(attr):
            continue
        if attr.__module__ != cls.__module__:
            continue
        if attr.__qualname__.startswith(cls.__qualname__ + "."):
            _collect_class(attr, entries)
|
|
168
|
+
|
|
169
|
+
|
|
170
|
+
def _discover(packages: list[str]) -> list[dict]:
|
|
171
|
+
"""Walk every loaded module under `packages`, emit entries."""
|
|
172
|
+
entries: list[dict] = []
|
|
173
|
+
for mod_name, mod in list(sys.modules.items()):
|
|
174
|
+
if mod is None:
|
|
175
|
+
continue
|
|
176
|
+
if not any(mod_name == p or mod_name.startswith(p + ".") for p in packages):
|
|
177
|
+
continue
|
|
178
|
+
|
|
179
|
+
# Top-level classes, recursing into their nested classes.
|
|
180
|
+
for name, obj in inspect.getmembers(mod):
|
|
181
|
+
if name.startswith("_") or not _is_api_class(obj):
|
|
182
|
+
continue
|
|
183
|
+
# Skip re-exports: only record a class in its defining module.
|
|
184
|
+
# Without this, `pyspark.sql.SparkSession` (re-export from
|
|
185
|
+
# `pyspark.sql.session`) would be listed twice with different
|
|
186
|
+
# paths.
|
|
187
|
+
if obj.__module__ != mod_name:
|
|
188
|
+
continue
|
|
189
|
+
_collect_class(obj, entries)
|
|
190
|
+
|
|
191
|
+
# Module-level free functions (e.g. `pyspark.sql.functions.col`).
|
|
192
|
+
for name, obj in inspect.getmembers(mod, inspect.isfunction):
|
|
193
|
+
if name.startswith("_"):
|
|
194
|
+
continue
|
|
195
|
+
if obj.__module__ != mod_name:
|
|
196
|
+
continue
|
|
197
|
+
entries.append({
|
|
198
|
+
"path": f"{obj.__module__}.{obj.__qualname__}",
|
|
199
|
+
"kind": "function",
|
|
200
|
+
})
|
|
201
|
+
|
|
202
|
+
# Sort + dedup by (path, kind). Stability matters because the JSON
|
|
203
|
+
# output is part of the contract — diffing two versions should produce
|
|
204
|
+
# minimal noise.
|
|
205
|
+
seen: set[tuple[str, str]] = set()
|
|
206
|
+
out: list[dict] = []
|
|
207
|
+
for e in sorted(entries, key=lambda e: (e["path"], e["kind"])):
|
|
208
|
+
key = (e["path"], e["kind"])
|
|
209
|
+
if key in seen:
|
|
210
|
+
continue
|
|
211
|
+
seen.add(key)
|
|
212
|
+
out.append(e)
|
|
213
|
+
return out
|
|
214
|
+
|
|
215
|
+
|
|
216
|
+
def walk_package(target: str, *, version_from: str | None = None) -> dict:
    """Build a reference envelope for comma-separated package names.

    `version_from` names a module whose `__version__` is recorded as
    informational metadata (e.g. `"pyspark"`); if it can't be imported or
    lacks the attribute, version stays null — it is not part of the join
    key.
    """
    package_names = [part.strip() for part in target.split(",") if part.strip()]
    for package_name in package_names:
        _preload_submodules(package_name)

    version = None
    if version_from:
        try:
            version = getattr(
                importlib.import_module(version_from), "__version__", None
            )
        except Exception:
            version = None

    return {
        "schema_version": 1,
        "kind": "reference",
        "language": "python",
        "version": version,
        "source": ",".join(package_names),
        "entries": _discover(package_names),
    }
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
api_parity_py/__init__.py,sha256=KlBe5r7WDDKsGENk_3f6XpDCA4tFJGzCvshMbfya7DA,192
|
|
2
|
+
api_parity_py/__main__.py,sha256=7tlHU_KDHK8oDjUFPVU9138PpnA-RhZIHCgCIIR598g,4371
|
|
3
|
+
api_parity_py/parity.py,sha256=GAIE4jBDov2bgbn8djPaw5FBKQlpbH-TaQQChUerNJ8,7331
|
|
4
|
+
api_parity_py/walk.py,sha256=eGOLLKA27svr4CmsWWww6wPIc0Jv6QHCWeZTC7j6vls,9299
|
|
5
|
+
api_parity_py-0.0.2.dist-info/METADATA,sha256=ZcbCkFdK2jVYNjWMUIGC9XacDwvibqUggQseYfQZJk0,158
|
|
6
|
+
api_parity_py-0.0.2.dist-info/WHEEL,sha256=QccIxa26bgl1E6uMy58deGWi-0aeIkkangHcxk2kWfw,87
|
|
7
|
+
api_parity_py-0.0.2.dist-info/entry_points.txt,sha256=ZLa5eWK18jaDfYgBmfwoQNWAVqV0fzMdp6zv6b5j3nA,62
|
|
8
|
+
api_parity_py-0.0.2.dist-info/RECORD,,
|