python-infrakit-dev 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- infrakit/__init__.py +0 -0
- infrakit/cli/__init__.py +1 -0
- infrakit/cli/commands/__init__.py +1 -0
- infrakit/cli/commands/deps.py +530 -0
- infrakit/cli/commands/init.py +129 -0
- infrakit/cli/commands/llm.py +295 -0
- infrakit/cli/commands/logger.py +160 -0
- infrakit/cli/commands/module.py +342 -0
- infrakit/cli/commands/time.py +81 -0
- infrakit/cli/main.py +65 -0
- infrakit/core/__init__.py +0 -0
- infrakit/core/config/__init__.py +0 -0
- infrakit/core/config/converter.py +480 -0
- infrakit/core/config/exporter.py +304 -0
- infrakit/core/config/loader.py +713 -0
- infrakit/core/config/validator.py +389 -0
- infrakit/core/logger/__init__.py +21 -0
- infrakit/core/logger/formatters.py +143 -0
- infrakit/core/logger/handlers.py +322 -0
- infrakit/core/logger/retention.py +176 -0
- infrakit/core/logger/setup.py +314 -0
- infrakit/deps/__init__.py +239 -0
- infrakit/deps/clean.py +141 -0
- infrakit/deps/depfile.py +405 -0
- infrakit/deps/health.py +357 -0
- infrakit/deps/optimizer.py +642 -0
- infrakit/deps/scanner.py +550 -0
- infrakit/llm/__init__.py +35 -0
- infrakit/llm/batch.py +165 -0
- infrakit/llm/client.py +575 -0
- infrakit/llm/key_manager.py +728 -0
- infrakit/llm/llm_readme.md +306 -0
- infrakit/llm/models.py +148 -0
- infrakit/llm/providers/__init__.py +5 -0
- infrakit/llm/providers/base.py +112 -0
- infrakit/llm/providers/gemini.py +164 -0
- infrakit/llm/providers/openai.py +168 -0
- infrakit/llm/rate_limiter.py +54 -0
- infrakit/scaffolder/__init__.py +31 -0
- infrakit/scaffolder/ai.py +508 -0
- infrakit/scaffolder/backend.py +555 -0
- infrakit/scaffolder/cli_tool.py +386 -0
- infrakit/scaffolder/generator.py +338 -0
- infrakit/scaffolder/pipeline.py +562 -0
- infrakit/scaffolder/registry.py +121 -0
- infrakit/time/__init__.py +60 -0
- infrakit/time/profiler.py +511 -0
- python_infrakit_dev-0.1.0.dist-info/METADATA +124 -0
- python_infrakit_dev-0.1.0.dist-info/RECORD +51 -0
- python_infrakit_dev-0.1.0.dist-info/WHEEL +4 -0
- python_infrakit_dev-0.1.0.dist-info/entry_points.txt +3 -0
infrakit/deps/scanner.py
ADDED
|
@@ -0,0 +1,550 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.deps._scanner
|
|
3
|
+
~~~~~~~~~~~~~~~~~~~~~~
|
|
4
|
+
AST-based scanner that walks Python (and optionally Jupyter) files,
|
|
5
|
+
extracts imports, and checks whether the imported names are actually
|
|
6
|
+
referenced in the code body (conservative unused-import detection).
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import ast
|
|
12
|
+
import json
|
|
13
|
+
import tokenize
|
|
14
|
+
import io
|
|
15
|
+
from dataclasses import dataclass, field
|
|
16
|
+
from pathlib import Path
|
|
17
|
+
from typing import Iterator, Optional
|
|
18
|
+
|
|
19
|
+
# ---------------------------------------------------------------------------
# Import ↔ pip-package name mapping
# Many packages are installed under a different name than their import name.
# Keys are looked up first by full module path, then by top-level root
# (see ImportRecord.pip_name).
# NOTE(review): some keys ("np", "pd", "plt", "mpl", "tf", "nx", "xgb") are
# conventional *aliases*, not importable module names; the AST reports real
# module names, so these entries only match a module literally named that
# way — presumably kept as defensive extras. Confirm intentional.
# ---------------------------------------------------------------------------
IMPORT_TO_PIP: dict[str, str] = {
    # Computer vision / image
    "cv2": "opencv-python",
    "PIL": "Pillow",
    "skimage": "scikit-image",
    "imageio": "imageio",
    # ML / data science
    "sklearn": "scikit-learn",
    "xgb": "xgboost",
    "lightgbm": "lightgbm",
    "catboost": "catboost",
    "torch": "torch",
    "torchvision": "torchvision",
    "torchaudio": "torchaudio",
    "tensorflow": "tensorflow",
    "tf": "tensorflow",
    "keras": "keras",
    "transformers": "transformers",
    "diffusers": "diffusers",
    "datasets": "datasets",
    "accelerate": "accelerate",
    "peft": "peft",
    "sentence_transformers": "sentence-transformers",
    "faiss": "faiss-cpu",
    # Data
    "pd": "pandas",
    "pandas": "pandas",
    "np": "numpy",
    "numpy": "numpy",
    "scipy": "scipy",
    "statsmodels": "statsmodels",
    "polars": "polars",
    "pyarrow": "pyarrow",
    "openpyxl": "openpyxl",
    "xlrd": "xlrd",
    "xlwt": "xlwt",
    "xlsxwriter": "XlsxWriter",
    # Visualisation
    "matplotlib": "matplotlib",
    "mpl": "matplotlib",
    "plt": "matplotlib",
    "seaborn": "seaborn",
    "plotly": "plotly",
    "bokeh": "bokeh",
    "altair": "altair",
    "dash": "dash",
    "streamlit": "streamlit",
    "gradio": "gradio",
    # Web / API
    "flask": "Flask",
    "fastapi": "fastapi",
    "uvicorn": "uvicorn",
    "starlette": "starlette",
    "django": "Django",
    "aiohttp": "aiohttp",
    "httpx": "httpx",
    "requests": "requests",
    "urllib3": "urllib3",
    "bs4": "beautifulsoup4",
    "lxml": "lxml",
    "scrapy": "Scrapy",
    "pydantic": "pydantic",
    # Database / storage
    "sqlalchemy": "SQLAlchemy",
    "pymongo": "pymongo",
    "motor": "motor",
    "redis": "redis",
    "psycopg2": "psycopg2-binary",
    "psycopg": "psycopg",
    "pymysql": "PyMySQL",
    "cx_Oracle": "cx_Oracle",
    "boto3": "boto3",
    "botocore": "botocore",
    "google.cloud": "google-cloud",
    # CLI / config
    "click": "click",
    "typer": "typer",
    "rich": "rich",
    "colorama": "colorama",
    "tqdm": "tqdm",
    "dotenv": "python-dotenv",
    "yaml": "PyYAML",
    "toml": "toml",
    "tomllib": "tomli",  # stdlib in 3.11+, else tomli
    "decouple": "python-decouple",
    "environs": "environs",
    # Async
    "anyio": "anyio",
    "trio": "trio",
    "asyncio": "asyncio",  # stdlib – will be filtered anyway
    "celery": "celery",
    "kombu": "kombu",
    # Testing
    "pytest": "pytest",
    "hypothesis": "hypothesis",
    "factory_boy": "factory_boy",
    "faker": "Faker",
    "mock": "mock",  # stdlib in 3.3+
    "responses": "responses",
    "httpretty": "httpretty",
    # Serialisation
    "msgpack": "msgpack",
    "orjson": "orjson",
    "ujson": "ujson",
    "simplejson": "simplejson",
    "cbor2": "cbor2",
    "avro": "avro-python3",
    # Misc utilities
    "dateutil": "python-dateutil",
    "arrow": "arrow",
    "pendulum": "pendulum",
    "pytz": "pytz",
    "tzlocal": "tzlocal",
    "cryptography": "cryptography",
    "jwt": "PyJWT",
    "paramiko": "paramiko",
    "fabric": "fabric",
    "invoke": "invoke",
    "sh": "sh",
    "psutil": "psutil",
    "loguru": "loguru",
    "structlog": "structlog",
    "attr": "attrs",
    "attrs": "attrs",
    "cattrs": "cattrs",
    "dacite": "dacite",
    "marshmallow": "marshmallow",
    "cerberus": "Cerberus",
    "voluptuous": "voluptuous",
    "jsonschema": "jsonschema",
    "packaging": "packaging",
    "semver": "semver",
    "tabulate": "tabulate",
    "prettytable": "prettytable",
    "jinja2": "Jinja2",
    "mako": "Mako",
    "Mako": "Mako",
    "markupsafe": "MarkupSafe",
    "cachetools": "cachetools",
    "diskcache": "diskcache",
    "joblib": "joblib",
    "dill": "dill",
    "cloudpickle": "cloudpickle",
    "more_itertools": "more-itertools",
    "toolz": "toolz",
    "cytoolz": "cytoolz",
    "sortedcontainers": "sortedcontainers",
    "intervaltree": "intervaltree",
    "networkx": "networkx",
    "nx": "networkx",
    "igraph": "python-igraph",
    "shapely": "Shapely",
    "geopandas": "geopandas",
    "fiona": "Fiona",
    "pyproj": "pyproj",
    "rasterio": "rasterio",
    "sympy": "sympy",
    "numba": "numba",
    "numexpr": "numexpr",
    "cython": "Cython",
    "cffi": "cffi",
    "ctypes": "ctypes",  # stdlib
    "pybind11": "pybind11",
    "swig": "swig",
    "nox": "nox",
    "tox": "tox",
    "pre_commit": "pre-commit",
    "black": "black",
    "isort": "isort",
    "flake8": "flake8",
    "pylint": "pylint",
    "mypy": "mypy",
    "pyright": "pyright",
    "bandit": "bandit",
    "safety": "safety",
    "pip_audit": "pip-audit",
    "twine": "twine",
    "build": "build",
    "setuptools": "setuptools",
    "wheel": "wheel",
    "flit": "flit",
    "hatchling": "hatchling",
    "poetry": "poetry",
}
|
|
207
|
+
|
|
208
|
+
# Python stdlib top-level module names (3.8-3.12 superset).
# Imports whose root is in this set are skipped entirely by the scanner.
_STDLIB_MODULES: frozenset[str] = frozenset({
    "__future__", "_thread", "abc", "aifc", "argparse", "array", "ast",
    "asynchat", "asyncio", "asyncore", "atexit", "audioop", "base64",
    "bdb", "binascii", "binhex", "bisect", "builtins", "bz2", "calendar",
    "cgi", "cgitb", "chunk", "cmath", "cmd", "code", "codecs", "codeop",
    "collections", "colorsys", "compileall", "concurrent", "configparser",
    "contextlib", "contextvars", "copy", "copyreg", "cProfile", "csv",
    "ctypes", "curses", "dataclasses", "datetime", "dbm", "decimal",
    "difflib", "dis", "distutils", "doctest", "email", "encodings",
    "enum", "errno", "faulthandler", "fcntl", "filecmp", "fileinput",
    "fnmatch", "fractions", "ftplib", "functools", "gc", "getopt",
    "getpass", "gettext", "glob", "grp", "gzip", "hashlib", "heapq",
    "hmac", "html", "http", "idlelib", "imaplib", "imghdr", "imp",
    "importlib", "inspect", "io", "ipaddress", "itertools", "json",
    "keyword", "lib2to3", "linecache", "locale", "logging", "lzma",
    "mailbox", "mailcap", "marshal", "math", "mimetypes", "mmap",
    "modulefinder", "multiprocessing", "netrc", "nis", "nntplib",
    "numbers", "operator", "optparse", "os", "ossaudiodev", "pathlib",
    "pdb", "pickle", "pickletools", "pipes", "pkgutil", "platform",
    "plistlib", "poplib", "posix", "posixpath", "pprint", "profile",
    "pstats", "pty", "pwd", "py_compile", "pyclbr", "pydoc", "queue",
    "quopri", "random", "re", "readline", "reprlib", "resource", "rlcompleter",
    "runpy", "sched", "secrets", "select", "selectors", "shelve", "shlex",
    "shutil", "signal", "site", "smtpd", "smtplib", "sndhdr", "socket",
    "socketserver", "spwd", "sqlite3", "sre_compile", "sre_constants",
    "sre_parse", "ssl", "stat", "statistics", "string", "stringprep",
    "struct", "subprocess", "sunau", "symtable", "sys", "sysconfig",
    "syslog", "tabnanny", "tarfile", "telnetlib", "tempfile", "termios",
    "test", "textwrap", "threading", "time", "timeit", "tkinter", "token",
    "tokenize", "tomllib", "trace", "traceback", "tracemalloc", "tty",
    "turtle", "turtledemo", "types", "typing", "unicodedata", "unittest",
    "urllib", "uu", "uuid", "venv", "warnings", "wave", "weakref",
    "webbrowser", "winreg", "winsound", "wsgiref", "xdrlib", "xml",
    "xmlrpc", "zipapp", "zipfile", "zipimport", "zlib", "zoneinfo",
    # typing extensions that are sometimes separate packages
    # NOTE(review): typing_extensions is always a PyPI package, not stdlib;
    # listing it here means it will never be reported as a dependency —
    # confirm this suppression is intentional.
    "typing_extensions",
    # common internal/local prefixes to skip
    "_collections_abc", "_weakrefset",
})
|
|
248
|
+
|
|
249
|
+
|
|
250
|
+
def is_stdlib(module_root: str) -> bool:
    """Report whether *module_root* names a standard-library top-level module."""
    known = module_root in _STDLIB_MODULES
    return known
|
|
253
|
+
|
|
254
|
+
|
|
255
|
+
def import_root(module_name: str) -> str:
    """Return the top-level package name, e.g. 'google.cloud.storage' → 'google'."""
    head, _sep, _rest = module_name.partition(".")
    return head
|
|
258
|
+
|
|
259
|
+
|
|
260
|
+
# ---------------------------------------------------------------------------
|
|
261
|
+
# Data structures
|
|
262
|
+
# ---------------------------------------------------------------------------
|
|
263
|
+
|
|
264
|
+
@dataclass
class ImportRecord:
    """A single import statement discovered in one source file."""

    module: str            # full dotted module path, e.g. 'os.path' or 'numpy'
    alias: str             # local name the importing code binds for the module
    names: list[str]       # for 'from x import a, b' → ['a', 'b']; else []
    lineno: int            # 1-based line number of the import statement
    is_from: bool          # True for 'from x import y' style imports
    file: Path             # file in which the import was found
    is_relative: bool = False  # True for 'from . import x' style imports

    @property
    def root(self) -> str:
        """Top-level package name of :attr:`module`."""
        return import_root(self.module)

    @property
    def pip_name(self) -> str:
        """Best-guess pip distribution name for this import.

        Looks up the full module path first, then the root, in the
        IMPORT_TO_PIP table; falls back to the root with underscores
        turned into dashes (the common PyPI naming convention).
        """
        for candidate in (self.module, self.root):
            mapped = IMPORT_TO_PIP.get(candidate)
            if mapped:
                return mapped
        return self.root.replace("_", "-")

    @property
    def local_names(self) -> list[str]:
        """Identifiers the code body would use to reference this import."""
        if not self.names:
            # Plain 'import x.y' / 'import x as z' binds one local name.
            return [self.alias or self.module.split(".")[-1]]
        return self.names
|
|
295
|
+
|
|
296
|
+
|
|
297
|
+
@dataclass
class FileAnalysis:
    """Result of analysing one source file: its imports plus referenced names."""
    path: Path
    # every import statement found in the file
    imports: list[ImportRecord] = field(default_factory=list)
    # names used in the code body (identifiers, attributes)
    used_names: set[str] = field(default_factory=set)
    # set when the file failed to read/parse; imports/used_names stay empty
    parse_error: Optional[str] = None
|
|
304
|
+
|
|
305
|
+
|
|
306
|
+
@dataclass
class ScanResult:
    """Aggregated output of scanning a whole project tree."""
    # one FileAnalysis per scanned file, including ones that failed to parse
    files: list[FileAnalysis] = field(default_factory=list)
    # pip package name → set of files that use it
    used_packages: dict[str, set[Path]] = field(default_factory=dict)
    # pip package name → set of files that import but never reference it
    possibly_unused: dict[str, set[Path]] = field(default_factory=dict)
    # import names that couldn't be resolved to a known pip package
    unknown_imports: dict[str, set[Path]] = field(default_factory=dict)
    # (path, message) pairs for files that failed to parse
    errors: list[tuple[Path, str]] = field(default_factory=list)
|
|
316
|
+
|
|
317
|
+
|
|
318
|
+
# ---------------------------------------------------------------------------
|
|
319
|
+
# AST helpers
|
|
320
|
+
# ---------------------------------------------------------------------------
|
|
321
|
+
|
|
322
|
+
def _collect_used_names(tree: ast.AST) -> set[str]:
|
|
323
|
+
"""
|
|
324
|
+
Walk the AST and collect every Name and Attribute identifier used
|
|
325
|
+
in the code body (excluding Import/ImportFrom nodes themselves).
|
|
326
|
+
"""
|
|
327
|
+
used: set[str] = set()
|
|
328
|
+
|
|
329
|
+
class _Visitor(ast.NodeVisitor):
|
|
330
|
+
def visit_Import(self, node):
|
|
331
|
+
pass # skip — don't treat import names as "used"
|
|
332
|
+
|
|
333
|
+
def visit_ImportFrom(self, node):
|
|
334
|
+
pass # skip
|
|
335
|
+
|
|
336
|
+
def visit_Name(self, node):
|
|
337
|
+
used.add(node.id)
|
|
338
|
+
|
|
339
|
+
def visit_Attribute(self, node):
|
|
340
|
+
# collect the root name for dotted access like np.array
|
|
341
|
+
if isinstance(node.value, ast.Name):
|
|
342
|
+
used.add(node.value.id)
|
|
343
|
+
self.generic_visit(node)
|
|
344
|
+
|
|
345
|
+
_Visitor().visit(tree)
|
|
346
|
+
return used
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def _extract_imports(tree: ast.AST, filepath: Path) -> list[ImportRecord]:
    """Extract every import statement in *tree* as :class:`ImportRecord` entries."""
    found: list[ImportRecord] = []

    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            # 'import a.b as c, d' → one record per comma-separated target
            for target in node.names:
                found.append(ImportRecord(
                    module=target.name,
                    alias=target.asname or target.name.split(".")[0],
                    names=[],
                    lineno=node.lineno,
                    is_from=False,
                    file=filepath,
                    is_relative=False,
                ))
        elif isinstance(node, ast.ImportFrom):
            module = node.module or ""  # None for bare 'from . import x'
            relative = node.level > 0
            # 'from x import *' hides what it binds → record the module
            # itself as the dependency; otherwise record the bound names
            # (honouring 'as' renames).
            if any(target.name == "*" for target in node.names):
                bound = ["*"]
            else:
                bound = [target.asname or target.name for target in node.names]
            found.append(ImportRecord(
                module=module,
                alias=module.split(".")[0],
                names=bound,
                lineno=node.lineno,
                is_from=True,
                file=filepath,
                is_relative=relative,
            ))

    return found
|
|
393
|
+
|
|
394
|
+
|
|
395
|
+
# ---------------------------------------------------------------------------
|
|
396
|
+
# Notebook support
|
|
397
|
+
# ---------------------------------------------------------------------------
|
|
398
|
+
|
|
399
|
+
def _extract_notebook_source(path: Path) -> str:
|
|
400
|
+
"""Extract all code cells from a .ipynb file as a single Python string."""
|
|
401
|
+
try:
|
|
402
|
+
nb = json.loads(path.read_text(encoding="utf-8"))
|
|
403
|
+
except Exception as exc:
|
|
404
|
+
raise ValueError(f"Cannot parse notebook JSON: {exc}") from exc
|
|
405
|
+
|
|
406
|
+
lines: list[str] = []
|
|
407
|
+
for cell in nb.get("cells", []):
|
|
408
|
+
if cell.get("cell_type") == "code":
|
|
409
|
+
src = cell.get("source", [])
|
|
410
|
+
if isinstance(src, list):
|
|
411
|
+
src = "".join(src)
|
|
412
|
+
lines.append(src)
|
|
413
|
+
return "\n\n".join(lines)
|
|
414
|
+
|
|
415
|
+
|
|
416
|
+
# ---------------------------------------------------------------------------
|
|
417
|
+
# Per-file analysis
|
|
418
|
+
# ---------------------------------------------------------------------------
|
|
419
|
+
|
|
420
|
+
def analyse_file(path: Path) -> FileAnalysis:
    """
    Parse one ``.py``/``.ipynb`` file and return its :class:`FileAnalysis`.

    Never raises: read or parse failures are recorded in ``parse_error``
    and leave ``imports``/``used_names`` empty.
    """
    result = FileAnalysis(path=path)

    try:
        # Notebooks need their code cells joined first; plain files are
        # read with replacement so stray bytes can't abort the scan.
        source = (
            _extract_notebook_source(path)
            if path.suffix == ".ipynb"
            else path.read_text(encoding="utf-8", errors="replace")
        )
        tree = ast.parse(source, filename=str(path))
        result.imports = _extract_imports(tree, path)
        result.used_names = _collect_used_names(tree)
    except SyntaxError as exc:
        result.parse_error = f"SyntaxError: {exc}"
    except Exception as exc:
        result.parse_error = str(exc)

    return result
|
|
439
|
+
|
|
440
|
+
|
|
441
|
+
# ---------------------------------------------------------------------------
|
|
442
|
+
# Scanner (walks a project directory)
|
|
443
|
+
# ---------------------------------------------------------------------------
|
|
444
|
+
|
|
445
|
+
def _should_include(path: Path, include_notebooks: bool) -> bool:
|
|
446
|
+
if path.suffix == ".py":
|
|
447
|
+
return True
|
|
448
|
+
if include_notebooks and path.suffix == ".ipynb":
|
|
449
|
+
return True
|
|
450
|
+
return False
|
|
451
|
+
|
|
452
|
+
|
|
453
|
+
def scan_project(
    root: Path,
    include_notebooks: bool = False,
    gitignore_filter=None,  # callable(rel_posix: str, parts: tuple[str, ...]) -> bool (True = ignored)
) -> ScanResult:
    """
    Walk *root*, analyse every eligible file, return a ScanResult.

    Args:
        root: Project directory to scan recursively.
        include_notebooks: Also scan ``.ipynb`` files when True.
        gitignore_filter: Callable invoked as ``fn(rel_posix, parts)`` with
            the file's forward-slash relative path (str) and its path
            components (tuple); return True when the path should be
            *excluded*. When None, no gitignore filtering is applied.
            (The previous doc claimed ``callable(Path) -> bool``, which did
            not match how ``_walk_files`` actually calls it.)

    Returns:
        ScanResult with per-file analyses, used / possibly-unused package
        maps, unmapped import names, and parse errors.
    """
    result = ScanResult()

    for filepath in _walk_files(root, include_notebooks, gitignore_filter):
        analysis = analyse_file(filepath)
        result.files.append(analysis)

        if analysis.parse_error:
            result.errors.append((filepath, analysis.parse_error))
            continue

        for imp in analysis.imports:
            # Stdlib and package-internal (relative) imports are never
            # external dependencies — skip them entirely.
            if is_stdlib(imp.root):
                continue
            if imp.is_relative:
                continue

            pip = imp.pip_name
            # "unknown" = resolved only via the underscore→dash fallback,
            # i.e. neither the module nor its root appears in the table.
            unknown = pip == imp.root.replace("_", "-") and imp.root not in IMPORT_TO_PIP

            # Conservative usage check: a star import can bind anything,
            # so assume it is used; otherwise require at least one bound
            # name to be referenced in the file body.
            local_names = imp.local_names
            if "*" in local_names:
                actually_used = True
            else:
                actually_used = any(
                    name in analysis.used_names for name in local_names
                )

            bucket = result.used_packages if actually_used else result.possibly_unused
            bucket.setdefault(pip, set()).add(filepath)

            # (Removed redundant 'not is_stdlib(imp.root)' re-check: stdlib
            # imports already 'continue'd above, so it was always True.)
            if unknown:
                result.unknown_imports.setdefault(pip, set()).add(filepath)

    # A package is "used" if it appears as used in ANY file.
    # Remove from possibly_unused those that are confirmed used elsewhere.
    for pkg in list(result.possibly_unused):
        if pkg in result.used_packages:
            del result.possibly_unused[pkg]

    return result
|
|
513
|
+
|
|
514
|
+
|
|
515
|
+
def _walk_files(
    root: Path,
    include_notebooks: bool,
    gitignore_filter,
) -> Iterator[Path]:
    """
    Yield scannable files under *root* in sorted order.

    Skips non-files, wrong suffixes, hidden path components, common
    build/venv noise directories, and anything the optional
    ``gitignore_filter(rel_posix, parts)`` callable rejects.
    """
    noise_dirs = frozenset((
        "__pycache__", ".venv", "venv", "env",
        "node_modules", "dist", "build", ".tox", ".nox",
    ))

    for candidate in sorted(root.rglob("*")):
        if not candidate.is_file():
            continue
        if not _should_include(candidate, include_notebooks):
            continue

        try:
            rel = candidate.relative_to(root)
        except ValueError:
            continue  # fell outside the scan root

        parts = rel.parts  # path components, no separators
        # Hidden components (".git", ".cache", …) and known noise dirs
        # are skipped wherever they appear in the relative path.
        if any(part.startswith(".") or part in noise_dirs for part in parts):
            continue

        # The gitignore hook receives the forward-slash relative path plus
        # its components so directory patterns like "ignored/" can match.
        if gitignore_filter and gitignore_filter(rel.as_posix(), parts):
            continue

        yield candidate
|
infrakit/llm/__init__.py
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
"""
|
|
2
|
+
infrakit.llm
|
|
3
|
+
------------
|
|
4
|
+
Unified async/sync LLM client with key rotation, quota tracking,
|
|
5
|
+
rate limiting, and batch processing.
|
|
6
|
+
|
|
7
|
+
Public API::
|
|
8
|
+
|
|
9
|
+
from infrakit.llm import LLMClient, Prompt, QuotaConfig
|
|
10
|
+
from infrakit.llm import LLMResponse, BatchResult
|
|
11
|
+
from infrakit.llm.providers import OpenAIProvider, GeminiProvider
|
|
12
|
+
"""
|
|
13
|
+
|
|
14
|
+
from .client import LLMClient
|
|
15
|
+
from .models import (
|
|
16
|
+
BatchResult,
|
|
17
|
+
LLMResponse,
|
|
18
|
+
Prompt,
|
|
19
|
+
Provider,
|
|
20
|
+
QuotaConfig,
|
|
21
|
+
RequestMeta,
|
|
22
|
+
)
|
|
23
|
+
from .providers import GeminiProvider, OpenAIProvider
|
|
24
|
+
|
|
25
|
+
__all__ = [
|
|
26
|
+
"LLMClient",
|
|
27
|
+
"Prompt",
|
|
28
|
+
"QuotaConfig",
|
|
29
|
+
"LLMResponse",
|
|
30
|
+
"BatchResult",
|
|
31
|
+
"RequestMeta",
|
|
32
|
+
"Provider",
|
|
33
|
+
"OpenAIProvider",
|
|
34
|
+
"GeminiProvider",
|
|
35
|
+
]
|