fc-data 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- datasmith/__init__.py +330 -0
- datasmith/__init__.pyi +194 -0
- datasmith/agents/__init__.py +31 -0
- datasmith/agents/classifiers.py +272 -0
- datasmith/agents/codex.py +25 -0
- datasmith/agents/config.py +108 -0
- datasmith/agents/extractors.py +197 -0
- datasmith/agents/installed/README.md +52 -0
- datasmith/agents/installed/__init__.py +22 -0
- datasmith/agents/installed/base.py +240 -0
- datasmith/agents/installed/claude.py +134 -0
- datasmith/agents/installed/codex.py +91 -0
- datasmith/agents/installed/gemini.py +118 -0
- datasmith/agents/installed/none.py +27 -0
- datasmith/agents/sandbox.py +547 -0
- datasmith/agents/synthesizer.py +439 -0
- datasmith/agents/templates/AGENTS.md.j2 +150 -0
- datasmith/agents/templates/sandbox_verify.py +428 -0
- datasmith/docker/__init__.py +31 -0
- datasmith/docker/context.py +112 -0
- datasmith/docker/images.py +158 -0
- datasmith/docker/publish.py +56 -0
- datasmith/docker/templates/Dockerfile.base +26 -0
- datasmith/docker/templates/Dockerfile.pr +42 -0
- datasmith/docker/templates/Dockerfile.repo +11 -0
- datasmith/docker/templates/docker_build_base.sh +780 -0
- datasmith/docker/templates/docker_build_env.sh +309 -0
- datasmith/docker/templates/docker_build_final.sh +106 -0
- datasmith/docker/templates/docker_build_pkg.sh +99 -0
- datasmith/docker/templates/docker_build_run.sh +124 -0
- datasmith/docker/templates/entrypoint.sh +62 -0
- datasmith/docker/templates/parser.py +1405 -0
- datasmith/docker/templates/profile.sh +199 -0
- datasmith/docker/templates/pytest_runner.py +692 -0
- datasmith/docker/templates/run-tests.sh +197 -0
- datasmith/docker/verifiers.py +131 -0
- datasmith/filters.py +154 -0
- datasmith/github/__init__.py +22 -0
- datasmith/github/client.py +333 -0
- datasmith/github/hooks.py +50 -0
- datasmith/github/links.py +110 -0
- datasmith/github/models.py +206 -0
- datasmith/github/render.py +173 -0
- datasmith/github/search.py +66 -0
- datasmith/github/templates/comment.md.j2 +5 -0
- datasmith/github/templates/final.md.j2 +66 -0
- datasmith/github/templates/issues.md.j2 +21 -0
- datasmith/github/templates/repo.md.j2 +1 -0
- datasmith/preflight.py +162 -0
- datasmith/publish/__init__.py +13 -0
- datasmith/publish/huggingface.py +104 -0
- datasmith/publish/pipeline.py +60 -0
- datasmith/publish/records.py +91 -0
- datasmith/py.typed +1 -0
- datasmith/resolution/__init__.py +14 -0
- datasmith/resolution/blocklist.py +145 -0
- datasmith/resolution/cache.py +120 -0
- datasmith/resolution/constants.py +277 -0
- datasmith/resolution/dependency_resolver.py +174 -0
- datasmith/resolution/git_utils.py +378 -0
- datasmith/resolution/import_analyzer.py +66 -0
- datasmith/resolution/metadata_parser.py +412 -0
- datasmith/resolution/models.py +41 -0
- datasmith/resolution/orchestrator.py +522 -0
- datasmith/resolution/package_filters.py +312 -0
- datasmith/resolution/python_manager.py +110 -0
- datasmith/runners/__init__.py +15 -0
- datasmith/runners/base.py +112 -0
- datasmith/runners/classify_prs.py +48 -0
- datasmith/runners/render_problems.py +113 -0
- datasmith/runners/resolve_packages.py +66 -0
- datasmith/runners/scrape_commits.py +166 -0
- datasmith/runners/scrape_repos.py +44 -0
- datasmith/runners/synthesize_images.py +310 -0
- datasmith/update/__init__.py +5 -0
- datasmith/update/cli.py +169 -0
- datasmith/update/offline.py +173 -0
- datasmith/update/pipeline.py +497 -0
- datasmith/utils/__init__.py +18 -0
- datasmith/utils/core.py +67 -0
- datasmith/utils/db.py +156 -0
- datasmith/utils/tokens.py +65 -0
- fc_data-0.2.0.dist-info/METADATA +441 -0
- fc_data-0.2.0.dist-info/RECORD +87 -0
- fc_data-0.2.0.dist-info/WHEEL +4 -0
- fc_data-0.2.0.dist-info/entry_points.txt +2 -0
- fc_data-0.2.0.dist-info/licenses/LICENSE +28 -0
|
@@ -0,0 +1,91 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
from typing import Any
|
|
5
|
+
|
|
6
|
+
import pyarrow as pa # type: ignore[import-untyped]
|
|
7
|
+
import pyarrow.parquet as pq # type: ignore[import-untyped]
|
|
8
|
+
|
|
9
|
+
from datasmith.github.models import FormulaCodeRecord
|
|
10
|
+
from datasmith.utils import get_logger
|
|
11
|
+
from datasmith.utils.db import fetch_all
|
|
12
|
+
|
|
13
|
+
# Module-level logger shared by the record (de)serialisation helpers below.
logger = get_logger("publish.records")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def records_to_parquet(records: list[FormulaCodeRecord]) -> bytes:
    """Encode FormulaCodeRecords as an in-memory Parquet file.

    Every record is dumped with ``model_dump(mode="json")`` and the resulting
    dicts are turned into a table with ``pyarrow.Table.from_pylist`` (no
    explicit schema is passed, so pyarrow infers it from the rows).

    Returns:
        The Parquet file contents, or ``b""`` when *records* is empty.
    """
    if records:
        table = pa.Table.from_pylist([rec.model_dump(mode="json") for rec in records])
        sink = io.BytesIO()
        pq.write_table(table, sink)
        return sink.getvalue()
    return b""
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def records_from_parquet(data: bytes) -> list[FormulaCodeRecord]:
    """Decode Parquet bytes into FormulaCodeRecords.

    Empty input yields an empty list. Otherwise each row of the table is
    converted to a dict and passed to ``FormulaCodeRecord`` as keyword
    arguments.
    """
    if data:
        parsed = pq.read_table(io.BytesIO(data))
        return [FormulaCodeRecord(**fields) for fields in parsed.to_pylist()]
    return []
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def records_from_supabase(
    start_date: str | None = None,
    end_date: str | None = None,
    unpublished_only: bool = True,
) -> list[FormulaCodeRecord]:
    """Query Supabase for FormulaCodeRecords, optionally filtered by date and publish status.

    Rows are read from the ``pull_requests`` table and always restricted to
    ``is_performance_commit = True``.

    Args:
        start_date: When given, only rows with ``merged_at >= start_date``.
        end_date: When given, only rows with ``merged_at <= end_date``.
        unpublished_only: When true, only rows whose ``published_at`` is null.

    Returns:
        One record per row that could be converted. A row that fails
        conversion (missing required key, model validation error, ...) is
        logged with its traceback and skipped; it never aborts the query.
    """
    filters: dict[str, Any] = {"is_performance_commit": True}
    is_null: list[str] = []
    gte_filters: dict[str, Any] = {}
    lte_filters: dict[str, Any] = {}

    if unpublished_only:
        is_null.append("published_at")
    if start_date:
        gte_filters["merged_at"] = start_date
    if end_date:
        lte_filters["merged_at"] = end_date

    # Empty filter collections are passed as None, as in the original call.
    rows = fetch_all(
        "pull_requests",
        select="*",
        filters=filters,
        is_null=is_null or None,
        gte_filters=gte_filters or None,
        lte_filters=lte_filters or None,
    )

    records: list[FormulaCodeRecord] = []
    for row in rows:
        try:
            records.append(
                FormulaCodeRecord(
                    owner=row["owner"],
                    repo=row["repo"],
                    issue_number=row["issue_number"],
                    task_id=f"{row['owner']}__{row['repo']}-{row['issue_number']}",
                    gt_hash=row.get("merge_commit_sha", ""),
                    base_commit=row.get("base_sha", ""),
                    date=row.get("merged_at"),
                    instructions=row.get("rendered_problem", ""),
                    classification=row.get("classification", ""),
                    difficulty=row.get("difficulty", ""),
                    container_name=row.get("container_name", ""),
                    patch=row.get("patch", ""),
                )
            )
        except Exception:
            # Best-effort conversion: keep going, but record WHY the row was
            # dropped -- without the traceback a skipped record is undiagnosable.
            logger.warning(
                "Failed to create record for %s/%s#%s",
                row.get("owner"),
                row.get("repo"),
                row.get("issue_number"),
                exc_info=True,
            )

    return records
|
datasmith/py.typed
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# This file marks the datasmith package as typed for mypy and other type checkers
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
"""Resolution package for analyzing commits and resolving dependencies."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
# Public API of the package: only the lazy analyze_commit wrapper is exported.
__all__ = ["analyze_commit"]
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def analyze_commit(sha: str, repo_name: str, bypass_cache: bool = False) -> dict[str, Any] | None:
    """Forward to :func:`datasmith.resolution.orchestrator.analyze_commit`.

    The orchestrator module is imported inside the function body, i.e. on
    call rather than when this package is imported. All three arguments are
    passed through by keyword, unchanged.
    """
    from . import orchestrator

    return orchestrator.analyze_commit(sha=sha, repo_name=repo_name, bypass_cache=bypass_cache)
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
"""Dynamic blocklist for packages that don't exist on PyPI or can't be resolved."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import json
|
|
6
|
+
import re
|
|
7
|
+
import threading
|
|
8
|
+
|
|
9
|
+
from datasmith.utils import get_logger
|
|
10
|
+
|
|
11
|
+
from .constants import GIT_CACHE_DIR
|
|
12
|
+
|
|
13
|
+
# Module-level logger for blocklist load/save/add events.
logger = get_logger("resolution.blocklist")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def normalize_package_name(name: str) -> str:
    """Return the PEP 503 normalized form of *name*.

    Every run of ``-``, ``_`` and ``.`` collapses into a single ``-`` and the
    result is lower-cased.
    """
    collapsed = re.sub(r"[-_.]+", "-", name)
    return collapsed.lower()
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# JSON file that persists the blocklist; it lives inside the git cache directory.
BLOCKLIST_PATH = GIT_CACHE_DIR / "package_blocklist.json"

# Held by get_blocklist() and add_to_blocklist() while they read or update the
# in-memory cache and the file (in-process protection only).
_blocklist_lock = threading.Lock()
# In-memory copy of the blocklist; None until it has been loaded once.
_blocklist_cache: set[str] | None = None
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _load_blocklist() -> set[str]:
    """Read the persisted blocklist from ``BLOCKLIST_PATH``.

    Returns:
        The ``blocked_packages`` entries as a set. An empty set is returned
        when the file does not exist, and also (after logging a warning) when
        it cannot be read or parsed.
    """
    if BLOCKLIST_PATH.exists():
        try:
            with BLOCKLIST_PATH.open("r") as handle:
                payload = json.load(handle)
            return set(payload.get("blocked_packages", []))
        except Exception as exc:
            logger.warning("Failed to load blocklist from %s: %s", BLOCKLIST_PATH, exc)
    return set()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _save_blocklist(blocklist: set[str]) -> None:
    """Write *blocklist* to ``BLOCKLIST_PATH`` as indented JSON.

    The parent directory is created when missing and the entries are stored
    sorted. Any failure is logged as a warning and otherwise ignored, so
    callers cannot tell from the return value whether the write succeeded.
    """
    try:
        BLOCKLIST_PATH.parent.mkdir(parents=True, exist_ok=True)
        with BLOCKLIST_PATH.open("w") as handle:
            payload = {
                "blocked_packages": sorted(blocklist),
                "description": "Packages that don't exist on PyPI or can't be resolved",
            }
            json.dump(payload, handle, indent=2)
    except Exception as exc:
        logger.warning("Failed to save blocklist to %s: %s", BLOCKLIST_PATH, exc)
|
|
55
|
+
|
|
56
|
+
|
|
57
|
+
def get_blocklist() -> set[str]:
    """Return a copy of the package blocklist.

    The blocklist is read from disk the first time it is needed and kept in
    the module-level cache afterwards. Callers always receive a fresh copy,
    so mutating the result does not affect the cache.
    """
    global _blocklist_cache

    with _blocklist_lock:
        cached = _blocklist_cache
        if cached is None:
            cached = _blocklist_cache = _load_blocklist()
        return cached.copy()
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
def add_to_blocklist(package_name: str) -> bool:
    """Add *package_name* to the persisted blocklist.

    The name is stripped and PEP 503 normalized first. The on-disk list is
    re-read under the lock, extended, written back, and the in-memory cache
    is replaced with the new contents.

    Returns:
        ``True`` when the package was newly added; ``False`` for an
        empty/blank name or a package that is already listed.
    """
    global _blocklist_cache

    stripped = package_name.strip() if package_name else ""
    if not stripped:
        return False

    normalized = normalize_package_name(stripped)

    with _blocklist_lock:
        current = _load_blocklist()
        if normalized in current:
            return False
        current.add(normalized)
        # _save_blocklist only logs on failure, so the cache is refreshed
        # regardless of whether the write reached the disk.
        _save_blocklist(current)
        _blocklist_cache = current.copy()
        logger.info("Added '%s' to package blocklist", normalized)
    return True
|
|
85
|
+
|
|
86
|
+
|
|
87
|
+
def extract_failing_package(error_log: str) -> str | None:
    """Pull the offending package name out of a uv resolution error log.

    Three message shapes are recognised, tried in this order:

    1. ``Because <pkg> was not found in the package registry``
    2. ``Because there are no versions of <pkg>``
    3. ``Because you require <pkg>==X and <pkg><op>Y, we can conclude`` --
       reported only when ``<pkg>`` looks like a mis-parsed token (digits and
       dashes only, or the words ``install`` / ``uninstall``).

    Returns:
        The captured name, or ``None`` when the log is empty or nothing matches.
    """
    if not error_log:
        return None

    # Messages that name the unresolvable package directly.
    direct_patterns = (
        r"Because ([\w\-]+) was not found in the package registry",
        r"Because there are no versions of ([\w\-]+)",
    )
    for pattern in direct_patterns:
        found = re.search(pattern, error_log)
        if found:
            return found.group(1)

    conflict = re.search(
        r"Because you require ([\w\-]+)==[\d\.]+ and \1[><=!]+[\d\.]+, we can conclude",
        error_log,
    )
    if conflict is None:
        return None

    candidate = conflict.group(1)
    # A self-conflicting pin is only attributed to a bogus "package" token;
    # a conflict on a real package name is not reported here.
    looks_bogus = re.match(r"^\d+[\-\d]+$", candidate) or candidate in {"uninstall", "install"}
    return candidate if looks_bogus else None
|
|
110
|
+
|
|
111
|
+
|
|
112
|
+
def should_retry_without_package(error_log: str) -> bool:
    """Tell whether a failed resolution is worth retrying without the failing package.

    Only "package does not exist" style failures qualify. Every other log --
    including "Failed to build" and "Failed to download" errors, and an
    empty log -- yields ``False``.
    """
    if not error_log:
        return False

    retryable_markers = (
        "was not found in the package registry",
        "Because there are no versions of",
    )
    return any(marker in error_log for marker in retryable_markers)
|
|
125
|
+
|
|
126
|
+
|
|
127
|
+
def remove_package_from_requirements(requirements: list[str], package_name: str) -> tuple[list[str], bool]:
    """Drop every requirement line that refers to *package_name*.

    The leading name of each requirement is compared with *package_name*
    after PEP 503 normalization, so ``Foo_Bar==1`` matches ``foo-bar``.
    Lines with no recognisable leading name are always kept.

    Returns:
        ``(remaining, removed)``. With an empty *package_name* the input list
        itself is returned together with ``False``; otherwise ``remaining``
        is a new list and ``removed`` says whether anything was dropped.
    """
    if not package_name:
        return requirements, False

    target = normalize_package_name(package_name)
    leading_name = re.compile(r"^([a-zA-Z0-9]([a-zA-Z0-9._-]*[a-zA-Z0-9])?)")

    def _names_target(requirement: str) -> bool:
        hit = leading_name.match(requirement)
        return hit is not None and normalize_package_name(hit.group(1)) == target

    kept: list[str] = []
    dropped = 0
    for requirement in requirements:
        if _names_target(requirement):
            dropped += 1
        else:
            kept.append(requirement)

    return kept, dropped > 0
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""SQLite-backed caching helpers for resolution results."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import contextlib
|
|
6
|
+
import functools
|
|
7
|
+
import os
|
|
8
|
+
import pickle
|
|
9
|
+
import re
|
|
10
|
+
import sqlite3
|
|
11
|
+
import threading
|
|
12
|
+
from collections.abc import Iterator
|
|
13
|
+
from typing import Callable, ParamSpec, TypeVar, cast
|
|
14
|
+
|
|
15
|
+
# In-process lock taken around the SQLite statements issued by cache_completion.
_cache_lock = threading.Lock()
# Parameter spec and return type of the decorated function, so the cached
# wrapper is typed with the same signature as the function it wraps.
_P = ParamSpec("_P")
_T = TypeVar("_T")
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@contextlib.contextmanager
|
|
21
|
+
def _file_lock(lock_path: str) -> Iterator[None]:
|
|
22
|
+
"""Cross-process exclusive lock using a sidecar .lock file."""
|
|
23
|
+
lockfile = lock_path + ".lock"
|
|
24
|
+
fd = os.open(lockfile, os.O_CREAT | os.O_RDWR, 0o644)
|
|
25
|
+
try:
|
|
26
|
+
try:
|
|
27
|
+
import fcntl # POSIX
|
|
28
|
+
|
|
29
|
+
fcntl.flock(fd, fcntl.LOCK_EX)
|
|
30
|
+
yield
|
|
31
|
+
fcntl.flock(fd, fcntl.LOCK_UN)
|
|
32
|
+
except ImportError: # pragma: no cover
|
|
33
|
+
import msvcrt
|
|
34
|
+
|
|
35
|
+
msvcrt.locking(fd, msvcrt.LK_LOCK, 1) # type: ignore[attr-defined]
|
|
36
|
+
yield
|
|
37
|
+
msvcrt.locking(fd, msvcrt.LK_UNLCK, 1) # type: ignore[attr-defined]
|
|
38
|
+
finally:
|
|
39
|
+
os.close(fd)
|
|
40
|
+
|
|
41
|
+
|
|
42
|
+
def _get_db_connection(db_loc: str) -> sqlite3.Connection:
|
|
43
|
+
"""Open a SQLite connection configured for concurrent workloads."""
|
|
44
|
+
conn = sqlite3.connect(db_loc, timeout=30, isolation_level=None)
|
|
45
|
+
jm = conn.execute("PRAGMA journal_mode=WAL;").fetchone()[0].lower()
|
|
46
|
+
if jm != "wal":
|
|
47
|
+
conn.execute("PRAGMA journal_mode=TRUNCATE;")
|
|
48
|
+
conn.execute("PRAGMA synchronous=FULL;")
|
|
49
|
+
else:
|
|
50
|
+
conn.execute("PRAGMA synchronous=NORMAL;")
|
|
51
|
+
conn.execute("PRAGMA busy_timeout=30000;")
|
|
52
|
+
return conn
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
def cache_completion(db_loc: str, table_name: str = "cache") -> Callable[[Callable[_P, _T]], Callable[_P, _T]]:
|
|
56
|
+
"""Cache function results in a SQLite table keyed by args/kwargs.
|
|
57
|
+
|
|
58
|
+
Passing ``bypass_cache=True`` to the wrapped function forces a refresh and
|
|
59
|
+
overwrites the cached result.
|
|
60
|
+
"""
|
|
61
|
+
if not re.match(r"^\w+$", table_name):
|
|
62
|
+
raise ValueError("table_name must be alphanumeric/underscore only")
|
|
63
|
+
|
|
64
|
+
def decorator(func: Callable[_P, _T]) -> Callable[_P, _T]:
|
|
65
|
+
@functools.wraps(func)
|
|
66
|
+
def wrapped(*args: _P.args, **kwargs: _P.kwargs) -> _T:
|
|
67
|
+
bypass = cast(bool, kwargs.pop("bypass_cache", False))
|
|
68
|
+
key_kwargs = dict(sorted(kwargs.items()))
|
|
69
|
+
|
|
70
|
+
conn = _get_db_connection(db_loc)
|
|
71
|
+
try:
|
|
72
|
+
function_name = func.__name__
|
|
73
|
+
|
|
74
|
+
create_sql = (
|
|
75
|
+
f"CREATE TABLE IF NOT EXISTS {table_name} ("
|
|
76
|
+
" function_name TEXT NOT NULL,"
|
|
77
|
+
" argument_blob BLOB NOT NULL,"
|
|
78
|
+
" result_blob BLOB,"
|
|
79
|
+
" created_at TEXT DEFAULT CURRENT_TIMESTAMP,"
|
|
80
|
+
" updated_at TEXT DEFAULT CURRENT_TIMESTAMP,"
|
|
81
|
+
" PRIMARY KEY (function_name, argument_blob)"
|
|
82
|
+
")"
|
|
83
|
+
)
|
|
84
|
+
with _file_lock(db_loc), _cache_lock:
|
|
85
|
+
conn.execute(create_sql)
|
|
86
|
+
|
|
87
|
+
set_clause = "result_blob=excluded.result_blob, updated_at=CURRENT_TIMESTAMP"
|
|
88
|
+
args_blob = pickle.dumps((function_name, args, key_kwargs))
|
|
89
|
+
|
|
90
|
+
if not bypass:
|
|
91
|
+
with _cache_lock:
|
|
92
|
+
row = conn.execute(
|
|
93
|
+
f"SELECT result_blob FROM {table_name} WHERE function_name=? AND argument_blob=?", # noqa: S608
|
|
94
|
+
(function_name, args_blob),
|
|
95
|
+
).fetchone()
|
|
96
|
+
if row is not None:
|
|
97
|
+
return cast(_T, pickle.loads(row[0])) # noqa: S301
|
|
98
|
+
|
|
99
|
+
if "bypass_cache" in func.__code__.co_varnames:
|
|
100
|
+
kwargs["bypass_cache"] = bypass
|
|
101
|
+
|
|
102
|
+
result = func(*args, **kwargs)
|
|
103
|
+
result_blob = pickle.dumps(result)
|
|
104
|
+
|
|
105
|
+
with _file_lock(db_loc), _cache_lock:
|
|
106
|
+
conn.execute(
|
|
107
|
+
f"""
|
|
108
|
+
INSERT INTO {table_name}(function_name, argument_blob, result_blob)
|
|
109
|
+
VALUES(?, ?, ?)
|
|
110
|
+
ON CONFLICT(function_name, argument_blob) DO UPDATE SET {set_clause}
|
|
111
|
+
""",
|
|
112
|
+
(function_name, args_blob, result_blob),
|
|
113
|
+
)
|
|
114
|
+
return result
|
|
115
|
+
finally:
|
|
116
|
+
conn.close()
|
|
117
|
+
|
|
118
|
+
return wrapped
|
|
119
|
+
|
|
120
|
+
return decorator
|
|
@@ -0,0 +1,277 @@
|
|
|
1
|
+
"""Constants used in dependency resolution."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import os
|
|
6
|
+
import re
|
|
7
|
+
import sys
|
|
8
|
+
from pathlib import Path
|
|
9
|
+
|
|
10
|
+
# Regular expressions
# asv config file as the last path component: optional leading dot, "asv",
# any non-slash suffix, then ".json" or ".jsonc".
ASV_REGEX = re.compile(r"(^|/)\.?asv[^/]*\.jsonc?$")
# "constraints.txt" / "constraints.<tag>.txt", or a path component starting
# with "requirements" and ending in ".txt" (note: ".*" may also span "/").
REQ_TXT_REGEX = re.compile(r"(^|/)(?:constraints(?:\.[-\w]+)?|requirements.*)\.txt$")
# ANSI CSI escape sequences (ESC [ ... final byte), e.g. terminal colour codes.
ANSI_RE = re.compile(r"\x1B\[[0-?]*[ -/]*[@-~]")
# `; extra == "name"` environment marker; group 1 captures the extra's name.
EXTRA_MARKER_RE = re.compile(r';\s*extra\s*==\s*["\']([^"\']+)["\']')
|
|
15
|
+
|
|
16
|
+
# File names (Python project metadata files and conda environment files)
PYPROJECT = "pyproject.toml"
SETUP_CFG = "setup.cfg"
SETUP_PY = "setup.py"
# Both accepted spellings of the environment file extension.
ENV_YML_NAMES = {"environment.yml", "environment.yaml"}
|
|
21
|
+
|
|
22
|
+
# Cache location for SQLite caches
# (overridable through the CACHE_LOCATION environment variable; the default
# "cache.db" is relative to the current working directory).
CACHE_LOCATION: str = os.getenv("CACHE_LOCATION", "cache.db")

# Git cache directory
# (GIT_CACHE_DIR environment variable if set, otherwise a "git" directory
# next to the cache file; "~" is expanded).
GIT_CACHE_DIR = Path(os.getenv("GIT_CACHE_DIR", str(Path(CACHE_LOCATION).parent / "git"))).expanduser()
# NOTE: this runs at import time, so importing the module creates the directory.
GIT_CACHE_DIR.mkdir(parents=True, exist_ok=True)
|
|
28
|
+
|
|
29
|
+
# Import name to PyPI package name mapping
# (modules whose import name differs from the name of the distribution that
# provides them; keys are import names, values are PyPI names).
SPECIAL_IMPORT_TO_PYPI: dict[str, str] = {
    "sklearn": "scikit-learn",
    "PIL": "Pillow",
    "cv2": "opencv-python",
    "yaml": "PyYAML",
    "bs4": "beautifulsoup4",
    "Crypto": "pycryptodome",
}
|
|
38
|
+
|
|
39
|
+
# Conda-only and system packages that don't exist on PyPI
# (compilers and build tooling; entries kept in alphabetical order).
CONDA_SYSTEM_PACKAGES = {
    "autoconf",
    "automake",
    "bison",
    "c-compiler",
    "clang",
    "clangxx",
    "cmake",
    "compilers",
    "cxx-compiler",
    "flex",
    "fortran-compiler",
    "gcc",
    "gfortran",
    "gxx",
    "libtool",
    "m4",
    "make",
    "patch",
    "pkg-config",
}
|
|
61
|
+
|
|
62
|
+
# Packages that are stdlib or not installable via PyPI
# NOTE(review): "pyqt4" is listed here and also in ALLOWLIST_COMMON_PYPI --
# confirm which of the two lists is meant to win for that name.
NOT_REQUIREMENTS = {
    # py2 names / stdlib modules seen in repos
    "configparser",
    "ConfigParser",
    "asyncore",
    "distutils",
    "sqlite3",
    "tkinter",
    "Tkinter",
    "cStringIO",
    "urllib",
    "urllib2",
    # setuptools internals
    "pkg_resources",
    # platform frameworks (macOS)
    "AppKit",
    "Foundation",
    # Build/packaging tools (not runtime dependencies)
    "py2exe",
    "cx_Freeze",
    "py2app",
    "nuitka",
    # CLI verbs / interpreter references that slip through tokenization
    "python",
    "Python",
    "python3",
    "install",
    "uninstall",
    # Version-like fragments that slip through tokenization
    "0-29-21",
    "0-29-30",
    "0-29-32",
    "0-29-33",
    "1-0",
    "1-11-2",
    "1-12",
    "1-14-0",
    "1-2",
    "1-22-0",
    "1-23-5",
    "1-3-2",
    "1-8-1",
    "1-9-1",
    "2-18-4",
    "2-2",
    "2024-1-1",
    "3-0",
    "3-0-0a10",
    "3-0-0a11",
    "3-0-5",
    "3-1-2",
    "3-2-0",
    "59-2-0",
    # Remaining names from the original list (alphabetical)
    "absl",
    "afl",
    "allel",
    "backports",
    "c-distances-openmp",
    "cartopy-userconfig",
    "cdms2",
    "closest-peak-direction-getter",
    "column-parsers",
    "conans",
    "copy-reg",
    "cpickle",
    "cprofile",
    "cryptodome",
    "cupyx",
    "dask-core",
    "dateutil",
    "dbe",
    "deepchecks-metrics",
    "flatted",
    "geopandas-base",
    "givens-elimination",
    "h5r",
    "h5s",
    "h5t",
    "h5z",
    "imp",
    "interpnd",
    "jpeg-ls",
    "libblas",
    "libpantab",
    "libreader",
    "libwriter",
    "mo-pack",
    "mpl-toolkits",
    "nattype",
    "omniscidbe",
    "openjpeg",
    "patoolib",
    "peerplaysbase",
    "pnetdicom",
    "probabilistic-direction-getter",
    "pyhdk",
    "pylab",
    "pymake",
    "pypocketfft",
    "pyqt",
    "pyqt4",
    "pytables",
    "skbuild",
    "sklearnex",
    "sksparse",
    "skspatial",
    "splitting",
    "stringio",
    "system",
    "tunits",
    "urlparse",
    "vcr",
    "vectorized",
    "voyager-ext",
}
|
|
177
|
+
|
|
178
|
+
# Well-known PyPI packages (allowlist for common names)
# Entries are case-sensitive, hence both spellings of some names; kept in
# case-insensitive alphabetical order.
ALLOWLIST_COMMON_PYPI = {
    "black",
    "cartopy",
    "Cython",
    "cython",
    "dask",
    "fiona",
    "flake8",
    "functorch",
    "geopandas",
    "graphviz",
    "h5py",
    "hypothesis",
    "ipykernel",
    "IPython",
    "ipython",
    "ipywidgets",
    "isort",
    "jupyter",
    "matplotlib",
    "nbsphinx",
    "networkx",
    "numba",
    "numexpr",
    "numpy",
    "pandas",
    "Pillow",
    "pillow",
    "pip",
    "pooch",
    "psycopg2",
    "pyarrow",
    "pyproj",
    "pyqt4",
    "pyqt5",
    "pytest",
    "qtpy",
    "requests",
    "rtree",
    "scikit-learn",
    "scipy",
    "seaborn",
    "setuptools",
    "shapely",
    "sklearn",
    "sphinx",
    "sphinx-gallery",
    "SQLAlchemy",
    "sqlalchemy",
    "statsmodels",
    "sympy",
    "torch",
    "tqdm",
    "wheel",
    "xarray",
    "xgboost",
}
|
|
236
|
+
|
|
237
|
+
# Generic names that are likely local modules, not PyPI packages
# (kept in alphabetical order).
GENERIC_LOCAL_NAMES = {
    "base",
    "benchmark",
    "benchmarks",
    "common",
    "config",
    "configs",
    "const",
    "constants",
    "core",
    "data",
    "doc",
    "docs",
    "documentation",
    "example",
    "examples",
    "helper",
    "helpers",
    "lib",
    "libs",
    "model",
    "models",
    "script",
    "scripts",
    "test",
    "testing",
    "tests",
    "tool",
    "tools",
    "types",
    "util",
    "utilities",
    "utils",
}
|
|
272
|
+
|
|
273
|
+
# Python stdlib modules (Python 3.10+)
# sys.stdlib_module_names only exists on 3.10 and later; on interpreters
# without it the set is simply empty.
STDLIB = set(getattr(sys, "stdlib_module_names", ()))
|