reaxkit-1.0.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- reaxkit/__init__.py +0 -0
- reaxkit/analysis/__init__.py +0 -0
- reaxkit/analysis/composed/RDF_analyzer.py +560 -0
- reaxkit/analysis/composed/__init__.py +0 -0
- reaxkit/analysis/composed/connectivity_analyzer.py +706 -0
- reaxkit/analysis/composed/coordination_analyzer.py +144 -0
- reaxkit/analysis/composed/electrostatics_analyzer.py +687 -0
- reaxkit/analysis/per_file/__init__.py +0 -0
- reaxkit/analysis/per_file/control_analyzer.py +165 -0
- reaxkit/analysis/per_file/eregime_analyzer.py +108 -0
- reaxkit/analysis/per_file/ffield_analyzer.py +305 -0
- reaxkit/analysis/per_file/fort13_analyzer.py +79 -0
- reaxkit/analysis/per_file/fort57_analyzer.py +106 -0
- reaxkit/analysis/per_file/fort73_analyzer.py +61 -0
- reaxkit/analysis/per_file/fort74_analyzer.py +65 -0
- reaxkit/analysis/per_file/fort76_analyzer.py +191 -0
- reaxkit/analysis/per_file/fort78_analyzer.py +154 -0
- reaxkit/analysis/per_file/fort79_analyzer.py +83 -0
- reaxkit/analysis/per_file/fort7_analyzer.py +393 -0
- reaxkit/analysis/per_file/fort99_analyzer.py +411 -0
- reaxkit/analysis/per_file/molfra_analyzer.py +359 -0
- reaxkit/analysis/per_file/params_analyzer.py +258 -0
- reaxkit/analysis/per_file/summary_analyzer.py +84 -0
- reaxkit/analysis/per_file/trainset_analyzer.py +84 -0
- reaxkit/analysis/per_file/vels_analyzer.py +95 -0
- reaxkit/analysis/per_file/xmolout_analyzer.py +528 -0
- reaxkit/cli.py +181 -0
- reaxkit/count_loc.py +276 -0
- reaxkit/data/alias.yaml +89 -0
- reaxkit/data/constants.yaml +27 -0
- reaxkit/data/reaxff_input_files_contents.yaml +186 -0
- reaxkit/data/reaxff_output_files_contents.yaml +301 -0
- reaxkit/data/units.yaml +38 -0
- reaxkit/help/__init__.py +0 -0
- reaxkit/help/help_index_loader.py +531 -0
- reaxkit/help/introspection_utils.py +131 -0
- reaxkit/io/__init__.py +0 -0
- reaxkit/io/base_handler.py +165 -0
- reaxkit/io/generators/__init__.py +0 -0
- reaxkit/io/generators/control_generator.py +123 -0
- reaxkit/io/generators/eregime_generator.py +341 -0
- reaxkit/io/generators/geo_generator.py +967 -0
- reaxkit/io/generators/trainset_generator.py +1758 -0
- reaxkit/io/generators/tregime_generator.py +113 -0
- reaxkit/io/generators/vregime_generator.py +164 -0
- reaxkit/io/generators/xmolout_generator.py +304 -0
- reaxkit/io/handlers/__init__.py +0 -0
- reaxkit/io/handlers/control_handler.py +209 -0
- reaxkit/io/handlers/eregime_handler.py +122 -0
- reaxkit/io/handlers/ffield_handler.py +812 -0
- reaxkit/io/handlers/fort13_handler.py +123 -0
- reaxkit/io/handlers/fort57_handler.py +143 -0
- reaxkit/io/handlers/fort73_handler.py +145 -0
- reaxkit/io/handlers/fort74_handler.py +155 -0
- reaxkit/io/handlers/fort76_handler.py +195 -0
- reaxkit/io/handlers/fort78_handler.py +142 -0
- reaxkit/io/handlers/fort79_handler.py +227 -0
- reaxkit/io/handlers/fort7_handler.py +264 -0
- reaxkit/io/handlers/fort99_handler.py +128 -0
- reaxkit/io/handlers/geo_handler.py +224 -0
- reaxkit/io/handlers/molfra_handler.py +184 -0
- reaxkit/io/handlers/params_handler.py +137 -0
- reaxkit/io/handlers/summary_handler.py +135 -0
- reaxkit/io/handlers/trainset_handler.py +658 -0
- reaxkit/io/handlers/vels_handler.py +293 -0
- reaxkit/io/handlers/xmolout_handler.py +174 -0
- reaxkit/utils/__init__.py +0 -0
- reaxkit/utils/alias.py +219 -0
- reaxkit/utils/cache.py +77 -0
- reaxkit/utils/constants.py +75 -0
- reaxkit/utils/equation_of_states.py +96 -0
- reaxkit/utils/exceptions.py +27 -0
- reaxkit/utils/frame_utils.py +175 -0
- reaxkit/utils/log.py +43 -0
- reaxkit/utils/media/__init__.py +0 -0
- reaxkit/utils/media/convert.py +90 -0
- reaxkit/utils/media/make_video.py +91 -0
- reaxkit/utils/media/plotter.py +812 -0
- reaxkit/utils/numerical/__init__.py +0 -0
- reaxkit/utils/numerical/extrema_finder.py +96 -0
- reaxkit/utils/numerical/moving_average.py +103 -0
- reaxkit/utils/numerical/numerical_calcs.py +75 -0
- reaxkit/utils/numerical/signal_ops.py +135 -0
- reaxkit/utils/path.py +55 -0
- reaxkit/utils/units.py +104 -0
- reaxkit/webui/__init__.py +0 -0
- reaxkit/webui/app.py +0 -0
- reaxkit/webui/components.py +0 -0
- reaxkit/webui/layouts.py +0 -0
- reaxkit/webui/utils.py +0 -0
- reaxkit/workflows/__init__.py +0 -0
- reaxkit/workflows/composed/__init__.py +0 -0
- reaxkit/workflows/composed/coordination_workflow.py +393 -0
- reaxkit/workflows/composed/electrostatics_workflow.py +587 -0
- reaxkit/workflows/composed/xmolout_fort7_workflow.py +343 -0
- reaxkit/workflows/meta/__init__.py +0 -0
- reaxkit/workflows/meta/help_workflow.py +136 -0
- reaxkit/workflows/meta/introspection_workflow.py +235 -0
- reaxkit/workflows/meta/make_video_workflow.py +61 -0
- reaxkit/workflows/meta/plotter_workflow.py +601 -0
- reaxkit/workflows/per_file/__init__.py +0 -0
- reaxkit/workflows/per_file/control_workflow.py +110 -0
- reaxkit/workflows/per_file/eregime_workflow.py +267 -0
- reaxkit/workflows/per_file/ffield_workflow.py +390 -0
- reaxkit/workflows/per_file/fort13_workflow.py +86 -0
- reaxkit/workflows/per_file/fort57_workflow.py +137 -0
- reaxkit/workflows/per_file/fort73_workflow.py +151 -0
- reaxkit/workflows/per_file/fort74_workflow.py +88 -0
- reaxkit/workflows/per_file/fort76_workflow.py +188 -0
- reaxkit/workflows/per_file/fort78_workflow.py +135 -0
- reaxkit/workflows/per_file/fort79_workflow.py +314 -0
- reaxkit/workflows/per_file/fort7_workflow.py +592 -0
- reaxkit/workflows/per_file/fort83_workflow.py +60 -0
- reaxkit/workflows/per_file/fort99_workflow.py +223 -0
- reaxkit/workflows/per_file/geo_workflow.py +554 -0
- reaxkit/workflows/per_file/molfra_workflow.py +577 -0
- reaxkit/workflows/per_file/params_workflow.py +135 -0
- reaxkit/workflows/per_file/summary_workflow.py +161 -0
- reaxkit/workflows/per_file/trainset_workflow.py +356 -0
- reaxkit/workflows/per_file/tregime_workflow.py +79 -0
- reaxkit/workflows/per_file/vels_workflow.py +309 -0
- reaxkit/workflows/per_file/vregime_workflow.py +75 -0
- reaxkit/workflows/per_file/xmolout_workflow.py +678 -0
- reaxkit-1.0.0.dist-info/METADATA +128 -0
- reaxkit-1.0.0.dist-info/RECORD +130 -0
- reaxkit-1.0.0.dist-info/WHEEL +5 -0
- reaxkit-1.0.0.dist-info/entry_points.txt +2 -0
- reaxkit-1.0.0.dist-info/licenses/AUTHORS.md +20 -0
- reaxkit-1.0.0.dist-info/licenses/LICENSE +21 -0
- reaxkit-1.0.0.dist-info/top_level.txt +1 -0
reaxkit/help/help_index_loader.py  ADDED
@@ -0,0 +1,531 @@
"""
ReaxKit help index search utilities.

This module provides a lightweight search engine used by the
``reaxkit help`` command to map natural-language queries
(e.g. "electric field", "bond order", "restraint")
to relevant ReaxFF input and output files.

The search operates on curated YAML indices shipped with ReaxKit
and ranks matches based on keyword overlap and fuzzy similarity.

Typical use cases include:

- discovering which ReaxFF file controls a given concept
- exploring available variables in input/output files
- guiding users toward the correct handler or workflow
"""

from __future__ import annotations

from dataclasses import dataclass
from typing import Any, Dict, Iterable, List, Optional, Tuple
from functools import lru_cache
import re

try:
    import yaml  # pyyaml
except Exception as e:  # pragma: no cover
    raise ImportError("PyYAML is required to use ReaxKit help index. Install with: pip install pyyaml") from e


# ----------------------------
# Data access (package files)
# ----------------------------

@lru_cache(maxsize=1)
def load_input_index() -> Dict[str, Any]:
    """
    Load the ReaxFF input-file help index.

    Returns
    -------
    dict
        Parsed contents of ``reaxff_input_files_contents.yaml``.
    """
    return _read_yaml_from_pkg_data("reaxff_input_files_contents.yaml")


@lru_cache(maxsize=1)
def load_output_index() -> Dict[str, Any]:
    """
    Load the ReaxFF output-file help index.

    Returns
    -------
    dict
        Parsed contents of ``reaxff_output_files_contents.yaml``.
    """
    return _read_yaml_from_pkg_data("reaxff_output_files_contents.yaml")

@dataclass(frozen=True)
class PreparedEntry:
    """
    Preprocessed help entry used for search.

    This class stores precomputed text fields and tokenized
    representations for a single ReaxFF file entry.

    Attributes
    ----------
    file : str
        Canonical ReaxFF file name.
    entry : dict
        Raw YAML entry.
    blobs : dict
        Concatenated searchable text fields.
    tokens : dict
        Tokenized versions of searchable fields.
    """
    file: str
    entry: Dict[str, Any]
    blobs: Dict[str, str]
    tokens: Dict[str, set[str]]


def _prepare_index(idx: Dict[str, Any]) -> Dict[str, PreparedEntry]:
    files = idx.get("files", {}) or {}
    prepared = {}

    for file_key, entry in files.items():
        if not isinstance(entry, dict):
            continue

        blobs = _entry_search_blobs(file_key, entry)
        tokens = {k: set(_tokens(v)) for k, v in blobs.items()}

        prepared[file_key] = PreparedEntry(
            file=file_key,
            entry=entry,
            blobs=blobs,
            tokens=tokens,
        )

    return prepared


@lru_cache(maxsize=1)
def load_prepared_input_index() -> Dict[str, PreparedEntry]:
    return _prepare_index(_load_input_index())


@lru_cache(maxsize=1)
def load_prepared_output_index() -> Dict[str, PreparedEntry]:
    return _prepare_index(_load_output_index())


def _read_yaml_from_pkg_data(filename: str) -> Dict[str, Any]:
    """
    Load a YAML file bundled inside the ``reaxkit.data`` package.

    Parameters
    ----------
    filename : str
        Name of the YAML file to load.

    Returns
    -------
    dict
        Parsed YAML contents.
    """
    try:
        from importlib import resources
        data_pkg = resources.files("reaxkit.data")
        path = data_pkg / filename
        text = path.read_text(encoding="utf-8")
    except Exception as e:
        raise FileNotFoundError(
            f"Could not read '{filename}' from package 'reaxkit.data'. "
            f"Make sure it exists under src/reaxkit/data/ and is included in package data."
        ) from e

    obj = yaml.safe_load(text) or {}
    if not isinstance(obj, dict):
        raise ValueError(f"YAML root must be a mapping/dict in '{filename}'.")
    return obj


def _load_input_index() -> Dict[str, Any]:
    return _read_yaml_from_pkg_data("reaxff_input_files_contents.yaml")


def _load_output_index() -> Dict[str, Any]:
    return _read_yaml_from_pkg_data("reaxff_output_files_contents.yaml")

# ----------------------------
# Search / ranking
# ----------------------------

_WORD_RE = re.compile(r"[a-z0-9]+")


def _norm(s: str) -> str:
    """
    Normalize a string for case-insensitive search matching.
    """
    s = s.lower()
    s = s.replace("_", " ").replace("-", " ")
    s = re.sub(r"\s+", " ", s).strip()
    return s


def _tokens(s: str) -> List[str]:
    """
    Tokenize a normalized string into alphanumeric search terms.
    """
    return _WORD_RE.findall(_norm(s))


def _fuzzy_ratio(a: str, b: str) -> float:
    """
    Compute fuzzy similarity between two strings.

    Returns
    -------
    float
        Similarity score in the range 0–100.
    """
    a = _norm(a)
    b = _norm(b)
    if not a or not b:
        return 0.0

    try:
        from rapidfuzz.fuzz import ratio
        return float(ratio(a, b))
    except Exception:
        import difflib
        return 100.0 * difflib.SequenceMatcher(None, a, b).ratio()


def _as_list(v: Any) -> List[str]:
    """
    Normalize a YAML value into a list of strings.
    """
    if v is None:
        return []
    if isinstance(v, str):
        return [v]
    if isinstance(v, list):
        return [str(x) for x in v if x is not None]
    return [str(v)]


def _entry_search_blobs(file_key: str, entry: Dict[str, Any]) -> Dict[str, str]:
    """
    Build concatenated searchable text fields for a help entry.

    Parameters
    ----------
    file_key : str
        Canonical ReaxFF file name.
    entry : dict
        YAML entry describing the file.

    Returns
    -------
    dict
        Mapping of field name to searchable text.
    """
    aliases = _as_list(entry.get("aliases"))
    desc = str(entry.get("desc") or "")
    tags = _as_list(entry.get("tags"))
    core_vars = _as_list(entry.get("core_vars"))
    optional_vars = _as_list(entry.get("optional_vars"))
    derived_vars = _as_list(entry.get("derived_vars"))
    best_for = _as_list(entry.get("best_for"))
    # some YAMLs might use related_run or related_runs
    related = _as_list(entry.get("related_runs") or entry.get("related_run"))
    notes = _as_list(entry.get("notes"))
    examples = _as_list(entry.get("file_templates"))

    return {
        "names": " ".join([file_key] + aliases),
        "desc": desc,
        "tags": " ".join(tags),
        "core": " ".join(core_vars),
        "optional": " ".join(optional_vars),
        "derived": " ".join(derived_vars),
        "best_for": " ".join(best_for),
        "related": " ".join(related),
        "notes": " ".join(notes),
        "file_templates": " ".join(examples),
    }

@dataclass(frozen=True)
class HelpHit:
    """
    Ranked result returned by the help index search.

    Attributes
    ----------
    kind : str
        Either ``"input"`` or ``"output"``.
    file : str
        ReaxFF file name.
    score : float
        Relevance score.
    why : list of str
        Short explanations for why the file matched.
    entry : dict
        Raw YAML entry.
    """
    kind: str  # "input" or "output"
    file: str  # key in YAML
    score: float
    why: List[str]  # short reasons
    entry: Dict[str, Any]  # raw entry


def search_help_indices(
    query: str,
    *,
    top_k: int = 8,
    min_score: float = 35.0,
) -> List[HelpHit]:
    """
    Search ReaxKit help indices for relevant ReaxFF files.

    Parameters
    ----------
    query : str
        Natural-language search query.
    top_k : int, optional
        Maximum number of results to return.
    min_score : float, optional
        Minimum relevance score for a result to be included.

    Returns
    -------
    list of HelpHit
        Ranked search results across input and output files.

    Examples
    --------
    >>> hits = search_help_indices("electric field")
    >>> hits[0].file
    'eregime.in'
    """
    q = _norm(query)
    q_toks = set(_tokens(query))

    in_idx = load_prepared_input_index()
    out_idx = load_prepared_output_index()

    hits: List[HelpHit] = []
    hits.extend(_search_one_index("input", in_idx, q, q_toks))
    hits.extend(_search_one_index("output", out_idx, q, q_toks))

    # overall top_k across both
    hits.sort(key=lambda h: h.score, reverse=True)
    hits = [h for h in hits if h.score >= min_score]
    return hits[:top_k]

def _search_one_index(
    kind: str,
    idx: Dict[str, PreparedEntry],
    q: str,
    q_toks: set[str],
) -> List[HelpHit]:
    """
    Search a single help index (input or output).

    Parameters
    ----------
    kind : str
        Either ``"input"`` or ``"output"``.
    idx : dict
        Prepared index mapping file names to ``PreparedEntry`` objects.
    q : str
        Normalized query string.
    q_toks : set of str
        Tokenized query terms.

    Returns
    -------
    list of HelpHit
        Ranked matches from the given index.
    """
    files = idx.get("files", {}) or {}
    if not isinstance(files, dict):
        return []

    res: List[HelpHit] = []

    for file_key, prep in idx.items():
        fast_score = 0.0

        entry = prep.entry
        blobs = prep.blobs
        tokens = prep.tokens

        score = 0.0
        why: List[str] = []

        # 1) deterministic boosts
        names_norm = _norm(blobs["names"])
        if q and q in names_norm.split():
            score += 120.0
            why.append("exact file/alias match")

        # token overlaps (fast and robust)
        def _overlap(field_name: str, weight: float) -> None:
            nonlocal fast_score
            ov = q_toks & tokens[field_name]
            if ov:
                fast_score += weight + 4.0 * len(ov)

        _overlap("tags", 30.0)
        _overlap("best_for", 22.0)
        _overlap("core", 18.0)
        _overlap("optional", 8.0)  # smaller boost; optional-only matches should rank lower
        _overlap("derived", 18.0)
        _overlap("related", 14.0)
        _overlap("desc", 10.0)

        if fast_score < 10.0:
            continue

        # 2) fuzzy matching over key fields (weighted)
        score = fast_score

        score += 0.35 * _fuzzy_ratio(q, blobs["tags"])
        score += 0.30 * _fuzzy_ratio(q, blobs["names"])
        score += 0.22 * _fuzzy_ratio(q, blobs["core"])
        score += 0.10 * _fuzzy_ratio(q, blobs["desc"])
        score += 0.06 * _fuzzy_ratio(q, blobs["optional"])
        score += 0.12 * _fuzzy_ratio(q, blobs["derived"])
        score += 0.04 * _fuzzy_ratio(q, blobs["notes"])

        # small preference: if it matches best_for strongly, nudge up
        if _fuzzy_ratio(q, blobs["best_for"]) >= 80:
            score += 10.0
            why.append("strong best_for match")

        # add hit if not totally irrelevant (threshold handled later)
        res.append(HelpHit(kind=kind, file=str(file_key), score=score, why=why, entry=entry))

    return res

def _group_hits(hits: Iterable[HelpHit]) -> Tuple[List[HelpHit], List[HelpHit]]:
    """
    Split search results into input and output file groups.

    Parameters
    ----------
    hits : iterable of HelpHit
        Search results.

    Returns
    -------
    tuple of list of HelpHit
        ``(input_hits, output_hits)`` sorted by score.
    """

    ins = [h for h in hits if h.kind == "input"]
    outs = [h for h in hits if h.kind == "output"]
    ins.sort(key=lambda h: h.score, reverse=True)
    outs.sort(key=lambda h: h.score, reverse=True)
    return ins, outs


def _format_hits(
    hits: List[HelpHit],
    *,
    show_why: bool = True,
    show_examples: bool = False,
    show_tags: bool = False,
    show_core_vars: bool = False,
    show_optional_vars: bool = False,
    show_derived_vars: bool = False,
    show_notes: bool = False,
) -> str:
    """
    Format help search results for CLI display.

    Parameters
    ----------
    hits : list of HelpHit
        Search results.
    show_why, show_examples, show_tags, show_core_vars, show_optional_vars, show_derived_vars, show_notes : bool
        Flags controlling which metadata fields are displayed.

    Returns
    -------
    str
        Human-readable formatted output.
    """
    in_hits, out_hits = _group_hits(hits)

    def _fmt_one(h: HelpHit) -> str:
        e = h.entry
        kind_flag = f" --{h.kind}"
        related = e.get("related_runs") or e.get("related_run") or []
        related_list = related if isinstance(related, list) else [related]
        related_str = f" related_run: [{', '.join(related_list)}]" if related_list else ""
        lines = [f"• {h.file}{kind_flag} (score={h.score:.1f}){related_str}"]

        desc = e.get("desc")
        if desc:
            lines.append(f" {desc}")
        if show_why and h.why:
            lines.append(f" why: {', '.join(h.why[:3])}")
        if show_examples:
            ex = e.get("file_templates") or []
            if ex:
                lines.append(f" ex: {ex[0]}")
        if show_tags:
            tags = e.get("tags") or []
            if tags:
                lines.append(f" tags: {tags}")

        if show_core_vars:
            xs = e.get("core_vars") or []
            if xs:
                lines.append(" core_vars:")
                lines.extend(f" - {v}" for v in xs)

        if show_optional_vars:
            xs = e.get("optional_vars") or []
            if xs:
                lines.append(" optional_vars:")
                lines.extend(f" - {v}" for v in xs)

        if show_derived_vars:
            xs = e.get("derived_vars") or []
            if xs:
                lines.append(" derived_vars:")
                lines.extend(f" - {v}" for v in xs)

        if show_notes:
            xs = e.get("notes") or []
            if xs:
                lines.append(" notes:")
                lines.extend(f" - {v}" for v in xs)

        return "\n".join(lines)

    parts: List[str] = []
    if in_hits:
        parts.append("INPUT FILES")
        parts.extend(_fmt_one(h) for h in in_hits)
    if in_hits and out_hits:
        parts.append("-------------")
    if out_hits:
        parts.append("OUTPUT FILES")
        parts.extend(_fmt_one(h) for h in out_hits)

    if not parts:
        return "❌ No matches."

    parts.append("")
    parts.append(
        "Tip: use `reaxkit <filename> -h` or `reaxkit <filename> <task> -h` "
        "to see a more comprehensive description of available options, "
        "file_templates, and usage details.\n"
    )

    return "\n".join(parts)

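For orientation (not part of the package contents above): a minimal usage sketch of the search API defined in help_index_loader.py, assuming reaxkit 1.0.0 is installed. The query string and printed fields are illustrative only; actual hits depend on the bundled YAML indices.

# Hypothetical usage sketch of the help index search (not from the package diff).
from reaxkit.help.help_index_loader import search_help_indices, _format_hits

hits = search_help_indices("electric field", top_k=5)  # ranked HelpHit objects
for h in hits:
    print(h.kind, h.file, round(h.score, 1), h.why)    # which file matched, and why

# _format_hits renders grouped, human-readable output like the `reaxkit help` CLI command
print(_format_hits(hits, show_tags=True, show_core_vars=True))
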
reaxkit/help/introspection_utils.py  ADDED
@@ -0,0 +1,131 @@
"""Tools for extracting one-line summaries from modules and their functions/classes.

This module is used by the `reaxkit intspec` workflow to:
- list all .py files under a folder (recursively) with module docstring summaries
- show a module's docstring and a table of public functions/classes with doc summaries
"""

from __future__ import annotations

import ast
import os
import importlib.util
from dataclasses import dataclass
from typing import Any, Iterable, List, Optional, Tuple


# ---------------------------- helpers ----------------------------

def _first_line(s: str | None) -> str:
    if not s:
        return "No description"
    for line in s.strip().splitlines():
        if line.strip():
            return line.strip()
    return "No description"


def _parse_ast_from_file(pyfile: str) -> ast.Module:
    with open(pyfile, "r", encoding="utf-8") as f:
        return ast.parse(f.read(), filename=pyfile)


def module_docstring_first_line_from_file(pyfile: str) -> str:
    """Return first non-empty line of a module's docstring (no import)."""
    try:
        tree = _parse_ast_from_file(pyfile)
        return _first_line(ast.get_docstring(tree))
    except Exception:
        return "No description"


@dataclass(frozen=True)
class PublicSymbolSummary:
    name: str
    kind: str  # "function" | "class"
    summary: str


def public_symbols_from_file(pyfile: str) -> List[PublicSymbolSummary]:
    """Return public functions/classes defined in a .py file using AST (no import)."""
    try:
        tree = _parse_ast_from_file(pyfile)
    except Exception:
        return []

    out: List[PublicSymbolSummary] = []

    for node in tree.body:
        if isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef)):
            name = node.name
            if name.startswith("_"):
                continue
            doc = ast.get_docstring(node)
            out.append(PublicSymbolSummary(name=name, kind="function", summary=_first_line(doc)))

        elif isinstance(node, ast.ClassDef):
            name = node.name
            if name.startswith("_"):
                continue
            doc = ast.get_docstring(node)
            out.append(PublicSymbolSummary(name=name, kind="class", summary=_first_line(doc)))

    out.sort(key=lambda r: (r.kind, r.name.lower()))
    return out


# ---------------------------- folder scanning ----------------------------

def iter_py_files_recursive(
    root_dir: str,
    *,
    skip_private: bool = True,
    skip_init: bool = True,
    skip_dirs: Optional[Iterable[str]] = None,
) -> List[str]:
    """Return a sorted list of .py files under root_dir (recursively)."""
    root_dir = os.path.abspath(root_dir)
    skip_dirs = set(skip_dirs or {"__pycache__", ".git", ".venv", "venv", "site-packages", "dist", "build"})

    hits: List[str] = []
    for cur, dirs, files in os.walk(root_dir):
        # prune directories in-place
        dirs[:] = [d for d in dirs if d not in skip_dirs and not (skip_private and d.startswith("_"))]

        for fn in files:
            if not fn.endswith(".py"):
                continue
            if skip_init and fn == "__init__.py":
                continue
            if skip_private and fn.startswith("_"):
                continue
            hits.append(os.path.join(cur, fn))

    hits.sort(key=lambda p: p.lower())
    return hits


def list_modules_recursive_with_summaries(pkg_dir: str) -> List[Tuple[str, str]]:
    """Return [(relative_path, module_docstring_first_line), ...] for all .py files under pkg_dir."""
    rows: List[Tuple[str, str]] = []
    for py in iter_py_files_recursive(pkg_dir):
        rel = os.path.relpath(py, pkg_dir).replace("\\", "/")
        summary = module_docstring_first_line_from_file(py)
        rows.append((rel, summary))
    return rows


# ---------------------------- resolving hints ----------------------------

def resolve_module_hint_to_file(module_hint: str) -> Optional[str]:
    """
    Resolve a dotted module name to a filesystem .py path using importlib spec.
    Returns None if it cannot be resolved.
    """
    try:
        spec = importlib.util.find_spec(module_hint)
        if spec and spec.origin and spec.origin.endswith(".py"):
            return spec.origin
    except Exception:
        return None
    return None

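For orientation (not part of the package contents above): a minimal usage sketch of the introspection helpers in introspection_utils.py, assuming reaxkit 1.0.0 is installed. The scanned directory and the dotted module name are illustrative only.

# Hypothetical usage sketch of the introspection utilities (not from the package diff).
from reaxkit.help.introspection_utils import (
    list_modules_recursive_with_summaries,
    public_symbols_from_file,
    resolve_module_hint_to_file,
)

# One-line docstring summary for every .py file under a package directory
for rel_path, summary in list_modules_recursive_with_summaries("src/reaxkit"):
    print(f"{rel_path:<50} {summary}")

# Resolve a dotted module name to its .py file and list its public functions/classes
pyfile = resolve_module_hint_to_file("reaxkit.help.help_index_loader")
if pyfile:
    for sym in public_symbols_from_file(pyfile):
        print(f"{sym.kind:<8} {sym.name:<35} {sym.summary}")
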
reaxkit/io/__init__.py  ADDED
File without changes