cadence-skill-installer 0.2.11 → 0.2.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json
CHANGED
|
@@ -4,13 +4,28 @@
|
|
|
4
4
|
from __future__ import annotations
|
|
5
5
|
|
|
6
6
|
import argparse
|
|
7
|
+
from difflib import SequenceMatcher
|
|
7
8
|
import json
|
|
9
|
+
import re
|
|
8
10
|
import sys
|
|
9
11
|
from pathlib import Path
|
|
10
12
|
from typing import Any
|
|
11
13
|
|
|
12
14
|
from ideation_research import ResearchAgendaValidationError, normalize_ideation_research, slugify
|
|
13
15
|
|
|
16
|
+
FUZZY_TEXT_FIELDS: tuple[str, ...] = (
|
|
17
|
+
"block.title",
|
|
18
|
+
"block.rationale",
|
|
19
|
+
"block.tags",
|
|
20
|
+
"topic.title",
|
|
21
|
+
"topic.category",
|
|
22
|
+
"topic.why_it_matters",
|
|
23
|
+
"topic.research_questions",
|
|
24
|
+
"topic.keywords",
|
|
25
|
+
"topic.tags",
|
|
26
|
+
)
|
|
27
|
+
TOKEN_PATTERN = re.compile(r"[a-z0-9]+")
|
|
28
|
+
|
|
14
29
|
|
|
15
30
|
def parse_args() -> argparse.Namespace:
|
|
16
31
|
parser = argparse.ArgumentParser(
|
|
@@ -28,6 +43,25 @@ def parse_args() -> argparse.Namespace:
|
|
|
28
43
|
parser.add_argument("--tag", help="Filter by topic or block tag")
|
|
29
44
|
parser.add_argument("--priority", choices=["high", "medium", "low"], help="Filter by priority")
|
|
30
45
|
parser.add_argument("--text", help="Case-insensitive text search across topic and block fields")
|
|
46
|
+
parser.add_argument(
|
|
47
|
+
"--fuzzy-text",
|
|
48
|
+
action="store_true",
|
|
49
|
+
help="Enable fuzzy matching for --text instead of strict substring matching",
|
|
50
|
+
)
|
|
51
|
+
parser.add_argument(
|
|
52
|
+
"--fuzzy-threshold",
|
|
53
|
+
type=float,
|
|
54
|
+
default=0.72,
|
|
55
|
+
help="Fuzzy score threshold between 0.0 and 1.0 (default: 0.72)",
|
|
56
|
+
)
|
|
57
|
+
parser.add_argument(
|
|
58
|
+
"--fuzzy-fields",
|
|
59
|
+
help=(
|
|
60
|
+
"Comma-separated fuzzy field paths. "
|
|
61
|
+
f"Supported: {', '.join(FUZZY_TEXT_FIELDS)}. "
|
|
62
|
+
"If omitted, all supported fields are searched."
|
|
63
|
+
),
|
|
64
|
+
)
|
|
31
65
|
parser.add_argument(
|
|
32
66
|
"--include-related",
|
|
33
67
|
action="store_true",
|
|
@@ -56,6 +90,111 @@ def _lower(value: Any) -> str:
|
|
|
56
90
|
return str(value).strip().lower()
|
|
57
91
|
|
|
58
92
|
|
|
93
|
+
def _field_text(value: Any) -> str:
|
|
94
|
+
if value is None:
|
|
95
|
+
return ""
|
|
96
|
+
if isinstance(value, str):
|
|
97
|
+
return value.strip()
|
|
98
|
+
if isinstance(value, (list, tuple, set)):
|
|
99
|
+
return " ".join(part for part in (_field_text(item) for item in value) if part)
|
|
100
|
+
return str(value).strip()
|
|
101
|
+
|
|
102
|
+
|
|
103
|
+
def _entry_field_map(entry: dict[str, Any]) -> dict[str, str]:
    """Map each supported fuzzy field path to its flattened text for one entry.

    Block-level values are read from the ``block_title``, ``block_rationale``
    and ``block_tags`` keys of ``entry``; topic-level values are read from the
    nested ``topic`` mapping. Every value is flattened with ``_field_text``.

    Args:
        entry: Flattened topic entry.

    Returns:
        A dict keyed by field path (``"block.title"``, ``"topic.tags"``, ...)
        whose values are plain strings (``""`` when the source is missing).
    """
    # ``dict.get(key, {})`` only falls back when the key is absent; a topic
    # that is present but null would otherwise crash on ``topic.get`` below.
    topic = entry.get("topic") or {}
    return {
        "block.title": _field_text(entry.get("block_title", "")),
        "block.rationale": _field_text(entry.get("block_rationale", "")),
        "block.tags": _field_text(entry.get("block_tags", [])),
        "topic.title": _field_text(topic.get("title", "")),
        "topic.category": _field_text(topic.get("category", "")),
        "topic.why_it_matters": _field_text(topic.get("why_it_matters", "")),
        "topic.research_questions": _field_text(topic.get("research_questions", [])),
        "topic.keywords": _field_text(topic.get("keywords", [])),
        "topic.tags": _field_text(topic.get("tags", [])),
    }
|
|
116
|
+
|
|
117
|
+
|
|
118
|
+
def _parse_fuzzy_fields(raw_fields: str | None) -> list[str]:
    """Parse a comma-separated list of fuzzy field paths.

    Args:
        raw_fields: Raw option value; ``None`` or ``""`` means "not given".

    Returns:
        The requested field paths, de-duplicated in first-seen order, or an
        empty list when nothing was given.

    Raises:
        ValueError: ``FUZZY_FIELDS_EMPTY`` when the value holds only
            separators/whitespace, or ``UNKNOWN_FUZZY_FIELDS`` when a path is
            not in ``FUZZY_TEXT_FIELDS``.
    """
    if not raw_fields:
        return []

    stripped = (chunk.strip() for chunk in raw_fields.split(","))
    # dict.fromkeys drops repeats while keeping the order of first appearance.
    requested = list(dict.fromkeys(name for name in stripped if name))
    if not requested:
        raise ValueError("FUZZY_FIELDS_EMPTY")

    unknown = sorted(name for name in requested if name not in FUZZY_TEXT_FIELDS)
    if unknown:
        supported = ", ".join(FUZZY_TEXT_FIELDS)
        invalid = ", ".join(unknown)
        raise ValueError(f"UNKNOWN_FUZZY_FIELDS: {invalid}. Supported fields: {supported}")

    return requested
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def _tokenize(value: str) -> list[str]:
    """Return every ``TOKEN_PATTERN`` match in the lower-cased value, in order."""
    normalized = _lower(value)
    return TOKEN_PATTERN.findall(normalized)
|
|
140
|
+
|
|
141
|
+
|
|
142
|
+
def _token_overlap_ratio(query: str, candidate: str) -> float:
    """Return the share of distinct query tokens that also occur in candidate.

    Yields ``0.0`` when either side has no tokens.
    """
    wanted = frozenset(_tokenize(query))
    available = frozenset(_tokenize(candidate))
    if not (wanted and available):
        return 0.0
    shared = wanted.intersection(available)
    return len(shared) / float(len(wanted))
|
|
148
|
+
|
|
149
|
+
|
|
150
|
+
def _fuzzy_score(query: str, candidate: str) -> float:
    """Score how well ``query`` matches ``candidate``.

    Both strings are normalized with ``_lower``. An empty side scores ``0.0``
    and a plain substring hit scores ``1.0``. Otherwise the result is the best
    of: the ``SequenceMatcher`` ratio against the whole candidate, the token
    overlap ratio, and the ``SequenceMatcher`` ratio against every run of
    consecutive candidate tokens up to a bounded width.
    """
    needle = _lower(query)
    haystack = _lower(candidate)
    if not (needle and haystack):
        return 0.0
    if needle in haystack:
        return 1.0

    top = max(
        SequenceMatcher(None, needle, haystack).ratio(),
        _token_overlap_ratio(needle, haystack),
    )

    words = _tokenize(haystack)
    needle_word_count = max(1, len(_tokenize(needle)))
    # Window width is capped at one token more than the query (but at least 3),
    # and never more than the candidate actually has.
    widest = min(len(words), max(needle_word_count + 1, 3))
    for width in range(1, widest + 1):
        for offset in range(len(words) - width + 1):
            window = " ".join(words[offset : offset + width])
            top = max(top, SequenceMatcher(None, needle, window).ratio())
    return top
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def _fuzzy_text_match(
    query: str,
    entry: dict[str, Any],
    *,
    threshold: float,
    fields: list[str],
) -> tuple[bool, float, list[str]]:
    """Fuzzy-match ``query`` against selected fields of one entry.

    Args:
        query: Text to look for.
        entry: Entry whose fields are flattened by ``_entry_field_map``.
        threshold: Minimum score for a field to count as matched.
        fields: Field paths to search; an empty list means every path in
            ``FUZZY_TEXT_FIELDS``.

    Returns:
        ``(is_match, best_score, matched_fields)`` where ``matched_fields`` is
        the sorted list of field paths scoring at least ``threshold``.
    """
    texts = _entry_field_map(entry)
    names = fields if fields else list(FUZZY_TEXT_FIELDS)

    top = 0.0
    hits: set[str] = set()
    for name in names:
        value = _fuzzy_score(query, texts.get(name, ""))
        top = max(top, value)
        if value >= threshold:
            hits.add(name)

    return top >= threshold, top, sorted(hits)
|
|
196
|
+
|
|
197
|
+
|
|
59
198
|
def _searchable_text(block: dict[str, Any], topic: dict[str, Any]) -> str:
|
|
60
199
|
fields = [
|
|
61
200
|
topic.get("title", ""),
|
|
@@ -99,6 +238,21 @@ def main() -> int:
|
|
|
99
238
|
args = parse_args()
|
|
100
239
|
payload_path = Path(args.file)
|
|
101
240
|
|
|
241
|
+
if not 0.0 <= args.fuzzy_threshold <= 1.0:
|
|
242
|
+
print("INVALID_FUZZY_THRESHOLD: must be between 0.0 and 1.0", file=sys.stderr)
|
|
243
|
+
return 2
|
|
244
|
+
if args.fuzzy_text and not args.text:
|
|
245
|
+
print("FUZZY_TEXT_REQUIRES_TEXT_FILTER: provide --text when using --fuzzy-text", file=sys.stderr)
|
|
246
|
+
return 2
|
|
247
|
+
if args.fuzzy_fields and not args.fuzzy_text:
|
|
248
|
+
print("FUZZY_FIELDS_REQUIRES_FUZZY_TEXT: use --fuzzy-text with --fuzzy-fields", file=sys.stderr)
|
|
249
|
+
return 2
|
|
250
|
+
try:
|
|
251
|
+
fuzzy_fields = _parse_fuzzy_fields(args.fuzzy_fields)
|
|
252
|
+
except ValueError as exc:
|
|
253
|
+
print(str(exc), file=sys.stderr)
|
|
254
|
+
return 2
|
|
255
|
+
|
|
102
256
|
try:
|
|
103
257
|
ideation_payload, source_type = read_payload(payload_path)
|
|
104
258
|
except ValueError as exc:
|
|
@@ -168,6 +322,7 @@ def main() -> int:
|
|
|
168
322
|
)
|
|
169
323
|
|
|
170
324
|
matched_topics: list[dict[str, Any]] = []
|
|
325
|
+
fuzzy_match_meta: dict[str, dict[str, Any]] = {}
|
|
171
326
|
for entry in flat_topics:
|
|
172
327
|
topic = entry["topic"]
|
|
173
328
|
topic_id = str(topic.get("topic_id", "")).strip()
|
|
@@ -189,15 +344,29 @@ def main() -> int:
|
|
|
189
344
|
if tag_value not in topic_tags and tag_value not in block_tags:
|
|
190
345
|
continue
|
|
191
346
|
if args.text:
|
|
192
|
-
if
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
347
|
+
if args.fuzzy_text:
|
|
348
|
+
is_match, score, matched_fields = _fuzzy_text_match(
|
|
349
|
+
args.text,
|
|
350
|
+
entry,
|
|
351
|
+
threshold=args.fuzzy_threshold,
|
|
352
|
+
fields=fuzzy_fields,
|
|
353
|
+
)
|
|
354
|
+
if not is_match:
|
|
355
|
+
continue
|
|
356
|
+
fuzzy_match_meta[topic_id] = {
|
|
357
|
+
"score": round(score, 4),
|
|
358
|
+
"matched_fields": matched_fields,
|
|
359
|
+
}
|
|
360
|
+
else:
|
|
361
|
+
if _lower(args.text) not in _searchable_text(
|
|
362
|
+
{
|
|
363
|
+
"title": entry["block_title"],
|
|
364
|
+
"rationale": entry["block_rationale"],
|
|
365
|
+
"tags": entry["block_tags"],
|
|
366
|
+
},
|
|
367
|
+
topic,
|
|
368
|
+
):
|
|
369
|
+
continue
|
|
201
370
|
|
|
202
371
|
matched_topics.append(entry)
|
|
203
372
|
|
|
@@ -284,6 +453,11 @@ def main() -> int:
|
|
|
284
453
|
related_entities.append(entity)
|
|
285
454
|
topic_payload["related_entity_details"] = related_entities
|
|
286
455
|
|
|
456
|
+
if args.fuzzy_text and args.text:
|
|
457
|
+
fuzzy_metadata = fuzzy_match_meta.get(str(topic_payload.get("topic_id", "")).strip())
|
|
458
|
+
if fuzzy_metadata is not None:
|
|
459
|
+
topic_payload["fuzzy_match"] = fuzzy_metadata
|
|
460
|
+
|
|
287
461
|
topics_result.append(topic_payload)
|
|
288
462
|
|
|
289
463
|
related_payload: dict[str, Any] = {}
|
|
@@ -319,6 +493,9 @@ def main() -> int:
|
|
|
319
493
|
"tag": args.tag or None,
|
|
320
494
|
"priority": args.priority or None,
|
|
321
495
|
"text": args.text or None,
|
|
496
|
+
"fuzzy_text": bool(args.fuzzy_text),
|
|
497
|
+
"fuzzy_threshold": args.fuzzy_threshold if args.fuzzy_text else None,
|
|
498
|
+
"fuzzy_fields": (fuzzy_fields or list(FUZZY_TEXT_FIELDS)) if args.fuzzy_text else None,
|
|
322
499
|
"include_related": bool(args.include_related),
|
|
323
500
|
},
|
|
324
501
|
"summary": {
|
|
@@ -333,6 +510,9 @@ def main() -> int:
|
|
|
333
510
|
},
|
|
334
511
|
}
|
|
335
512
|
|
|
513
|
+
if args.fuzzy_text and fuzzy_match_meta:
|
|
514
|
+
response["summary"]["best_fuzzy_score"] = max(meta["score"] for meta in fuzzy_match_meta.values())
|
|
515
|
+
|
|
336
516
|
if related_payload:
|
|
337
517
|
response["related"] = related_payload
|
|
338
518
|
|
|
@@ -0,0 +1,294 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""General-purpose fuzzy search over JSON scalar fields.
|
|
3
|
+
|
|
4
|
+
This script is intentionally standalone and not wired into any existing Cadence flow.
|
|
5
|
+
It searches recursively across arbitrary JSON structures and can target specific fields
|
|
6
|
+
using key/path patterns. Identifier-like keys are excluded by default.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
import argparse
|
|
12
|
+
from difflib import SequenceMatcher
|
|
13
|
+
from fnmatch import fnmatchcase
|
|
14
|
+
import json
|
|
15
|
+
import re
|
|
16
|
+
import sys
|
|
17
|
+
from pathlib import Path
|
|
18
|
+
from typing import Any
|
|
19
|
+
|
|
20
|
+
TOKEN_PATTERN = re.compile(r"[a-z0-9]+")
|
|
21
|
+
IDENTIFIER_KEY_PATTERN = re.compile(
|
|
22
|
+
r"(?:^|[_-])(id|ids|uuid|guid|slug|slugs|identifier|identifiers|key|keys|token|tokens|hash|checksum|fingerprint|ref|refs|code|codes|path|paths|url|urls|uri|uris|file|files|filepath|filepaths)$",
|
|
23
|
+
re.IGNORECASE,
|
|
24
|
+
)
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def parse_args() -> argparse.Namespace:
    """Define the fuzzy-search command-line options and parse the arguments.

    Returns:
        Namespace with ``file``, ``text``, ``threshold``, ``limit``, ``field``,
        ``exclude_field``, ``include_identifiers``, ``include_non_string`` and
        ``min_length``.
    """
    default_file = str(Path(".cadence") / "cadence.json")
    include_help = (
        "Include only matching fields (repeatable, supports * wildcard). "
        "Matches against full path and terminal key. Example: --field 'ideation.*.title' --field title"
    )
    exclude_help = (
        "Exclude matching fields (repeatable, supports * wildcard). "
        "Matches against full path and terminal key."
    )

    cli = argparse.ArgumentParser(
        description="Fuzzy-search JSON scalar fields (all current/future fields by default)."
    )
    cli.add_argument(
        "--file",
        default=default_file,
        help="Path to any JSON file (default: .cadence/cadence.json)",
    )
    cli.add_argument("--text", required=True, help="Query text to fuzzy-match")
    cli.add_argument(
        "--threshold",
        type=float,
        default=0.72,
        help="Minimum fuzzy score between 0.0 and 1.0 (default: 0.72)",
    )
    cli.add_argument(
        "--limit",
        type=int,
        default=25,
        help="Maximum matches returned, sorted by score descending (default: 25)",
    )
    # Both pattern options are repeatable; each occurrence is appended.
    cli.add_argument("--field", action="append", default=[], help=include_help)
    cli.add_argument("--exclude-field", action="append", default=[], help=exclude_help)
    cli.add_argument(
        "--include-identifiers",
        action="store_true",
        help="Include normally excluded technical keys (ids, uuid, slug, key, path, url, file, etc.)",
    )
    cli.add_argument(
        "--include-non-string",
        action="store_true",
        help="Also search numbers/booleans (strings are always searched)",
    )
    cli.add_argument(
        "--min-length",
        type=int,
        default=2,
        help="Skip string values shorter than this length (default: 2)",
    )
    return cli.parse_args()
|
|
84
|
+
|
|
85
|
+
|
|
86
|
+
def _lower(value: Any) -> str:
|
|
87
|
+
return str(value).strip().lower()
|
|
88
|
+
|
|
89
|
+
|
|
90
|
+
def _normalize_key(key: str) -> str:
    """Return the key in lower-case with camelCase boundaries turned into ``_``."""
    # Put "_" between a lower-case letter/digit and the upper-case letter that
    # follows it, so identifier checks see "userId" as "user_id".
    with_boundaries = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", str(key))
    return _lower(with_boundaries)
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
def _is_identifier_key(key: str) -> bool:
    """Tell whether the normalized key matches ``IDENTIFIER_KEY_PATTERN``."""
    return IDENTIFIER_KEY_PATTERN.search(_normalize_key(key)) is not None
|
|
99
|
+
|
|
100
|
+
|
|
101
|
+
def _normalize_patterns(values: list[str]) -> list[str]:
    """Split, normalize and de-duplicate field patterns.

    Each value may hold several comma-separated patterns. Every pattern is
    normalized with ``_lower``; empty results are dropped and repeats are
    removed while keeping first-seen order.
    """
    ordered: dict[str, None] = {}
    for raw in values:
        for piece in str(raw).split(","):
            cleaned = _lower(piece)
            if cleaned:
                ordered.setdefault(cleaned, None)
    return list(ordered)
|
|
109
|
+
|
|
110
|
+
|
|
111
|
+
def _path_or_key_matches(patterns: list[str], path: str, key: str) -> bool:
    """Tell whether any pattern matches the full path or the terminal key.

    The path is compared after ``_lower`` and the key after ``_normalize_key``;
    matching uses ``fnmatchcase``.
    """
    lowered_path = _lower(path)
    normalized_key = _normalize_key(key)
    for pattern in patterns:
        if fnmatchcase(lowered_path, pattern):
            return True
        if fnmatchcase(normalized_key, pattern):
            return True
    return False
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def _tokenize(value: str) -> list[str]:
    """Return every ``TOKEN_PATTERN`` match in the lower-cased value, in order."""
    normalized = _lower(value)
    return TOKEN_PATTERN.findall(normalized)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def _token_overlap_ratio(query: str, candidate: str) -> float:
    """Return the share of distinct query tokens that also occur in candidate.

    Yields ``0.0`` when either side has no tokens.
    """
    wanted = frozenset(_tokenize(query))
    available = frozenset(_tokenize(candidate))
    if not (wanted and available):
        return 0.0
    shared = wanted.intersection(available)
    return len(shared) / float(len(wanted))
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
def _fuzzy_score(query: str, candidate: str) -> float:
    """Score how well ``query`` matches ``candidate``.

    Both strings are normalized with ``_lower``. An empty side scores ``0.0``
    and a plain substring hit scores ``1.0``. Otherwise the result is the best
    of: the ``SequenceMatcher`` ratio against the whole candidate, the token
    overlap ratio, and the ``SequenceMatcher`` ratio against every run of
    consecutive candidate tokens up to a bounded width.
    """
    needle = _lower(query)
    haystack = _lower(candidate)
    if not (needle and haystack):
        return 0.0
    if needle in haystack:
        return 1.0

    top = max(
        SequenceMatcher(None, needle, haystack).ratio(),
        _token_overlap_ratio(needle, haystack),
    )

    words = _tokenize(haystack)
    needle_word_count = max(1, len(_tokenize(needle)))
    # Window width is capped at one token more than the query (but at least 3),
    # and never more than the candidate actually has.
    widest = min(len(words), max(needle_word_count + 1, 3))
    for width in range(1, widest + 1):
        for offset in range(len(words) - width + 1):
            window = " ".join(words[offset : offset + width])
            top = max(top, SequenceMatcher(None, needle, window).ratio())
    return top
|
|
152
|
+
|
|
153
|
+
|
|
154
|
+
def _preview(value: str, limit: int = 220) -> str:
|
|
155
|
+
text = value.strip()
|
|
156
|
+
if len(text) <= limit:
|
|
157
|
+
return text
|
|
158
|
+
return text[: limit - 3] + "..."
|
|
159
|
+
|
|
160
|
+
|
|
161
|
+
def _iter_scalar_candidates(node: Any, *, path: str = "", key: str = ""):
|
|
162
|
+
if isinstance(node, dict):
|
|
163
|
+
for child_key, child_value in node.items():
|
|
164
|
+
child_key_text = str(child_key)
|
|
165
|
+
child_path = f"{path}.{child_key_text}" if path else child_key_text
|
|
166
|
+
yield from _iter_scalar_candidates(child_value, path=child_path, key=child_key_text)
|
|
167
|
+
return
|
|
168
|
+
|
|
169
|
+
if isinstance(node, list):
|
|
170
|
+
for index, item in enumerate(node):
|
|
171
|
+
child_path = f"{path}[{index}]"
|
|
172
|
+
yield from _iter_scalar_candidates(item, path=child_path, key=key)
|
|
173
|
+
return
|
|
174
|
+
|
|
175
|
+
yield {
|
|
176
|
+
"path": path,
|
|
177
|
+
"key": key,
|
|
178
|
+
"value": node,
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
|
|
182
|
+
def _load_json(path: Path) -> Any:
|
|
183
|
+
try:
|
|
184
|
+
return json.loads(path.read_text(encoding="utf-8"))
|
|
185
|
+
except OSError as exc:
|
|
186
|
+
raise ValueError(f"PAYLOAD_READ_FAILED: {exc}") from exc
|
|
187
|
+
except json.JSONDecodeError as exc:
|
|
188
|
+
raise ValueError(f"INVALID_PAYLOAD_JSON: {exc}") from exc
|
|
189
|
+
|
|
190
|
+
|
|
191
|
+
def main() -> int:
    """Run the fuzzy JSON search and print a JSON report to stdout.

    Returns:
        ``0`` after printing the report, or ``2`` after printing a coded error
        line to stderr (invalid threshold, limit or min-length, or a payload
        that cannot be loaded).
    """
    args = parse_args()
    payload_path = Path(args.file)

    # Validate numeric options up front; only the first failing check is reported.
    problem = None
    if not 0.0 <= args.threshold <= 1.0:
        problem = "INVALID_THRESHOLD: must be between 0.0 and 1.0"
    elif args.limit < 1:
        problem = "INVALID_LIMIT: must be >= 1"
    elif args.min_length < 0:
        problem = "INVALID_MIN_LENGTH: must be >= 0"
    if problem is not None:
        print(problem, file=sys.stderr)
        return 2

    include_patterns = _normalize_patterns(args.field)
    exclude_patterns = _normalize_patterns(args.exclude_field)

    try:
        payload = _load_json(payload_path)
    except ValueError as exc:
        print(str(exc), file=sys.stderr)
        return 2

    scanned = 0
    considered = 0
    hits: list[dict[str, Any]] = []

    for scanned, candidate in enumerate(_iter_scalar_candidates(payload), start=1):
        field_path = str(candidate["path"])
        field_key = str(candidate["key"])

        # Field filters: identifier-like keys, then include and exclude patterns.
        if not args.include_identifiers and _is_identifier_key(field_key):
            continue
        if include_patterns and not _path_or_key_matches(include_patterns, field_path, field_key):
            continue
        if exclude_patterns and _path_or_key_matches(exclude_patterns, field_path, field_key):
            continue

        raw_value = candidate["value"]
        if isinstance(raw_value, str):
            text_value, value_type = raw_value.strip(), "string"
        elif args.include_non_string and isinstance(raw_value, (int, float, bool)):
            text_value, value_type = str(raw_value), type(raw_value).__name__
        else:
            # Anything else (e.g. null) is never searched.
            continue

        if len(text_value) < args.min_length:
            continue

        considered += 1
        score = _fuzzy_score(args.text, text_value)
        if score < args.threshold:
            continue

        hits.append(
            {
                "path": field_path,
                "field": field_key,
                "value_type": value_type,
                "score": round(score, 4),
                "value_preview": _preview(text_value),
            }
        )

    # Best score first; ties are ordered by path.
    hits.sort(key=lambda item: (-item["score"], item["path"]))
    results = hits[: args.limit]

    query_info = {
        "text": args.text,
        "threshold": args.threshold,
        "limit": args.limit,
        "field": include_patterns or None,
        "exclude_field": exclude_patterns or None,
        "include_identifiers": bool(args.include_identifiers),
        "include_non_string": bool(args.include_non_string),
        "min_length": args.min_length,
    }
    summary = {
        "candidates_scanned": scanned,
        "candidates_considered": considered,
        "matches_before_limit": len(hits),
        "matches_returned": len(results),
    }
    if results:
        # Results are sorted by descending score, so the first one is the best.
        summary["best_score"] = results[0]["score"]

    response: dict[str, Any] = {
        "status": "ok",
        "path": str(payload_path),
        "query": query_info,
        "summary": summary,
        "results": results,
    }

    print(json.dumps(response, indent=4))
    return 0
|
|
291
|
+
|
|
292
|
+
|
|
293
|
+
if __name__ == "__main__":
|
|
294
|
+
raise SystemExit(main())
|
|
@@ -35,6 +35,8 @@ description: Guide users from a rough concept to a fully defined project idea th
|
|
|
35
35
|
- scope boundaries (in-scope vs out-of-scope)
|
|
36
36
|
- implementation approach (for example tools, tech stack, process, platforms)
|
|
37
37
|
- delivery shape (milestones, sequencing, constraints, risks, success signals)
|
|
38
|
+
- assume execution is AI-driven by default; if timeline expectations are discussed, calibrate estimates to roughly 10-100x faster than human-only delivery.
|
|
39
|
+
- do not force timeline-specific prompts just to apply this assumption.
|
|
38
40
|
13. Build a complete later-phase research agenda from the ideation conversation:
|
|
39
41
|
- Infer all relevant research topics that should be explored in later phases.
|
|
40
42
|
- Keep the agenda domain-agnostic and driven by what the user discussed.
|