cadence-skill-installer 0.2.11 → 0.2.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "cadence-skill-installer",
3
- "version": "0.2.11",
3
+ "version": "0.2.13",
4
4
  "description": "Install the Cadence skill into supported AI tool skill directories.",
5
5
  "repository": "https://github.com/snowdamiz/cadence",
6
6
  "private": false,
@@ -4,13 +4,28 @@
4
4
  from __future__ import annotations
5
5
 
6
6
  import argparse
7
+ from difflib import SequenceMatcher
7
8
  import json
9
+ import re
8
10
  import sys
9
11
  from pathlib import Path
10
12
  from typing import Any
11
13
 
12
14
  from ideation_research import ResearchAgendaValidationError, normalize_ideation_research, slugify
13
15
 
16
+ FUZZY_TEXT_FIELDS: tuple[str, ...] = (
17
+ "block.title",
18
+ "block.rationale",
19
+ "block.tags",
20
+ "topic.title",
21
+ "topic.category",
22
+ "topic.why_it_matters",
23
+ "topic.research_questions",
24
+ "topic.keywords",
25
+ "topic.tags",
26
+ )
27
+ TOKEN_PATTERN = re.compile(r"[a-z0-9]+")
28
+
14
29
 
15
30
  def parse_args() -> argparse.Namespace:
16
31
  parser = argparse.ArgumentParser(
@@ -28,6 +43,25 @@ def parse_args() -> argparse.Namespace:
28
43
  parser.add_argument("--tag", help="Filter by topic or block tag")
29
44
  parser.add_argument("--priority", choices=["high", "medium", "low"], help="Filter by priority")
30
45
  parser.add_argument("--text", help="Case-insensitive text search across topic and block fields")
46
+ parser.add_argument(
47
+ "--fuzzy-text",
48
+ action="store_true",
49
+ help="Enable fuzzy matching for --text instead of strict substring matching",
50
+ )
51
+ parser.add_argument(
52
+ "--fuzzy-threshold",
53
+ type=float,
54
+ default=0.72,
55
+ help="Fuzzy score threshold between 0.0 and 1.0 (default: 0.72)",
56
+ )
57
+ parser.add_argument(
58
+ "--fuzzy-fields",
59
+ help=(
60
+ "Comma-separated fuzzy field paths. "
61
+ f"Supported: {', '.join(FUZZY_TEXT_FIELDS)}. "
62
+ "If omitted, all supported fields are searched."
63
+ ),
64
+ )
31
65
  parser.add_argument(
32
66
  "--include-related",
33
67
  action="store_true",
@@ -56,6 +90,111 @@ def _lower(value: Any) -> str:
56
90
  return str(value).strip().lower()
57
91
 
58
92
 
93
+ def _field_text(value: Any) -> str:
94
+ if value is None:
95
+ return ""
96
+ if isinstance(value, str):
97
+ return value.strip()
98
+ if isinstance(value, (list, tuple, set)):
99
+ return " ".join(part for part in (_field_text(item) for item in value) if part)
100
+ return str(value).strip()
101
+
102
+
103
def _entry_field_map(entry: dict[str, Any]) -> dict[str, str]:
    """Return the searchable text of ``entry`` keyed by fuzzy field path.

    ``block.*`` values are read from the ``block_*`` keys stored directly on
    the entry, and ``topic.*`` values from its nested ``topic`` mapping. Every
    value is passed through ``_field_text``, so a missing field maps to ``""``.
    """
    topic = entry.get("topic", {})
    field_map: dict[str, str] = {}

    # Block-level fields live on the entry itself as ``block_<name>``.
    for name, default in (("title", ""), ("rationale", ""), ("tags", [])):
        field_map[f"block.{name}"] = _field_text(entry.get(f"block_{name}", default))

    # Topic-level fields use the same name in the topic mapping and the path.
    for name, default in (
        ("title", ""),
        ("category", ""),
        ("why_it_matters", ""),
        ("research_questions", []),
        ("keywords", []),
        ("tags", []),
    ):
        field_map[f"topic.{name}"] = _field_text(topic.get(name, default))

    return field_map
116
+
117
+
118
def _parse_fuzzy_fields(raw_fields: str | None) -> list[str]:
    """Parse the comma-separated ``--fuzzy-fields`` value.

    Returns the requested field paths in first-seen order without duplicates,
    or an empty list when ``raw_fields`` is ``None`` or empty.

    Raises:
        ValueError: ``FUZZY_FIELDS_EMPTY`` when the value holds only
            separators or whitespace, or ``UNKNOWN_FUZZY_FIELDS`` when a name
            is not listed in ``FUZZY_TEXT_FIELDS``.
    """
    if not raw_fields:
        return []

    requested = [name for name in (piece.strip() for piece in raw_fields.split(",")) if name]
    if not requested:
        raise ValueError("FUZZY_FIELDS_EMPTY")

    unknown = sorted(set(requested).difference(FUZZY_TEXT_FIELDS))
    if unknown:
        supported = ", ".join(FUZZY_TEXT_FIELDS)
        invalid = ", ".join(unknown)
        raise ValueError(f"UNKNOWN_FUZZY_FIELDS: {invalid}. Supported fields: {supported}")

    # dict keys keep insertion order, so this drops repeats while keeping the
    # order in which each field was first requested.
    return list(dict.fromkeys(requested))
136
+
137
+
138
def _tokenize(value: str) -> list[str]:
    """Split ``value`` into lowercase alphanumeric tokens.

    The text is normalized with ``_lower``; every run matched by
    ``TOKEN_PATTERN`` becomes one token, in order of appearance.
    """
    normalized = _lower(value)
    return TOKEN_PATTERN.findall(normalized)
140
+
141
+
142
def _token_overlap_ratio(query: str, candidate: str) -> float:
    """Return the fraction of distinct query tokens that occur in ``candidate``.

    The result is ``0.0`` when either side produces no tokens.
    """
    wanted = frozenset(_tokenize(query))
    available = frozenset(_tokenize(candidate))
    if not (wanted and available):
        return 0.0
    shared = wanted.intersection(available)
    return len(shared) / float(len(wanted))
148
+
149
+
150
def _fuzzy_score(query: str, candidate: str) -> float:
    """Score how closely ``query`` matches ``candidate``.

    Both strings are normalized with ``_lower``. An empty side scores ``0.0``
    and a plain substring hit scores ``1.0``. Otherwise the result is the
    highest of:

    * the ``SequenceMatcher`` ratio against the whole candidate,
    * the token overlap ratio, and
    * the ``SequenceMatcher`` ratio against every window of consecutive
      candidate tokens, up to ``max(query token count + 1, 3)`` tokens wide.
    """
    needle = _lower(query)
    haystack = _lower(candidate)
    if not (needle and haystack):
        return 0.0
    if needle in haystack:
        return 1.0

    def similarity(text: str) -> float:
        return SequenceMatcher(None, needle, text).ratio()

    tokens = _tokenize(haystack)
    needle_token_count = max(1, len(_tokenize(needle)))
    widest = min(len(tokens), max(needle_token_count + 1, 3))

    # Sliding windows let a short query match a phrase inside a long field.
    window_scores = (
        similarity(" ".join(tokens[start : start + width]))
        for width in range(1, widest + 1)
        for start in range(len(tokens) - width + 1)
    )
    return max(
        similarity(haystack),
        _token_overlap_ratio(needle, haystack),
        max(window_scores, default=0.0),
    )
173
+
174
+
175
def _fuzzy_text_match(
    query: str,
    entry: dict[str, Any],
    *,
    threshold: float,
    fields: list[str],
) -> tuple[bool, float, list[str]]:
    """Fuzzy-match ``query`` against the selected text fields of ``entry``.

    Args:
        query: Text to look for.
        entry: Flattened topic entry, read through ``_entry_field_map``.
        threshold: Minimum score for a field to count as matched.
        fields: Field paths to search; an empty list selects every path in
            ``FUZZY_TEXT_FIELDS``.

    Returns:
        ``(is_match, best_score, matched_fields)``: whether the best field
        score reached ``threshold``, that best score, and the sorted paths of
        all fields whose score reached ``threshold``.
    """
    texts = _entry_field_map(entry)
    selected = fields if fields else list(FUZZY_TEXT_FIELDS)
    scores = {name: _fuzzy_score(query, texts.get(name, "")) for name in selected}

    # 0.0 is the floor so the best score is defined even with no field scores.
    top_score = max([0.0, *scores.values()])
    hits = sorted(name for name, score in scores.items() if score >= threshold)
    return top_score >= threshold, top_score, hits
196
+
197
+
59
198
  def _searchable_text(block: dict[str, Any], topic: dict[str, Any]) -> str:
60
199
  fields = [
61
200
  topic.get("title", ""),
@@ -99,6 +238,21 @@ def main() -> int:
99
238
  args = parse_args()
100
239
  payload_path = Path(args.file)
101
240
 
241
+ if not 0.0 <= args.fuzzy_threshold <= 1.0:
242
+ print("INVALID_FUZZY_THRESHOLD: must be between 0.0 and 1.0", file=sys.stderr)
243
+ return 2
244
+ if args.fuzzy_text and not args.text:
245
+ print("FUZZY_TEXT_REQUIRES_TEXT_FILTER: provide --text when using --fuzzy-text", file=sys.stderr)
246
+ return 2
247
+ if args.fuzzy_fields and not args.fuzzy_text:
248
+ print("FUZZY_FIELDS_REQUIRES_FUZZY_TEXT: use --fuzzy-text with --fuzzy-fields", file=sys.stderr)
249
+ return 2
250
+ try:
251
+ fuzzy_fields = _parse_fuzzy_fields(args.fuzzy_fields)
252
+ except ValueError as exc:
253
+ print(str(exc), file=sys.stderr)
254
+ return 2
255
+
102
256
  try:
103
257
  ideation_payload, source_type = read_payload(payload_path)
104
258
  except ValueError as exc:
@@ -168,6 +322,7 @@ def main() -> int:
168
322
  )
169
323
 
170
324
  matched_topics: list[dict[str, Any]] = []
325
+ fuzzy_match_meta: dict[str, dict[str, Any]] = {}
171
326
  for entry in flat_topics:
172
327
  topic = entry["topic"]
173
328
  topic_id = str(topic.get("topic_id", "")).strip()
@@ -189,15 +344,29 @@ def main() -> int:
189
344
  if tag_value not in topic_tags and tag_value not in block_tags:
190
345
  continue
191
346
  if args.text:
192
- if _lower(args.text) not in _searchable_text(
193
- {
194
- "title": entry["block_title"],
195
- "rationale": entry["block_rationale"],
196
- "tags": entry["block_tags"],
197
- },
198
- topic,
199
- ):
200
- continue
347
+ if args.fuzzy_text:
348
+ is_match, score, matched_fields = _fuzzy_text_match(
349
+ args.text,
350
+ entry,
351
+ threshold=args.fuzzy_threshold,
352
+ fields=fuzzy_fields,
353
+ )
354
+ if not is_match:
355
+ continue
356
+ fuzzy_match_meta[topic_id] = {
357
+ "score": round(score, 4),
358
+ "matched_fields": matched_fields,
359
+ }
360
+ else:
361
+ if _lower(args.text) not in _searchable_text(
362
+ {
363
+ "title": entry["block_title"],
364
+ "rationale": entry["block_rationale"],
365
+ "tags": entry["block_tags"],
366
+ },
367
+ topic,
368
+ ):
369
+ continue
201
370
 
202
371
  matched_topics.append(entry)
203
372
 
@@ -284,6 +453,11 @@ def main() -> int:
284
453
  related_entities.append(entity)
285
454
  topic_payload["related_entity_details"] = related_entities
286
455
 
456
+ if args.fuzzy_text and args.text:
457
+ fuzzy_metadata = fuzzy_match_meta.get(str(topic_payload.get("topic_id", "")).strip())
458
+ if fuzzy_metadata is not None:
459
+ topic_payload["fuzzy_match"] = fuzzy_metadata
460
+
287
461
  topics_result.append(topic_payload)
288
462
 
289
463
  related_payload: dict[str, Any] = {}
@@ -319,6 +493,9 @@ def main() -> int:
319
493
  "tag": args.tag or None,
320
494
  "priority": args.priority or None,
321
495
  "text": args.text or None,
496
+ "fuzzy_text": bool(args.fuzzy_text),
497
+ "fuzzy_threshold": args.fuzzy_threshold if args.fuzzy_text else None,
498
+ "fuzzy_fields": (fuzzy_fields or list(FUZZY_TEXT_FIELDS)) if args.fuzzy_text else None,
322
499
  "include_related": bool(args.include_related),
323
500
  },
324
501
  "summary": {
@@ -333,6 +510,9 @@ def main() -> int:
333
510
  },
334
511
  }
335
512
 
513
+ if args.fuzzy_text and fuzzy_match_meta:
514
+ response["summary"]["best_fuzzy_score"] = max(meta["score"] for meta in fuzzy_match_meta.values())
515
+
336
516
  if related_payload:
337
517
  response["related"] = related_payload
338
518
 
@@ -0,0 +1,294 @@
1
+ #!/usr/bin/env python3
2
+ """General-purpose fuzzy search over JSON scalar fields.
3
+
4
+ This script is intentionally standalone and not wired into any existing Cadence flow.
5
+ It searches recursively across arbitrary JSON structures and can target specific fields
6
+ using key/path patterns. Identifier-like keys are excluded by default.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ import argparse
12
+ from difflib import SequenceMatcher
13
+ from fnmatch import fnmatchcase
14
+ import json
15
+ import re
16
+ import sys
17
+ from pathlib import Path
18
+ from typing import Any
19
+
20
+ TOKEN_PATTERN = re.compile(r"[a-z0-9]+")
21
+ IDENTIFIER_KEY_PATTERN = re.compile(
22
+ r"(?:^|[_-])(id|ids|uuid|guid|slug|slugs|identifier|identifiers|key|keys|token|tokens|hash|checksum|fingerprint|ref|refs|code|codes|path|paths|url|urls|uri|uris|file|files|filepath|filepaths)$",
23
+ re.IGNORECASE,
24
+ )
25
+
26
+
27
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the fuzzy JSON search script.

    Only ``--text`` is required. ``--field`` and ``--exclude-field`` may be
    repeated and collect their values into lists.
    """
    default_file = Path(".cadence") / "cadence.json"
    # Declared in the order the options are registered with the parser.
    option_specs: list[tuple[str, dict[str, Any]]] = [
        (
            "--file",
            {
                "default": str(default_file),
                "help": "Path to any JSON file (default: .cadence/cadence.json)",
            },
        ),
        ("--text", {"required": True, "help": "Query text to fuzzy-match"}),
        (
            "--threshold",
            {
                "type": float,
                "default": 0.72,
                "help": "Minimum fuzzy score between 0.0 and 1.0 (default: 0.72)",
            },
        ),
        (
            "--limit",
            {
                "type": int,
                "default": 25,
                "help": "Maximum matches returned, sorted by score descending (default: 25)",
            },
        ),
        (
            "--field",
            {
                "action": "append",
                "default": [],
                "help": (
                    "Include only matching fields (repeatable, supports * wildcard). "
                    "Matches against full path and terminal key. Example: --field 'ideation.*.title' --field title"
                ),
            },
        ),
        (
            "--exclude-field",
            {
                "action": "append",
                "default": [],
                "help": (
                    "Exclude matching fields (repeatable, supports * wildcard). "
                    "Matches against full path and terminal key."
                ),
            },
        ),
        (
            "--include-identifiers",
            {
                "action": "store_true",
                "help": "Include normally excluded technical keys (ids, uuid, slug, key, path, url, file, etc.)",
            },
        ),
        (
            "--include-non-string",
            {
                "action": "store_true",
                "help": "Also search numbers/booleans (strings are always searched)",
            },
        ),
        (
            "--min-length",
            {
                "type": int,
                "default": 2,
                "help": "Skip string values shorter than this length (default: 2)",
            },
        ),
    ]

    parser = argparse.ArgumentParser(
        description="Fuzzy-search JSON scalar fields (all current/future fields by default)."
    )
    for flag, options in option_specs:
        parser.add_argument(flag, **options)
    return parser.parse_args()
84
+
85
+
86
+ def _lower(value: Any) -> str:
87
+ return str(value).strip().lower()
88
+
89
+
90
def _normalize_key(key: str) -> str:
    """Return ``key`` in a lowercase, snake-like form for identifier checks.

    An underscore is inserted wherever a lowercase letter or digit is directly
    followed by an uppercase letter (``userId`` -> ``user_Id``). The result is
    then normalized with ``_lower``.
    """
    with_boundaries = re.sub(r"([a-z0-9])([A-Z])", r"\1_\2", str(key))
    return _lower(with_boundaries)
94
+
95
+
96
def _is_identifier_key(key: str) -> bool:
    """Report whether the normalized ``key`` matches ``IDENTIFIER_KEY_PATTERN``."""
    normalized_key = _normalize_key(key)
    return IDENTIFIER_KEY_PATTERN.search(normalized_key) is not None
99
+
100
+
101
def _normalize_patterns(values: list[str]) -> list[str]:
    """Flatten repeated, comma-separated pattern options into one clean list.

    Each value is split on commas and every piece is normalized with
    ``_lower``. Empty pieces are dropped, and duplicates are removed while
    keeping first-seen order.
    """
    cleaned = (_lower(piece) for raw_value in values for piece in str(raw_value).split(","))
    # dict keys preserve insertion order, so this de-duplicates in place.
    return list(dict.fromkeys(text for text in cleaned if text))
109
+
110
+
111
def _path_or_key_matches(patterns: list[str], path: str, key: str) -> bool:
    """Report whether any pattern matches the field's path or its key.

    The path is normalized with ``_lower`` and the key with
    ``_normalize_key``. Both are then compared to each pattern using
    ``fnmatchcase``.
    """
    targets = (_lower(path), _normalize_key(key))
    for pattern in patterns:
        for target in targets:
            if fnmatchcase(target, pattern):
                return True
    return False
115
+
116
+
117
def _tokenize(value: str) -> list[str]:
    """Return the lowercase alphanumeric tokens of ``value`` in order.

    ``value`` is normalized with ``_lower`` and scanned with
    ``TOKEN_PATTERN``.
    """
    lowered = _lower(value)
    tokens = TOKEN_PATTERN.findall(lowered)
    return tokens
119
+
120
+
121
def _token_overlap_ratio(query: str, candidate: str) -> float:
    """Return the share of distinct query tokens also found in ``candidate``.

    The result is ``0.0`` when the query or the candidate has no tokens.
    """
    query_vocab = set(_tokenize(query))
    candidate_vocab = set(_tokenize(candidate))
    if len(query_vocab) == 0 or len(candidate_vocab) == 0:
        return 0.0
    common = [token for token in query_vocab if token in candidate_vocab]
    return len(common) / float(len(query_vocab))
127
+
128
+
129
def _fuzzy_score(query: str, candidate: str) -> float:
    """Return a similarity score for ``query`` against ``candidate``.

    Inputs are normalized with ``_lower``. The score is ``0.0`` if either side
    is empty and ``1.0`` if the query is a substring of the candidate.
    Otherwise it is the maximum of the whole-string ``SequenceMatcher`` ratio,
    the token overlap ratio, and the ``SequenceMatcher`` ratio of the query
    against each run of consecutive candidate tokens. Runs are at most
    ``max(query token count + 1, 3)`` tokens long.
    """
    query_text = _lower(query)
    candidate_text = _lower(candidate)
    if query_text == "" or candidate_text == "":
        return 0.0
    if query_text in candidate_text:
        return 1.0

    scores = [
        SequenceMatcher(None, query_text, candidate_text).ratio(),
        _token_overlap_ratio(query_text, candidate_text),
    ]

    words = _tokenize(candidate_text)
    query_word_count = max(1, len(_tokenize(query_text)))
    longest_run = min(len(words), max(query_word_count + 1, 3))

    # Compare the query with every short run of adjacent candidate words so a
    # phrase buried in a long value can still score highly.
    for run_length in range(1, longest_run + 1):
        last_start = len(words) - run_length
        for first in range(last_start + 1):
            run_text = " ".join(words[first : first + run_length])
            scores.append(SequenceMatcher(None, query_text, run_text).ratio())

    return max(scores)
152
+
153
+
154
+ def _preview(value: str, limit: int = 220) -> str:
155
+ text = value.strip()
156
+ if len(text) <= limit:
157
+ return text
158
+ return text[: limit - 3] + "..."
159
+
160
+
161
+ def _iter_scalar_candidates(node: Any, *, path: str = "", key: str = ""):
162
+ if isinstance(node, dict):
163
+ for child_key, child_value in node.items():
164
+ child_key_text = str(child_key)
165
+ child_path = f"{path}.{child_key_text}" if path else child_key_text
166
+ yield from _iter_scalar_candidates(child_value, path=child_path, key=child_key_text)
167
+ return
168
+
169
+ if isinstance(node, list):
170
+ for index, item in enumerate(node):
171
+ child_path = f"{path}[{index}]"
172
+ yield from _iter_scalar_candidates(item, path=child_path, key=key)
173
+ return
174
+
175
+ yield {
176
+ "path": path,
177
+ "key": key,
178
+ "value": node,
179
+ }
180
+
181
+
182
+ def _load_json(path: Path) -> Any:
183
+ try:
184
+ return json.loads(path.read_text(encoding="utf-8"))
185
+ except OSError as exc:
186
+ raise ValueError(f"PAYLOAD_READ_FAILED: {exc}") from exc
187
+ except json.JSONDecodeError as exc:
188
+ raise ValueError(f"INVALID_PAYLOAD_JSON: {exc}") from exc
189
+
190
+
191
def main() -> int:
    """Run the fuzzy search and print the result as JSON.

    Returns:
        ``0`` after printing the JSON response to stdout, or ``2`` after
        printing an error line to stderr when an option is out of range or
        the payload cannot be loaded.
    """
    args = parse_args()
    payload_path = Path(args.file)

    # Option validation: the first failing rule is reported and ends the run.
    option_errors = (
        (not 0.0 <= args.threshold <= 1.0, "INVALID_THRESHOLD: must be between 0.0 and 1.0"),
        (args.limit < 1, "INVALID_LIMIT: must be >= 1"),
        (args.min_length < 0, "INVALID_MIN_LENGTH: must be >= 0"),
    )
    for failed, message in option_errors:
        if failed:
            print(message, file=sys.stderr)
            return 2

    include_patterns = _normalize_patterns(args.field)
    exclude_patterns = _normalize_patterns(args.exclude_field)

    try:
        payload = _load_json(payload_path)
    except ValueError as exc:
        print(str(exc), file=sys.stderr)
        return 2

    scanned = 0
    considered = 0
    matches: list[dict[str, Any]] = []

    for scanned, candidate in enumerate(_iter_scalar_candidates(payload), start=1):
        field_path = str(candidate["path"])
        field_key = str(candidate["key"])

        # Field filters: identifier-like keys, then include/exclude patterns.
        if (
            (not args.include_identifiers and _is_identifier_key(field_key))
            or (include_patterns and not _path_or_key_matches(include_patterns, field_path, field_key))
            or (exclude_patterns and _path_or_key_matches(exclude_patterns, field_path, field_key))
        ):
            continue

        # Strings are always searched; numbers and booleans only on request.
        raw_value = candidate["value"]
        if isinstance(raw_value, str):
            text_value, value_type = raw_value.strip(), "string"
        elif args.include_non_string and isinstance(raw_value, (int, float, bool)):
            text_value, value_type = str(raw_value), type(raw_value).__name__
        else:
            continue

        if len(text_value) < args.min_length:
            continue

        considered += 1
        score = _fuzzy_score(args.text, text_value)
        if score >= args.threshold:
            matches.append(
                {
                    "path": field_path,
                    "field": field_key,
                    "value_type": value_type,
                    "score": round(score, 4),
                    "value_preview": _preview(text_value),
                }
            )

    # Best score first; equal scores are ordered by path.
    matches.sort(key=lambda item: (-item["score"], item["path"]))
    results = matches[: args.limit]

    summary: dict[str, Any] = {
        "candidates_scanned": scanned,
        "candidates_considered": considered,
        "matches_before_limit": len(matches),
        "matches_returned": len(results),
    }
    if results:
        # Results are sorted by descending score, so the first is the best.
        summary["best_score"] = results[0]["score"]

    response: dict[str, Any] = {
        "status": "ok",
        "path": str(payload_path),
        "query": {
            "text": args.text,
            "threshold": args.threshold,
            "limit": args.limit,
            "field": include_patterns or None,
            "exclude_field": exclude_patterns or None,
            "include_identifiers": bool(args.include_identifiers),
            "include_non_string": bool(args.include_non_string),
            "min_length": args.min_length,
        },
        "summary": summary,
        "results": results,
    }

    print(json.dumps(response, indent=4))
    return 0
291
+
292
+
293
+ if __name__ == "__main__":
294
+ raise SystemExit(main())
@@ -35,6 +35,8 @@ description: Guide users from a rough concept to a fully defined project idea th
35
35
  - scope boundaries (in-scope vs out-of-scope)
36
36
  - implementation approach (for example tools, tech stack, process, platforms)
37
37
  - delivery shape (milestones, sequencing, constraints, risks, success signals)
38
+ - assume execution is AI-driven by default; if timeline expectations are discussed, calibrate estimates to roughly 10-100x faster than human-only delivery.
39
+ - do not force timeline-specific prompts just to apply this assumption.
38
40
  13. Build a complete later-phase research agenda from the ideation conversation:
39
41
  - Infer all relevant research topics that should be explored in later phases.
40
42
  - Keep the agenda domain-agnostic and driven by what the user discussed.