skill_seekers-2.7.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79)
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,528 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Conflict Detector for Multi-Source Skills
4
+
5
+ Detects conflicts between documentation and code:
6
+ - missing_in_docs: API exists in code but not documented
7
+ - missing_in_code: API documented but doesn't exist in code
8
+ - signature_mismatch: Different parameters/types between docs and code
9
+ - description_mismatch: Docs say one thing, code comments say another
10
+
11
+ Used by unified scraper to identify discrepancies before merging.
12
+ """
13
+
14
+ import json
15
+ import logging
16
+ from dataclasses import asdict, dataclass
17
+ from difflib import SequenceMatcher
18
+ from typing import Any
19
+
20
+ logging.basicConfig(level=logging.INFO)
21
+ logger = logging.getLogger(__name__)
22
+
23
+
24
+ @dataclass
25
+ class Conflict:
26
+ """Represents a conflict between documentation and code."""
27
+
28
+ type: str # 'missing_in_docs', 'missing_in_code', 'signature_mismatch', 'description_mismatch'
29
+ severity: str # 'low', 'medium', 'high'
30
+ api_name: str
31
+ docs_info: dict[str, Any] | None = None
32
+ code_info: dict[str, Any] | None = None
33
+ difference: str | None = None
34
+ suggestion: str | None = None
35
+
36
+
37
+ class ConflictDetector:
38
+ """
39
+ Detects conflicts between documentation and code sources.
40
+ """
41
+
42
+ def __init__(self, docs_data: dict[str, Any], github_data: dict[str, Any]):
43
+ """
44
+ Initialize conflict detector.
45
+
46
+ Args:
47
+ docs_data: Data from documentation scraper
48
+ github_data: Data from GitHub scraper with code analysis
49
+ """
50
+ self.docs_data = docs_data
51
+ self.github_data = github_data
52
+
53
+ # Extract API information from both sources
54
+ self.docs_apis = self._extract_docs_apis()
55
+ self.code_apis = self._extract_code_apis()
56
+
57
+ logger.info(f"Loaded {len(self.docs_apis)} APIs from documentation")
58
+ logger.info(f"Loaded {len(self.code_apis)} APIs from code")
59
+
60
+ def _extract_docs_apis(self) -> dict[str, dict[str, Any]]:
61
+ """
62
+ Extract API information from documentation data.
63
+
64
+ Returns:
65
+ Dict mapping API name to API info
66
+ """
67
+ apis = {}
68
+
69
+ # Documentation structure varies, but typically has 'pages' or 'references'
70
+ pages = self.docs_data.get("pages", {})
71
+
72
+ # Handle both dict and list formats
73
+ if isinstance(pages, dict):
74
+ # Format: {url: page_data, ...}
75
+ for url, page_data in pages.items():
76
+ content = page_data.get("content", "")
77
+ title = page_data.get("title", "")
78
+
79
+ # Simple heuristic: if title or URL contains "api", "reference", "class", "function"
80
+ # it might be an API page
81
+ if any(
82
+ keyword in title.lower() or keyword in url.lower()
83
+ for keyword in ["api", "reference", "class", "function", "method"]
84
+ ):
85
+ # Extract API signatures from content (simplified)
86
+ extracted_apis = self._parse_doc_content_for_apis(content, url)
87
+ apis.update(extracted_apis)
88
+ elif isinstance(pages, list):
89
+ # Format: [{url: '...', apis: [...]}, ...]
90
+ for page in pages:
91
+ url = page.get("url", "")
92
+ page_apis = page.get("apis", [])
93
+
94
+ # If APIs are already extracted in the page data
95
+ for api in page_apis:
96
+ api_name = api.get("name", "")
97
+ if api_name:
98
+ apis[api_name] = {
99
+ "parameters": api.get("parameters", []),
100
+ "return_type": api.get("return_type", "Any"),
101
+ "source_url": url,
102
+ }
103
+
104
+ return apis
105
+
106
+ def _parse_doc_content_for_apis(self, content: str, source_url: str) -> dict[str, dict]:
107
+ """
108
+ Parse documentation content to extract API signatures.
109
+
110
+ This is a simplified approach - real implementation would need
111
+ to understand the documentation format (Sphinx, JSDoc, etc.)
112
+ """
113
+ apis = {}
114
+
115
+ # Look for function/method signatures in code blocks
116
+ # Common patterns:
117
+ # - function_name(param1, param2)
118
+ # - ClassName.method_name(param1, param2)
119
+ # - def function_name(param1: type, param2: type) -> return_type
120
+
121
+ import re
122
+
123
+ # Pattern for common API signatures
124
+ patterns = [
125
+ # Python style: def name(params) -> return
126
+ r"def\s+(\w+)\s*\(([^)]*)\)(?:\s*->\s*(\w+))?",
127
+ # JavaScript style: function name(params)
128
+ r"function\s+(\w+)\s*\(([^)]*)\)",
129
+ # C++ style: return_type name(params)
130
+ r"(\w+)\s+(\w+)\s*\(([^)]*)\)",
131
+ # Method style: ClassName.method_name(params)
132
+ r"(\w+)\.(\w+)\s*\(([^)]*)\)",
133
+ ]
134
+
135
+ for pattern in patterns:
136
+ for match in re.finditer(pattern, content):
137
+ groups = match.groups()
138
+
139
+ # Parse based on pattern matched
140
+ if "def" in pattern:
141
+ # Python function
142
+ name = groups[0]
143
+ params_str = groups[1]
144
+ return_type = groups[2] if len(groups) > 2 else None
145
+ elif "function" in pattern:
146
+ # JavaScript function
147
+ name = groups[0]
148
+ params_str = groups[1]
149
+ return_type = None
150
+ elif "." in pattern:
151
+ # Class method
152
+ class_name = groups[0]
153
+ method_name = groups[1]
154
+ name = f"{class_name}.{method_name}"
155
+ params_str = groups[2] if len(groups) > 2 else groups[1]
156
+ return_type = None
157
+ else:
158
+ # C++ function
159
+ return_type = groups[0]
160
+ name = groups[1]
161
+ params_str = groups[2]
162
+
163
+ # Parse parameters
164
+ params = self._parse_param_string(params_str)
165
+
166
+ apis[name] = {
167
+ "name": name,
168
+ "parameters": params,
169
+ "return_type": return_type,
170
+ "source": source_url,
171
+ "raw_signature": match.group(0),
172
+ }
173
+
174
+ return apis
175
+
176
+ def _parse_param_string(self, params_str: str) -> list[dict]:
177
+ """Parse parameter string into list of parameter dicts."""
178
+ if not params_str.strip():
179
+ return []
180
+
181
+ params = []
182
+ for param in params_str.split(","):
183
+ param = param.strip()
184
+ if not param:
185
+ continue
186
+
187
+ # Try to extract name and type
188
+ param_info = {"name": param, "type": None, "default": None}
189
+
190
+ # Check for type annotation (: type)
191
+ if ":" in param:
192
+ parts = param.split(":", 1)
193
+ param_info["name"] = parts[0].strip()
194
+ type_part = parts[1].strip()
195
+
196
+ # Check for default value (= value)
197
+ if "=" in type_part:
198
+ type_str, default_str = type_part.split("=", 1)
199
+ param_info["type"] = type_str.strip()
200
+ param_info["default"] = default_str.strip()
201
+ else:
202
+ param_info["type"] = type_part
203
+
204
+ # Check for default without type (= value)
205
+ elif "=" in param:
206
+ parts = param.split("=", 1)
207
+ param_info["name"] = parts[0].strip()
208
+ param_info["default"] = parts[1].strip()
209
+
210
+ params.append(param_info)
211
+
212
+ return params
213
+
214
+ def _extract_code_apis(self) -> dict[str, dict[str, Any]]:
215
+ """
216
+ Extract API information from GitHub code analysis.
217
+
218
+ Returns:
219
+ Dict mapping API name to API info
220
+ """
221
+ apis = {}
222
+
223
+ code_analysis = self.github_data.get("code_analysis", {})
224
+ if not code_analysis:
225
+ return apis
226
+
227
+ # Support both 'files' and 'analyzed_files' keys
228
+ files = code_analysis.get("files", code_analysis.get("analyzed_files", []))
229
+
230
+ for file_info in files:
231
+ file_path = file_info.get("file", "unknown")
232
+
233
+ # Extract classes and their methods
234
+ for class_info in file_info.get("classes", []):
235
+ class_name = class_info["name"]
236
+
237
+ # Add class itself
238
+ apis[class_name] = {
239
+ "name": class_name,
240
+ "type": "class",
241
+ "source": file_path,
242
+ "line": class_info.get("line_number"),
243
+ "base_classes": class_info.get("base_classes", []),
244
+ "docstring": class_info.get("docstring"),
245
+ }
246
+
247
+ # Add methods
248
+ for method in class_info.get("methods", []):
249
+ method_name = f"{class_name}.{method['name']}"
250
+ apis[method_name] = {
251
+ "name": method_name,
252
+ "type": "method",
253
+ "parameters": method.get("parameters", []),
254
+ "return_type": method.get("return_type"),
255
+ "source": file_path,
256
+ "line": method.get("line_number"),
257
+ "docstring": method.get("docstring"),
258
+ "is_async": method.get("is_async", False),
259
+ }
260
+
261
+ # Extract standalone functions
262
+ for func_info in file_info.get("functions", []):
263
+ func_name = func_info["name"]
264
+ apis[func_name] = {
265
+ "name": func_name,
266
+ "type": "function",
267
+ "parameters": func_info.get("parameters", []),
268
+ "return_type": func_info.get("return_type"),
269
+ "source": file_path,
270
+ "line": func_info.get("line_number"),
271
+ "docstring": func_info.get("docstring"),
272
+ "is_async": func_info.get("is_async", False),
273
+ }
274
+
275
+ return apis
276
+
277
+ def detect_all_conflicts(self) -> list[Conflict]:
278
+ """
279
+ Detect all types of conflicts.
280
+
281
+ Returns:
282
+ List of Conflict objects
283
+ """
284
+ logger.info("Detecting conflicts between documentation and code...")
285
+
286
+ conflicts = []
287
+
288
+ # 1. Find APIs missing in documentation
289
+ conflicts.extend(self._find_missing_in_docs())
290
+
291
+ # 2. Find APIs missing in code
292
+ conflicts.extend(self._find_missing_in_code())
293
+
294
+ # 3. Find signature mismatches
295
+ conflicts.extend(self._find_signature_mismatches())
296
+
297
+ logger.info(f"Found {len(conflicts)} conflicts total")
298
+
299
+ return conflicts
300
+
301
+ def _find_missing_in_docs(self) -> list[Conflict]:
302
+ """Find APIs that exist in code but not in documentation."""
303
+ conflicts = []
304
+
305
+ for api_name, code_info in self.code_apis.items():
306
+ # Simple name matching (can be enhanced with fuzzy matching)
307
+ if api_name not in self.docs_apis:
308
+ # Check if it's a private/internal API (often not documented)
309
+ is_private = api_name.startswith("_") or "__" in api_name
310
+ severity = "low" if is_private else "medium"
311
+
312
+ conflicts.append(
313
+ Conflict(
314
+ type="missing_in_docs",
315
+ severity=severity,
316
+ api_name=api_name,
317
+ code_info=code_info,
318
+ difference=f"API exists in code ({code_info['source']}) but not found in documentation",
319
+ suggestion="Add documentation for this API"
320
+ if not is_private
321
+ else "Consider if this internal API should be documented",
322
+ )
323
+ )
324
+
325
+ logger.info(f"Found {len(conflicts)} APIs missing in documentation")
326
+ return conflicts
327
+
328
+ def _find_missing_in_code(self) -> list[Conflict]:
329
+ """Find APIs that are documented but don't exist in code."""
330
+ conflicts = []
331
+
332
+ for api_name, docs_info in self.docs_apis.items():
333
+ if api_name not in self.code_apis:
334
+ conflicts.append(
335
+ Conflict(
336
+ type="missing_in_code",
337
+ severity="high", # This is serious - documented but doesn't exist
338
+ api_name=api_name,
339
+ docs_info=docs_info,
340
+ difference=f"API documented ({docs_info.get('source', 'unknown')}) but not found in code",
341
+ suggestion="Update documentation to remove this API, or add it to codebase",
342
+ )
343
+ )
344
+
345
+ logger.info(f"Found {len(conflicts)} APIs missing in code")
346
+ return conflicts
347
+
348
+ def _find_signature_mismatches(self) -> list[Conflict]:
349
+ """Find APIs where signature differs between docs and code."""
350
+ conflicts = []
351
+
352
+ # Find APIs that exist in both
353
+ common_apis = set(self.docs_apis.keys()) & set(self.code_apis.keys())
354
+
355
+ for api_name in common_apis:
356
+ docs_info = self.docs_apis[api_name]
357
+ code_info = self.code_apis[api_name]
358
+
359
+ # Compare signatures
360
+ mismatch = self._compare_signatures(docs_info, code_info)
361
+
362
+ if mismatch:
363
+ conflicts.append(
364
+ Conflict(
365
+ type="signature_mismatch",
366
+ severity=mismatch["severity"],
367
+ api_name=api_name,
368
+ docs_info=docs_info,
369
+ code_info=code_info,
370
+ difference=mismatch["difference"],
371
+ suggestion=mismatch["suggestion"],
372
+ )
373
+ )
374
+
375
+ logger.info(f"Found {len(conflicts)} signature mismatches")
376
+ return conflicts
377
+
378
+ def _compare_signatures(self, docs_info: dict, code_info: dict) -> dict | None:
379
+ """
380
+ Compare signatures between docs and code.
381
+
382
+ Returns:
383
+ Dict with mismatch details if conflict found, None otherwise
384
+ """
385
+ docs_params = docs_info.get("parameters", [])
386
+ code_params = code_info.get("parameters", [])
387
+
388
+ # Compare parameter counts
389
+ if len(docs_params) != len(code_params):
390
+ return {
391
+ "severity": "medium",
392
+ "difference": f"Parameter count mismatch: docs has {len(docs_params)}, code has {len(code_params)}",
393
+ "suggestion": f"Documentation shows {len(docs_params)} parameters, but code has {len(code_params)}",
394
+ }
395
+
396
+ # Compare parameter names and types
397
+ for i, (doc_param, code_param) in enumerate(zip(docs_params, code_params, strict=False)):
398
+ doc_name = doc_param.get("name", "")
399
+ code_name = code_param.get("name", "")
400
+
401
+ # Parameter name mismatch
402
+ if doc_name != code_name:
403
+ # Use fuzzy matching for slight variations
404
+ similarity = SequenceMatcher(None, doc_name, code_name).ratio()
405
+ if similarity < 0.8: # Not similar enough
406
+ return {
407
+ "severity": "medium",
408
+ "difference": f"Parameter {i + 1} name mismatch: '{doc_name}' in docs vs '{code_name}' in code",
409
+ "suggestion": f"Update documentation to use parameter name '{code_name}'",
410
+ }
411
+
412
+ # Type mismatch
413
+ doc_type = doc_param.get("type")
414
+ code_type = code_param.get("type_hint")
415
+
416
+ if doc_type and code_type and doc_type != code_type:
417
+ return {
418
+ "severity": "low",
419
+ "difference": f"Parameter '{doc_name}' type mismatch: '{doc_type}' in docs vs '{code_type}' in code",
420
+ "suggestion": f"Verify correct type for parameter '{doc_name}'",
421
+ }
422
+
423
+ # Compare return types if both have them
424
+ docs_return = docs_info.get("return_type")
425
+ code_return = code_info.get("return_type")
426
+
427
+ if docs_return and code_return and docs_return != code_return:
428
+ return {
429
+ "severity": "low",
430
+ "difference": f"Return type mismatch: '{docs_return}' in docs vs '{code_return}' in code",
431
+ "suggestion": "Verify correct return type",
432
+ }
433
+
434
+ return None
435
+
436
+ def generate_summary(self, conflicts: list[Conflict]) -> dict[str, Any]:
437
+ """
438
+ Generate summary statistics for conflicts.
439
+
440
+ Args:
441
+ conflicts: List of Conflict objects
442
+
443
+ Returns:
444
+ Summary dict with statistics
445
+ """
446
+ summary = {
447
+ "total": len(conflicts),
448
+ "by_type": {},
449
+ "by_severity": {},
450
+ "apis_affected": len({c.api_name for c in conflicts}),
451
+ }
452
+
453
+ # Count by type
454
+ for conflict_type in [
455
+ "missing_in_docs",
456
+ "missing_in_code",
457
+ "signature_mismatch",
458
+ "description_mismatch",
459
+ ]:
460
+ count = sum(1 for c in conflicts if c.type == conflict_type)
461
+ summary["by_type"][conflict_type] = count
462
+
463
+ # Count by severity
464
+ for severity in ["low", "medium", "high"]:
465
+ count = sum(1 for c in conflicts if c.severity == severity)
466
+ summary["by_severity"][severity] = count
467
+
468
+ return summary
469
+
470
+ def save_conflicts(self, conflicts: list[Conflict], output_path: str):
471
+ """
472
+ Save conflicts to JSON file.
473
+
474
+ Args:
475
+ conflicts: List of Conflict objects
476
+ output_path: Path to output JSON file
477
+ """
478
+ data = {
479
+ "conflicts": [asdict(c) for c in conflicts],
480
+ "summary": self.generate_summary(conflicts),
481
+ }
482
+
483
+ with open(output_path, "w", encoding="utf-8") as f:
484
+ json.dump(data, f, indent=2, ensure_ascii=False)
485
+
486
+ logger.info(f"Conflicts saved to: {output_path}")
487
+
488
+
489
+ if __name__ == "__main__":
490
+ import sys
491
+
492
+ if len(sys.argv) < 3:
493
+ print("Usage: python conflict_detector.py <docs_data.json> <github_data.json>")
494
+ sys.exit(1)
495
+
496
+ docs_file = sys.argv[1]
497
+ github_file = sys.argv[2]
498
+
499
+ # Load data
500
+ with open(docs_file) as f:
501
+ docs_data = json.load(f)
502
+
503
+ with open(github_file) as f:
504
+ github_data = json.load(f)
505
+
506
+ # Detect conflicts
507
+ detector = ConflictDetector(docs_data, github_data)
508
+ conflicts = detector.detect_all_conflicts()
509
+
510
+ # Print summary
511
+ summary = detector.generate_summary(conflicts)
512
+ print("\nšŸ“Š Conflict Summary:")
513
+ print(f" Total conflicts: {summary['total']}")
514
+ print(f" APIs affected: {summary['apis_affected']}")
515
+ print("\n By Type:")
516
+ for conflict_type, count in summary["by_type"].items():
517
+ if count > 0:
518
+ print(f" {conflict_type}: {count}")
519
+ print("\n By Severity:")
520
+ for severity, count in summary["by_severity"].items():
521
+ if count > 0:
522
+ emoji = "šŸ”“" if severity == "high" else "🟔" if severity == "medium" else "🟢"
523
+ print(f" {emoji} {severity}: {count}")
524
+
525
+ # Save to file
526
+ output_file = "conflicts.json"
527
+ detector.save_conflicts(conflicts, output_file)
528
+ print(f"\nāœ… Full report saved to: {output_file}")
@@ -0,0 +1,72 @@
1
+ """Configuration constants for Skill Seekers CLI.
2
+
3
+ This module centralizes all magic numbers and configuration values used
4
+ across the CLI tools to improve maintainability and clarity.
5
+ """
6
+
7
+ # ===== SCRAPING CONFIGURATION =====
8
+
9
+ # Default scraping limits
10
+ DEFAULT_RATE_LIMIT = 0.5 # seconds between requests
11
+ DEFAULT_MAX_PAGES = 500 # maximum pages to scrape
12
+ DEFAULT_CHECKPOINT_INTERVAL = 1000 # pages between checkpoints
13
+ DEFAULT_ASYNC_MODE = False # use async mode for parallel scraping (opt-in)
14
+
15
+ # Content analysis limits
16
+ CONTENT_PREVIEW_LENGTH = 500 # characters to check for categorization
17
+ MAX_PAGES_WARNING_THRESHOLD = 10000 # warn if config exceeds this
18
+
19
+ # Quality thresholds
20
+ MIN_CATEGORIZATION_SCORE = 2 # minimum score for category assignment
21
+ URL_MATCH_POINTS = 3 # points for URL keyword match
22
+ TITLE_MATCH_POINTS = 2 # points for title keyword match
23
+ CONTENT_MATCH_POINTS = 1 # points for content keyword match
24
+
25
+ # ===== ENHANCEMENT CONFIGURATION =====
26
+
27
+ # API-based enhancement limits (uses Anthropic API)
28
+ API_CONTENT_LIMIT = 100000 # max characters for API enhancement
29
+ API_PREVIEW_LIMIT = 40000 # max characters for preview
30
+
31
+ # Local enhancement limits (uses Claude Code Max)
32
+ LOCAL_CONTENT_LIMIT = 50000 # max characters for local enhancement
33
+ LOCAL_PREVIEW_LIMIT = 20000 # max characters for preview
34
+
35
+ # ===== PAGE ESTIMATION =====
36
+
37
+ # Estimation and discovery settings
38
+ DEFAULT_MAX_DISCOVERY = 1000 # default max pages to discover
39
+ DISCOVERY_THRESHOLD = 10000 # threshold for warnings
40
+
41
+ # ===== FILE LIMITS =====
42
+
43
+ # Output and processing limits
44
+ MAX_REFERENCE_FILES = 100 # maximum reference files per skill
45
+ MAX_CODE_BLOCKS_PER_PAGE = 5 # maximum code blocks to extract per page
46
+
47
+ # ===== EXPORT CONSTANTS =====
48
+
49
+ __all__ = [
50
+ # Scraping
51
+ "DEFAULT_RATE_LIMIT",
52
+ "DEFAULT_MAX_PAGES",
53
+ "DEFAULT_CHECKPOINT_INTERVAL",
54
+ "DEFAULT_ASYNC_MODE",
55
+ "CONTENT_PREVIEW_LENGTH",
56
+ "MAX_PAGES_WARNING_THRESHOLD",
57
+ "MIN_CATEGORIZATION_SCORE",
58
+ "URL_MATCH_POINTS",
59
+ "TITLE_MATCH_POINTS",
60
+ "CONTENT_MATCH_POINTS",
61
+ # Enhancement
62
+ "API_CONTENT_LIMIT",
63
+ "API_PREVIEW_LIMIT",
64
+ "LOCAL_CONTENT_LIMIT",
65
+ "LOCAL_PREVIEW_LIMIT",
66
+ # Estimation
67
+ "DEFAULT_MAX_DISCOVERY",
68
+ "DISCOVERY_THRESHOLD",
69
+ # Limits
70
+ "MAX_REFERENCE_FILES",
71
+ "MAX_CODE_BLOCKS_PER_PAGE",
72
+ ]