skill-seekers 2.7.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (79) hide show
  1. skill_seekers/__init__.py +22 -0
  2. skill_seekers/cli/__init__.py +39 -0
  3. skill_seekers/cli/adaptors/__init__.py +120 -0
  4. skill_seekers/cli/adaptors/base.py +221 -0
  5. skill_seekers/cli/adaptors/claude.py +485 -0
  6. skill_seekers/cli/adaptors/gemini.py +453 -0
  7. skill_seekers/cli/adaptors/markdown.py +269 -0
  8. skill_seekers/cli/adaptors/openai.py +503 -0
  9. skill_seekers/cli/ai_enhancer.py +310 -0
  10. skill_seekers/cli/api_reference_builder.py +373 -0
  11. skill_seekers/cli/architectural_pattern_detector.py +525 -0
  12. skill_seekers/cli/code_analyzer.py +1462 -0
  13. skill_seekers/cli/codebase_scraper.py +1225 -0
  14. skill_seekers/cli/config_command.py +563 -0
  15. skill_seekers/cli/config_enhancer.py +431 -0
  16. skill_seekers/cli/config_extractor.py +871 -0
  17. skill_seekers/cli/config_manager.py +452 -0
  18. skill_seekers/cli/config_validator.py +394 -0
  19. skill_seekers/cli/conflict_detector.py +528 -0
  20. skill_seekers/cli/constants.py +72 -0
  21. skill_seekers/cli/dependency_analyzer.py +757 -0
  22. skill_seekers/cli/doc_scraper.py +2332 -0
  23. skill_seekers/cli/enhance_skill.py +488 -0
  24. skill_seekers/cli/enhance_skill_local.py +1096 -0
  25. skill_seekers/cli/enhance_status.py +194 -0
  26. skill_seekers/cli/estimate_pages.py +433 -0
  27. skill_seekers/cli/generate_router.py +1209 -0
  28. skill_seekers/cli/github_fetcher.py +534 -0
  29. skill_seekers/cli/github_scraper.py +1466 -0
  30. skill_seekers/cli/guide_enhancer.py +723 -0
  31. skill_seekers/cli/how_to_guide_builder.py +1267 -0
  32. skill_seekers/cli/install_agent.py +461 -0
  33. skill_seekers/cli/install_skill.py +178 -0
  34. skill_seekers/cli/language_detector.py +614 -0
  35. skill_seekers/cli/llms_txt_detector.py +60 -0
  36. skill_seekers/cli/llms_txt_downloader.py +104 -0
  37. skill_seekers/cli/llms_txt_parser.py +150 -0
  38. skill_seekers/cli/main.py +558 -0
  39. skill_seekers/cli/markdown_cleaner.py +132 -0
  40. skill_seekers/cli/merge_sources.py +806 -0
  41. skill_seekers/cli/package_multi.py +77 -0
  42. skill_seekers/cli/package_skill.py +241 -0
  43. skill_seekers/cli/pattern_recognizer.py +1825 -0
  44. skill_seekers/cli/pdf_extractor_poc.py +1166 -0
  45. skill_seekers/cli/pdf_scraper.py +617 -0
  46. skill_seekers/cli/quality_checker.py +519 -0
  47. skill_seekers/cli/rate_limit_handler.py +438 -0
  48. skill_seekers/cli/resume_command.py +160 -0
  49. skill_seekers/cli/run_tests.py +230 -0
  50. skill_seekers/cli/setup_wizard.py +93 -0
  51. skill_seekers/cli/split_config.py +390 -0
  52. skill_seekers/cli/swift_patterns.py +560 -0
  53. skill_seekers/cli/test_example_extractor.py +1081 -0
  54. skill_seekers/cli/test_unified_simple.py +179 -0
  55. skill_seekers/cli/unified_codebase_analyzer.py +572 -0
  56. skill_seekers/cli/unified_scraper.py +932 -0
  57. skill_seekers/cli/unified_skill_builder.py +1605 -0
  58. skill_seekers/cli/upload_skill.py +162 -0
  59. skill_seekers/cli/utils.py +432 -0
  60. skill_seekers/mcp/__init__.py +33 -0
  61. skill_seekers/mcp/agent_detector.py +316 -0
  62. skill_seekers/mcp/git_repo.py +273 -0
  63. skill_seekers/mcp/server.py +231 -0
  64. skill_seekers/mcp/server_fastmcp.py +1249 -0
  65. skill_seekers/mcp/server_legacy.py +2302 -0
  66. skill_seekers/mcp/source_manager.py +285 -0
  67. skill_seekers/mcp/tools/__init__.py +115 -0
  68. skill_seekers/mcp/tools/config_tools.py +251 -0
  69. skill_seekers/mcp/tools/packaging_tools.py +826 -0
  70. skill_seekers/mcp/tools/scraping_tools.py +842 -0
  71. skill_seekers/mcp/tools/source_tools.py +828 -0
  72. skill_seekers/mcp/tools/splitting_tools.py +212 -0
  73. skill_seekers/py.typed +0 -0
  74. skill_seekers-2.7.3.dist-info/METADATA +2027 -0
  75. skill_seekers-2.7.3.dist-info/RECORD +79 -0
  76. skill_seekers-2.7.3.dist-info/WHEEL +5 -0
  77. skill_seekers-2.7.3.dist-info/entry_points.txt +19 -0
  78. skill_seekers-2.7.3.dist-info/licenses/LICENSE +21 -0
  79. skill_seekers-2.7.3.dist-info/top_level.txt +1 -0
@@ -0,0 +1,558 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Skill Seekers - Unified CLI Entry Point
4
+
5
+ Provides a git-style unified command-line interface for all Skill Seekers tools.
6
+
7
+ Usage:
8
+ skill-seekers <command> [options]
9
+
10
+ Commands:
11
+ config Configure GitHub tokens, API keys, and settings
12
+ scrape Scrape documentation website
13
+ github Scrape GitHub repository
14
+ pdf Extract from PDF file
15
+ unified Multi-source scraping (docs + GitHub + PDF)
16
+ enhance AI-powered enhancement (local, no API key)
17
+ enhance-status Check enhancement status (for background/daemon modes)
18
+ package Package skill into .zip file
19
+ upload Upload skill to Claude
20
+ estimate Estimate page count before scraping
21
+ extract-test-examples Extract usage examples from test files
22
+ install-agent Install skill to AI agent directories
23
+ resume Resume interrupted scraping job
24
+
25
+ Examples:
26
+ skill-seekers scrape --config configs/react.json
27
+ skill-seekers github --repo microsoft/TypeScript
28
+ skill-seekers unified --config configs/react_unified.json
29
+ skill-seekers extract-test-examples tests/ --language python
30
+ skill-seekers package output/react/
31
+ skill-seekers install-agent output/react/ --agent cursor
32
+ """
33
+
34
+ import argparse
35
+ import sys
36
+
37
+ from skill_seekers.cli import __version__
38
+
39
+
40
def create_parser() -> argparse.ArgumentParser:
    """Create the main argument parser with subcommands.

    Builds a git-style CLI: one subparser per command (config, scrape,
    github, pdf, unified, enhance, enhance-status, package, upload,
    estimate, extract-test-examples, install-agent, install, resume).
    The selected command name is stored in ``args.command``.

    Returns:
        Fully configured ``argparse.ArgumentParser``.
    """
    # RawDescriptionHelpFormatter preserves the hand-formatted epilog below.
    parser = argparse.ArgumentParser(
        prog="skill-seekers",
        description="Convert documentation, GitHub repos, and PDFs into Claude AI skills",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog="""
Examples:
  # Scrape documentation
  skill-seekers scrape --config configs/react.json

  # Scrape GitHub repository
  skill-seekers github --repo microsoft/TypeScript --name typescript

  # Multi-source scraping (unified)
  skill-seekers unified --config configs/react_unified.json

  # AI-powered enhancement
  skill-seekers enhance output/react/

  # Package and upload
  skill-seekers package output/react/
  skill-seekers upload output/react.zip

For more information: https://github.com/yusufkaraaslan/Skill_Seekers
""",
    )

    parser.add_argument("--version", action="version", version=f"%(prog)s {__version__}")

    # dest="command" lets main() dispatch on the chosen subcommand name.
    subparsers = parser.add_subparsers(
        dest="command",
        title="commands",
        description="Available Skill Seekers commands",
        help="Command to run",
    )

    # === config subcommand ===
    config_parser = subparsers.add_parser(
        "config",
        help="Configure GitHub tokens, API keys, and settings",
        description="Interactive configuration wizard",
    )
    config_parser.add_argument(
        "--github", action="store_true", help="Go directly to GitHub token setup"
    )
    config_parser.add_argument(
        "--api-keys", action="store_true", help="Go directly to API keys setup"
    )
    config_parser.add_argument(
        "--show", action="store_true", help="Show current configuration and exit"
    )
    config_parser.add_argument("--test", action="store_true", help="Test connections and exit")

    # === scrape subcommand ===
    scrape_parser = subparsers.add_parser(
        "scrape",
        help="Scrape documentation website",
        description="Scrape documentation website and generate skill",
    )
    # URL may come positionally or via --config; both are optional here and
    # validated downstream by doc_scraper.
    scrape_parser.add_argument("url", nargs="?", help="Documentation URL (positional argument)")
    scrape_parser.add_argument("--config", help="Config JSON file")
    scrape_parser.add_argument("--name", help="Skill name")
    scrape_parser.add_argument("--description", help="Skill description")
    scrape_parser.add_argument("--max-pages", type=int, dest="max_pages", help="Maximum pages to scrape (override config)")
    scrape_parser.add_argument(
        "--skip-scrape", action="store_true", help="Skip scraping, use cached data"
    )
    scrape_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
    scrape_parser.add_argument(
        "--enhance-local", action="store_true", help="AI enhancement (local)"
    )
    scrape_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")
    # "async" is a Python keyword, so the attribute is stored as async_mode.
    scrape_parser.add_argument(
        "--async", dest="async_mode", action="store_true", help="Use async scraping"
    )
    scrape_parser.add_argument("--workers", type=int, help="Number of async workers")

    # === github subcommand ===
    github_parser = subparsers.add_parser(
        "github",
        help="Scrape GitHub repository",
        description="Scrape GitHub repository and generate skill",
    )
    github_parser.add_argument("--config", help="Config JSON file")
    github_parser.add_argument("--repo", help="GitHub repo (owner/repo)")
    github_parser.add_argument("--name", help="Skill name")
    github_parser.add_argument("--description", help="Skill description")
    github_parser.add_argument("--enhance", action="store_true", help="AI enhancement (API)")
    github_parser.add_argument(
        "--enhance-local", action="store_true", help="AI enhancement (local)"
    )
    github_parser.add_argument("--api-key", type=str, help="Anthropic API key for --enhance")
    github_parser.add_argument(
        "--non-interactive",
        action="store_true",
        help="Non-interactive mode (fail fast on rate limits)",
    )
    github_parser.add_argument("--profile", type=str, help="GitHub profile name from config")

    # === pdf subcommand ===
    pdf_parser = subparsers.add_parser(
        "pdf",
        help="Extract from PDF file",
        description="Extract content from PDF and generate skill",
    )
    pdf_parser.add_argument("--config", help="Config JSON file")
    pdf_parser.add_argument("--pdf", help="PDF file path")
    pdf_parser.add_argument("--name", help="Skill name")
    pdf_parser.add_argument("--description", help="Skill description")
    pdf_parser.add_argument("--from-json", help="Build from extracted JSON")

    # === unified subcommand ===
    unified_parser = subparsers.add_parser(
        "unified",
        help="Multi-source scraping (docs + GitHub + PDF)",
        description="Combine multiple sources into one skill",
    )
    # Unlike scrape/github/pdf, unified always requires a config file.
    unified_parser.add_argument("--config", required=True, help="Unified config JSON file")
    unified_parser.add_argument("--merge-mode", help="Merge mode (rule-based, claude-enhanced)")
    unified_parser.add_argument("--fresh", action="store_true", help="Clear existing data and start fresh")
    unified_parser.add_argument("--dry-run", action="store_true", help="Dry run mode")

    # === enhance subcommand ===
    enhance_parser = subparsers.add_parser(
        "enhance",
        help="AI-powered enhancement (local, no API key)",
        description="Enhance SKILL.md using Claude Code (local)",
    )
    enhance_parser.add_argument("skill_directory", help="Skill directory path")
    enhance_parser.add_argument("--background", action="store_true", help="Run in background")
    enhance_parser.add_argument("--daemon", action="store_true", help="Run as daemon")
    enhance_parser.add_argument(
        "--no-force", action="store_true", help="Disable force mode (enable confirmations)"
    )
    enhance_parser.add_argument("--timeout", type=int, default=600, help="Timeout in seconds")

    # === enhance-status subcommand ===
    enhance_status_parser = subparsers.add_parser(
        "enhance-status",
        help="Check enhancement status (for background/daemon modes)",
        description="Monitor background enhancement processes",
    )
    enhance_status_parser.add_argument("skill_directory", help="Skill directory path")
    enhance_status_parser.add_argument(
        "--watch", "-w", action="store_true", help="Watch in real-time"
    )
    enhance_status_parser.add_argument("--json", action="store_true", help="JSON output")
    enhance_status_parser.add_argument(
        "--interval", type=int, default=2, help="Watch interval in seconds"
    )

    # === package subcommand ===
    package_parser = subparsers.add_parser(
        "package",
        help="Package skill into .zip file",
        description="Package skill directory into uploadable .zip",
    )
    package_parser.add_argument("skill_directory", help="Skill directory path")
    package_parser.add_argument("--no-open", action="store_true", help="Don't open output folder")
    package_parser.add_argument("--upload", action="store_true", help="Auto-upload after packaging")

    # === upload subcommand ===
    upload_parser = subparsers.add_parser(
        "upload",
        help="Upload skill to Claude",
        description="Upload .zip file to Claude via Anthropic API",
    )
    upload_parser.add_argument("zip_file", help=".zip file to upload")
    upload_parser.add_argument("--api-key", help="Anthropic API key")

    # === estimate subcommand ===
    estimate_parser = subparsers.add_parser(
        "estimate",
        help="Estimate page count before scraping",
        description="Estimate total pages for documentation scraping",
    )
    estimate_parser.add_argument("config", nargs="?", help="Config JSON file")
    estimate_parser.add_argument("--all", action="store_true", help="List all available configs")
    estimate_parser.add_argument("--max-discovery", type=int, help="Max pages to discover")

    # === extract-test-examples subcommand ===
    test_examples_parser = subparsers.add_parser(
        "extract-test-examples",
        help="Extract usage examples from test files",
        description="Analyze test files to extract real API usage patterns",
    )
    test_examples_parser.add_argument(
        "directory", nargs="?", help="Directory containing test files"
    )
    test_examples_parser.add_argument("--file", help="Single test file to analyze")
    test_examples_parser.add_argument(
        "--language", help="Filter by programming language (python, javascript, etc.)"
    )
    test_examples_parser.add_argument(
        "--min-confidence",
        type=float,
        default=0.5,
        help="Minimum confidence threshold (0.0-1.0, default: 0.5)",
    )
    test_examples_parser.add_argument(
        "--max-per-file", type=int, default=10, help="Maximum examples per file (default: 10)"
    )
    test_examples_parser.add_argument("--json", action="store_true", help="Output JSON format")
    test_examples_parser.add_argument(
        "--markdown", action="store_true", help="Output Markdown format"
    )

    # === install-agent subcommand ===
    install_agent_parser = subparsers.add_parser(
        "install-agent",
        help="Install skill to AI agent directories",
        description="Copy skill to agent-specific installation directories",
    )
    install_agent_parser.add_argument(
        "skill_directory", help="Skill directory path (e.g., output/react/)"
    )
    install_agent_parser.add_argument(
        "--agent",
        required=True,
        help="Agent name (claude, cursor, vscode, amp, goose, opencode, all)",
    )
    install_agent_parser.add_argument(
        "--force", action="store_true", help="Overwrite existing installation without asking"
    )
    install_agent_parser.add_argument(
        "--dry-run", action="store_true", help="Preview installation without making changes"
    )

    # === install subcommand ===
    install_parser = subparsers.add_parser(
        "install",
        help="Complete workflow: fetch → scrape → enhance → package → upload",
        description="One-command skill installation (AI enhancement MANDATORY)",
    )
    install_parser.add_argument(
        "--config",
        required=True,
        help="Config name (e.g., 'react') or path (e.g., 'configs/custom.json')",
    )
    install_parser.add_argument(
        "--destination", default="output", help="Output directory (default: output/)"
    )
    install_parser.add_argument(
        "--no-upload", action="store_true", help="Skip automatic upload to Claude"
    )
    install_parser.add_argument(
        "--unlimited", action="store_true", help="Remove page limits during scraping"
    )
    install_parser.add_argument(
        "--dry-run", action="store_true", help="Preview workflow without executing"
    )

    # === resume subcommand ===
    resume_parser = subparsers.add_parser(
        "resume",
        help="Resume interrupted scraping job",
        description="Continue from saved progress checkpoint",
    )
    resume_parser.add_argument(
        "job_id", nargs="?", help="Job ID to resume (or use --list to see available jobs)"
    )
    resume_parser.add_argument("--list", action="store_true", help="List all resumable jobs")
    resume_parser.add_argument("--clean", action="store_true", help="Clean up old progress files")

    return parser
306
+
307
+
308
def main(argv: list[str] | None = None) -> int:
    """Main entry point for the unified CLI.

    Parses the unified command line, then delegates to the per-command tool
    by rewriting ``sys.argv`` to look like a direct invocation of that tool
    and calling its ``main()``. The delegates parse ``sys.argv`` themselves,
    which is why the rebuild-argv dance is needed; ``sys.argv`` is not
    restored afterwards because each delegate runs to completion and this
    process then exits.

    Args:
        argv: Command-line arguments (defaults to sys.argv)

    Returns:
        Exit code (0 for success, non-zero for error; 130 on Ctrl-C)
    """
    parser = create_parser()
    args = parser.parse_args(argv)

    if not args.command:
        parser.print_help()
        return 1

    # Delegate to the appropriate tool.
    # NOTE: numeric options are forwarded with `is not None` checks (not
    # truthiness) so that an explicit 0 / 0.0 from the user is passed through
    # instead of being silently dropped in favor of the delegate's default.
    try:
        if args.command == "config":
            from skill_seekers.cli.config_command import main as config_main

            sys.argv = ["config_command.py"]
            if args.github:
                sys.argv.append("--github")
            if args.api_keys:
                sys.argv.append("--api-keys")
            if args.show:
                sys.argv.append("--show")
            if args.test:
                sys.argv.append("--test")
            # Delegates may return None on success; normalize to 0.
            return config_main() or 0

        elif args.command == "scrape":
            from skill_seekers.cli.doc_scraper import main as scrape_main

            # Convert args namespace to sys.argv format for doc_scraper
            sys.argv = ["doc_scraper.py"]
            # Add positional URL if provided (positional arg has priority)
            if getattr(args, "url", None):
                sys.argv.append(args.url)
            if args.config:
                sys.argv.extend(["--config", args.config])
            if args.name:
                sys.argv.extend(["--name", args.name])
            if args.description:
                sys.argv.extend(["--description", args.description])
            if getattr(args, "max_pages", None) is not None:
                sys.argv.extend(["--max-pages", str(args.max_pages)])
            if args.skip_scrape:
                sys.argv.append("--skip-scrape")
            if args.enhance:
                sys.argv.append("--enhance")
            if args.enhance_local:
                sys.argv.append("--enhance-local")
            if args.dry_run:
                sys.argv.append("--dry-run")
            if args.async_mode:
                sys.argv.append("--async")
            if args.workers is not None:
                sys.argv.extend(["--workers", str(args.workers)])
            return scrape_main() or 0

        elif args.command == "github":
            from skill_seekers.cli.github_scraper import main as github_main

            sys.argv = ["github_scraper.py"]
            if args.config:
                sys.argv.extend(["--config", args.config])
            if args.repo:
                sys.argv.extend(["--repo", args.repo])
            if args.name:
                sys.argv.extend(["--name", args.name])
            if args.description:
                sys.argv.extend(["--description", args.description])
            if args.enhance:
                sys.argv.append("--enhance")
            if args.enhance_local:
                sys.argv.append("--enhance-local")
            if args.api_key:
                sys.argv.extend(["--api-key", args.api_key])
            if args.non_interactive:
                sys.argv.append("--non-interactive")
            if args.profile:
                sys.argv.extend(["--profile", args.profile])
            return github_main() or 0

        elif args.command == "pdf":
            from skill_seekers.cli.pdf_scraper import main as pdf_main

            sys.argv = ["pdf_scraper.py"]
            if args.config:
                sys.argv.extend(["--config", args.config])
            if args.pdf:
                sys.argv.extend(["--pdf", args.pdf])
            if args.name:
                sys.argv.extend(["--name", args.name])
            if args.description:
                sys.argv.extend(["--description", args.description])
            if args.from_json:
                sys.argv.extend(["--from-json", args.from_json])
            return pdf_main() or 0

        elif args.command == "unified":
            from skill_seekers.cli.unified_scraper import main as unified_main

            # --config is required by the parser, so it is always present here.
            sys.argv = ["unified_scraper.py", "--config", args.config]
            if args.merge_mode:
                sys.argv.extend(["--merge-mode", args.merge_mode])
            if args.fresh:
                sys.argv.append("--fresh")
            if args.dry_run:
                sys.argv.append("--dry-run")
            return unified_main() or 0

        elif args.command == "enhance":
            from skill_seekers.cli.enhance_skill_local import main as enhance_main

            sys.argv = ["enhance_skill_local.py", args.skill_directory]
            if args.background:
                sys.argv.append("--background")
            if args.daemon:
                sys.argv.append("--daemon")
            if args.no_force:
                sys.argv.append("--no-force")
            if args.timeout is not None:
                sys.argv.extend(["--timeout", str(args.timeout)])
            return enhance_main() or 0

        elif args.command == "enhance-status":
            from skill_seekers.cli.enhance_status import main as enhance_status_main

            sys.argv = ["enhance_status.py", args.skill_directory]
            if args.watch:
                sys.argv.append("--watch")
            if args.json:
                sys.argv.append("--json")
            if args.interval is not None:
                sys.argv.extend(["--interval", str(args.interval)])
            return enhance_status_main() or 0

        elif args.command == "package":
            from skill_seekers.cli.package_skill import main as package_main

            sys.argv = ["package_skill.py", args.skill_directory]
            if args.no_open:
                sys.argv.append("--no-open")
            if args.upload:
                sys.argv.append("--upload")
            return package_main() or 0

        elif args.command == "upload":
            from skill_seekers.cli.upload_skill import main as upload_main

            sys.argv = ["upload_skill.py", args.zip_file]
            if args.api_key:
                sys.argv.extend(["--api-key", args.api_key])
            return upload_main() or 0

        elif args.command == "estimate":
            from skill_seekers.cli.estimate_pages import main as estimate_main

            sys.argv = ["estimate_pages.py"]
            # --all takes precedence over a positional config file.
            if args.all:
                sys.argv.append("--all")
            elif args.config:
                sys.argv.append(args.config)
            if args.max_discovery is not None:
                sys.argv.extend(["--max-discovery", str(args.max_discovery)])
            return estimate_main() or 0

        elif args.command == "extract-test-examples":
            from skill_seekers.cli.test_example_extractor import main as test_examples_main

            sys.argv = ["test_example_extractor.py"]
            if args.directory:
                sys.argv.append(args.directory)
            if args.file:
                sys.argv.extend(["--file", args.file])
            if args.language:
                sys.argv.extend(["--language", args.language])
            if args.min_confidence is not None:
                sys.argv.extend(["--min-confidence", str(args.min_confidence)])
            if args.max_per_file is not None:
                sys.argv.extend(["--max-per-file", str(args.max_per_file)])
            if args.json:
                sys.argv.append("--json")
            if args.markdown:
                sys.argv.append("--markdown")
            return test_examples_main() or 0

        elif args.command == "install-agent":
            from skill_seekers.cli.install_agent import main as install_agent_main

            sys.argv = ["install_agent.py", args.skill_directory, "--agent", args.agent]
            if args.force:
                sys.argv.append("--force")
            if args.dry_run:
                sys.argv.append("--dry-run")
            return install_agent_main() or 0

        elif args.command == "install":
            from skill_seekers.cli.install_skill import main as install_main

            sys.argv = ["install_skill.py"]
            if args.config:
                sys.argv.extend(["--config", args.config])
            if args.destination:
                sys.argv.extend(["--destination", args.destination])
            if args.no_upload:
                sys.argv.append("--no-upload")
            if args.unlimited:
                sys.argv.append("--unlimited")
            if args.dry_run:
                sys.argv.append("--dry-run")
            return install_main() or 0

        elif args.command == "resume":
            from skill_seekers.cli.resume_command import main as resume_main

            sys.argv = ["resume_command.py"]
            if args.job_id:
                sys.argv.append(args.job_id)
            if args.list:
                sys.argv.append("--list")
            if args.clean:
                sys.argv.append("--clean")
            return resume_main() or 0

        else:
            # Unreachable with the current parser (unknown commands are
            # rejected by argparse), kept as a defensive fallback.
            print(f"Error: Unknown command '{args.command}'", file=sys.stderr)
            parser.print_help()
            return 1

    except KeyboardInterrupt:
        # 130 = conventional exit code for termination by SIGINT (Ctrl-C).
        print("\n\nInterrupted by user", file=sys.stderr)
        return 130
    except Exception as e:
        # Provide helpful error message
        error_msg = str(e) if str(e) else f"{type(e).__name__} occurred"
        print(f"Error: {error_msg}", file=sys.stderr)

        # Show traceback in verbose mode (if -v flag exists in args)
        if getattr(args, "verbose", False):
            import traceback

            traceback.print_exc()

        return 1
555
+
556
+
557
# Allow running this module directly (python -m / script execution):
# propagate main()'s integer exit code to the shell.
if __name__ == "__main__":
    sys.exit(main())
@@ -0,0 +1,132 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Markdown Cleaner Utility
4
+
5
+ Removes HTML tags and bloat from markdown content while preserving structure.
6
+ Used to clean README files and other documentation for skill generation.
7
+ """
8
+
9
+ import re
10
+
11
+
12
class MarkdownCleaner:
    """Clean HTML from markdown while preserving structure."""

    @staticmethod
    def remove_html_tags(text: str) -> str:
        """
        Remove HTML tags while preserving text content.

        Note: this is a regex-based stripper, not a full HTML parser; a bare
        ``<`` ... ``>`` pair in prose (e.g. "a < b and c > d") can also be
        stripped. Acceptable for README cleanup.

        Args:
            text: Markdown text possibly containing HTML

        Returns:
            Cleaned markdown with HTML tags removed
        """
        # Remove HTML comments (DOTALL so multi-line comments are caught)
        text = re.sub(r"<!--.*?-->", "", text, flags=re.DOTALL)

        # Remove HTML tags but keep content
        text = re.sub(r"<[^>]+>", "", text)

        # Collapse runs of blank lines created by HTML removal
        text = re.sub(r"\n\s*\n\s*\n+", "\n\n", text)

        return text.strip()

    @staticmethod
    def extract_first_section(text: str, max_chars: int = 500) -> str:
        """
        Extract first meaningful content, respecting markdown structure.

        Captures content including section headings up to max_chars.
        For short READMEs, includes everything. For longer ones, extracts
        intro + first few sections (e.g., installation, quick start).

        Args:
            text: Full markdown text
            max_chars: Maximum characters to extract

        Returns:
            First section content (cleaned, including headings)
        """
        # Remove HTML first
        text = MarkdownCleaner.remove_html_tags(text)

        # If text is short, return it all
        if len(text) <= max_chars:
            return text.strip()

        # For longer text, extract smartly
        lines = text.split("\n")
        content_lines: list[str] = []
        char_count = 0
        section_count = 0
        in_code_block = False  # Track code fence state to avoid truncating mid-block

        for line in lines:
            # Toggle on each code fence (```)
            if line.strip().startswith("```"):
                in_code_block = not in_code_block

            # A '#' line only counts as a heading (H1-H6) OUTSIDE fenced code
            # blocks; otherwise shell/Python comments inside examples would be
            # miscounted as sections (bug fix).
            is_heading = not in_code_block and re.match(r"^#{1,6}\s+", line)

            if is_heading:
                section_count += 1
                # Include first 4 sections (title + 3 sections like
                # Installation, Quick Start, Features)
                if section_count <= 4:
                    content_lines.append(line)
                    char_count += len(line)
                else:
                    # Fifth heading starts a section we don't want; stop here.
                    # (Headings are only detected outside code blocks, so this
                    # never truncates mid-fence.)
                    break
            else:
                # Regular content line (including fenced-code lines)
                content_lines.append(line)
                char_count += len(line)

                # Stop once we have enough content, but never mid-code-block
                if char_count >= max_chars and not in_code_block:
                    break

        result = "\n".join(content_lines).strip()

        # If we stopped because of the size budget, tidy the cut so we don't
        # end mid-sentence.
        if char_count >= max_chars and not in_code_block:
            result = MarkdownCleaner._truncate_at_sentence(result, max_chars)

        return result

    @staticmethod
    def _truncate_at_sentence(text: str, max_chars: int) -> str:
        """
        Truncate at last complete sentence before max_chars.

        Args:
            text: Text to truncate
            max_chars: Maximum character count

        Returns:
            Truncated text ending at sentence boundary (or word boundary with
            an ellipsis when no late-enough sentence boundary exists)
        """
        if len(text) <= max_chars:
            return text

        # Find last sentence boundary before max_chars
        truncated = text[:max_chars]

        # Look for last period, exclamation, or question mark
        last_sentence = max(truncated.rfind(". "), truncated.rfind("! "), truncated.rfind("? "))

        # Only use the sentence boundary if it keeps at least half the budget
        if last_sentence > max_chars // 2:
            return truncated[: last_sentence + 1]

        # Fall back to word boundary
        last_space = truncated.rfind(" ")
        if last_space > 0:
            return truncated[:last_space] + "..."

        return truncated + "..."