bmad-plus 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. package/CHANGELOG.md +75 -0
  2. package/README.md +482 -0
  3. package/osint-agent-package/README.md +88 -0
  4. package/osint-agent-package/SETUP_KEYS.md +108 -0
  5. package/osint-agent-package/agents/osint-investigator.md +80 -0
  6. package/osint-agent-package/install.ps1 +87 -0
  7. package/osint-agent-package/install.sh +76 -0
  8. package/osint-agent-package/skills/bmad-osint-investigate/SKILL.md +147 -0
  9. package/osint-agent-package/skills/bmad-osint-investigate/osint/SKILL.md +452 -0
  10. package/osint-agent-package/skills/bmad-osint-investigate/osint/assets/dossier-template.md +116 -0
  11. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/content-extraction.md +100 -0
  12. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/enrichment-databases-fr.md +148 -0
  13. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/platforms.md +130 -0
  14. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/psychoprofile.md +69 -0
  15. package/osint-agent-package/skills/bmad-osint-investigate/osint/references/tools.md +281 -0
  16. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/_http.py +101 -0
  17. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/apify.py +260 -0
  18. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/brightdata.py +101 -0
  19. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/diagnose.py +141 -0
  20. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/exa.py +79 -0
  21. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/jina.py +71 -0
  22. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/mcp-client.py +136 -0
  23. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/parallel.py +85 -0
  24. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/perplexity.py +102 -0
  25. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/tavily.py +72 -0
  26. package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/volley.py +208 -0
  27. package/osint-agent-package/skills/bmad-osint-investigator/SKILL.md +15 -0
  28. package/package.json +51 -0
  29. package/readme-international/README.de.md +392 -0
  30. package/readme-international/README.es.md +484 -0
  31. package/readme-international/README.fr.md +482 -0
  32. package/src/bmad-plus/agents/agent-architect-dev/SKILL.md +96 -0
  33. package/src/bmad-plus/agents/agent-architect-dev/bmad-skill-manifest.yaml +13 -0
  34. package/src/bmad-plus/agents/agent-maker/SKILL.md +201 -0
  35. package/src/bmad-plus/agents/agent-maker/bmad-skill-manifest.yaml +13 -0
  36. package/src/bmad-plus/agents/agent-orchestrator/SKILL.md +137 -0
  37. package/src/bmad-plus/agents/agent-orchestrator/bmad-skill-manifest.yaml +13 -0
  38. package/src/bmad-plus/agents/agent-quality/SKILL.md +83 -0
  39. package/src/bmad-plus/agents/agent-quality/bmad-skill-manifest.yaml +13 -0
  40. package/src/bmad-plus/agents/agent-shadow/SKILL.md +71 -0
  41. package/src/bmad-plus/agents/agent-shadow/bmad-skill-manifest.yaml +13 -0
  42. package/src/bmad-plus/agents/agent-strategist/SKILL.md +80 -0
  43. package/src/bmad-plus/agents/agent-strategist/bmad-skill-manifest.yaml +13 -0
  44. package/src/bmad-plus/data/role-triggers.yaml +209 -0
  45. package/src/bmad-plus/module-help.csv +10 -0
  46. package/src/bmad-plus/module.yaml +174 -0
  47. package/src/bmad-plus/skills/bmad-plus-autopilot/SKILL.md +99 -0
  48. package/src/bmad-plus/skills/bmad-plus-parallel/SKILL.md +93 -0
  49. package/src/bmad-plus/skills/bmad-plus-sync/SKILL.md +69 -0
  50. package/tools/bmad-plus-npx.js +33 -0
  51. package/tools/cli/bmad-plus-cli.js +50 -0
  52. package/tools/cli/commands/install.js +437 -0
  53. package/tools/cli/commands/uninstall.js +70 -0
@@ -0,0 +1,101 @@
1
+ """Shared HTTP utilities for OSINT scripts — stdlib only, zero dependencies."""
2
+
3
+ import http.client
4
+ import json
5
+ import os
6
+ import ssl
7
+ import sys
8
+ from urllib.parse import urlparse, urlencode, quote
9
+
10
+
11
def https_request(method, url, headers=None, body=None, timeout=120):
    """Make an HTTP(S) request and return (status, headers, body_str).

    Handles both http and https URLs; https is assumed when the URL has no
    scheme.  The response body is decoded as UTF-8 with replacement for
    undecodable bytes.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        url: Full URL; scheme optional (defaults to https).
        headers: Optional dict of extra request headers (merged over the
            default User-Agent).
        body: Optional request body (str or bytes).
        timeout: Socket timeout in seconds.

    Returns:
        Tuple of (status_code, response_headers_dict, body_text).
    """
    parsed = urlparse(url)
    scheme = parsed.scheme or "https"
    host = parsed.hostname
    port = parsed.port or (443 if scheme == "https" else 80)
    path = parsed.path or "/"
    if parsed.query:
        path += "?" + parsed.query

    if scheme == "https":
        ctx = ssl.create_default_context()
        conn = http.client.HTTPSConnection(host, port, timeout=timeout, context=ctx)
    else:
        conn = http.client.HTTPConnection(host, port, timeout=timeout)

    hdrs = {"User-Agent": "osint-skill/3.2-python"}
    if headers:
        hdrs.update(headers)

    # Bug fix: the original leaked the connection when request()/read()
    # raised (timeout, reset, TLS failure); always close via finally.
    try:
        conn.request(method, path, body=body, headers=hdrs)
        resp = conn.getresponse()
        data = resp.read().decode("utf-8", errors="replace")
        status = resp.status
        resp_headers = dict(resp.getheaders())
    finally:
        conn.close()
    return status, resp_headers, data
41
+
42
+
43
def api_post(url, payload, headers=None, timeout=120):
    """POST a JSON payload to *url* and return the parsed JSON response.

    Logs to stderr and returns None on HTTP errors (status >= 400) or when
    the response body is not valid JSON.  *payload* may be a dict (which is
    serialized) or a pre-encoded string.
    """
    merged = {"Content-Type": "application/json"}
    merged.update(headers or {})
    encoded = json.dumps(payload) if isinstance(payload, dict) else payload
    status, _, data = https_request("POST", url, headers=merged, body=encoded, timeout=timeout)
    if status >= 400:
        print(f"ERROR: HTTP {status}: {data[:300]}", file=sys.stderr)
        return None
    try:
        return json.loads(data)
    except json.JSONDecodeError:
        print(f"ERROR: Invalid JSON response: {data[:300]}", file=sys.stderr)
        return None
58
+
59
+
60
def api_get(url, headers=None, timeout=120):
    """GET *url* and return the parsed JSON response.

    Logs to stderr and returns None on HTTP errors (status >= 400).  A
    successful but non-JSON body is wrapped as {"raw": <first 5000 chars>}.
    """
    status, _, data = https_request("GET", url, headers=headers, timeout=timeout)
    if status >= 400:
        print(f"ERROR: HTTP {status}: {data[:300]}", file=sys.stderr)
        return None
    try:
        parsed = json.loads(data)
    except json.JSONDecodeError:
        # Not JSON — hand back the raw text, truncated.
        parsed = {"raw": data[:5000]}
    return parsed
71
+
72
+
73
def get_key(env_var, file_fallback=None, required=True, help_url=""):
    """Load an API key from the environment, else from a fallback file.

    Args:
        env_var: Environment variable checked first.
        file_fallback: Optional path whose first line holds the key.
        required: When True and no key is found, print an error (with
            *help_url* if given) to stderr and exit(1).
        help_url: Where the user can obtain a key.

    Returns:
        The key string, or "" when absent and not required.
    """
    value = os.environ.get(env_var, "")
    if value:
        return value
    if file_fallback and os.path.isfile(file_fallback):
        with open(file_fallback, "r") as fh:
            # First line only; trailing newline/whitespace stripped.
            return fh.readline().strip()
    if required:
        print(f"ERROR: {env_var} not set.", file=sys.stderr)
        if help_url:
            print(f"Get one at: {help_url}", file=sys.stderr)
        sys.exit(1)
    return ""
87
+
88
+
89
def get_workspace():
    """Return (workspace, skill_dir, scripts_dir) for this skill layout.

    The workspace root is assumed to sit two directory levels above the
    skill directory containing this scripts/ folder.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    skill = os.path.dirname(here)
    root = os.path.dirname(os.path.dirname(skill))
    return root, skill, here
95
+
96
+
97
def truncate(text, max_len=200):
    """Shorten *text* to at most *max_len* chars, appending "..." if cut.

    Falsy input (None, "") yields "".
    """
    if not text:
        return ""
    if len(text) <= max_len:
        return text
    return text[:max_len] + "..."
@@ -0,0 +1,260 @@
1
+ #!/usr/bin/env python3
2
+ """Apify API — universal actor runner + LinkedIn/Instagram shortcuts.
3
+
4
+ Replaces: apify.sh, run-actor.sh, run_actor.js
5
+ Stdlib only, zero dependencies.
6
+
7
+ Usage:
8
+ python apify.py run <actor_id> '<json_input>' [--output file] [--format csv|json]
9
+ python apify.py linkedin <profile_url>
10
+ python apify.py instagram <username>
11
+ python apify.py results <run_id>
12
+ python apify.py status <run_id>
13
+ python apify.py store-search <query>
14
+ """
15
+
16
+ import json
17
+ import os
18
+ import sys
19
+ import time
20
+
21
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
22
+ from _http import api_post, api_get, get_key, get_workspace
23
+
24
+ BASE = "https://api.apify.com/v2"
25
+ USER_AGENT = "osint-skill/3.2-python"
26
+
27
+
28
def init():
    """Resolve the Apify API token or exit(1) with setup instructions.

    Lookup order: APIFY_TOKEN env var, APIFY_API_TOKEN env var, then the
    first line of <workspace>/scripts/apify-api-token.txt.
    """
    workspace, _, _ = get_workspace()
    token = os.environ.get("APIFY_TOKEN") or os.environ.get("APIFY_API_TOKEN")
    if not token:
        fallback = os.path.join(workspace, "scripts", "apify-api-token.txt")
        if os.path.isfile(fallback):
            with open(fallback) as fh:
                token = fh.readline().strip()
    if token:
        return token
    print("ERROR: No Apify token found.", file=sys.stderr)
    print("Set APIFY_API_TOKEN env var, or put token in scripts/apify-api-token.txt", file=sys.stderr)
    print("Get one at: https://console.apify.com/account/integrations", file=sys.stderr)
    sys.exit(1)
42
+
43
+
44
def start_actor(token, actor_id, input_json):
    """Start an Apify actor run; return (run_id, default_dataset_id).

    *input_json* may be a dict or a JSON string.  Exits(1) on invalid JSON
    or when the API call fails.
    """
    slug = actor_id.replace("/", "~")  # "user/actor" -> API path form
    endpoint = f"{BASE}/acts/{slug}/runs?token={token}"
    if isinstance(input_json, str):
        try:
            payload = json.loads(input_json)
        except json.JSONDecodeError as exc:
            print(f"ERROR: Invalid JSON input: {exc}", file=sys.stderr)
            sys.exit(1)
    else:
        payload = input_json
    response = api_post(endpoint, payload)
    if not response:
        sys.exit(1)
    run = response.get("data", response)
    return run.get("id"), run.get("defaultDatasetId")
58
+
59
+
60
def poll_until_complete(token, run_id, timeout=600, interval=5):
    """Poll an actor run every *interval* seconds until a terminal state.

    Prints each status transition.  Returns the final status string, or
    "TIMED-OUT" when *timeout* seconds elapse with the run still active.
    Exits(1) if the status endpoint stops responding.
    """
    endpoint = f"{BASE}/actor-runs/{run_id}?token={token}"
    deadline = time.time() + timeout
    previous = None

    while True:
        payload = api_get(endpoint)
        if not payload:
            print("ERROR: Failed to get run status", file=sys.stderr)
            sys.exit(1)
        current = payload.get("data", payload).get("status", "UNKNOWN")
        if current != previous:
            print(f"Status: {current}")
            previous = current
        if current in ("SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT"):
            return current
        if time.time() > deadline:
            print(f"WARNING: Timeout after {timeout}s, actor still running")
            return "TIMED-OUT"
        time.sleep(interval)
82
+
83
+
84
def download_results(token, dataset_id, output_path=None, fmt="json"):
    """Download dataset items; save them to a file or preview the top 5.

    Args:
        token: Apify API token.
        dataset_id: Dataset whose items to fetch.
        output_path: When given, write all items there; otherwise print a
            five-item preview to stdout.
        fmt: "json" (default) or "csv" — only used with *output_path*.
    """
    url = f"{BASE}/datasets/{dataset_id}/items?token={token}&format=json"
    data = api_get(url)
    if not data:
        return
    items = data if isinstance(data, list) else [data]

    if output_path:
        _save_items(items, output_path, fmt)
    else:
        _preview_items(items, dataset_id)


def _cell(value):
    """Render one dataset value as a CSV cell string (long strings are
    truncated to 200 chars; lists/dicts JSON-encoded; None becomes "")."""
    if isinstance(value, str):
        return value[:200] + "..." if len(value) > 200 else value
    if isinstance(value, (list, dict)):
        return json.dumps(value, ensure_ascii=False)
    if value is None:
        return ""
    return str(value)


def _save_items(items, output_path, fmt):
    """Write *items* to disk as JSON or CSV and report the record count.

    Fix: CSV output now uses the stdlib csv module instead of hand-rolled
    quoting, which mishandled embedded carriage returns and lacked the
    newline="" open() convention.
    """
    if fmt == "json":
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(items, f, indent=2, ensure_ascii=False)
    elif items:
        import csv  # local import: only needed on the CSV path
        # Column set comes from the first record, matching prior behavior.
        fields = list(items[0].keys())
        with open(output_path, "w", encoding="utf-8", newline="") as f:
            writer = csv.writer(f)
            writer.writerow(fields)
            for row in items:
                writer.writerow([_cell(row.get(k, "")) for k in fields])
    print(f"Saved to: {output_path}")
    print(f"Records: {len(items)}")


def _preview_items(items, dataset_id):
    """Print up to 5 items, truncating long values for readability."""
    total = len(items)
    if total == 0:
        print("\nNo results found.")
        return
    print(f"\n{'='*60}")
    print(f"TOP 5 RESULTS (of {total} total)")
    print("=" * 60)
    for i, item in enumerate(items[:5]):
        print(f"\n--- Result {i+1} ---")
        for key, value in item.items():
            display = value
            if isinstance(value, (list, dict)):
                display = json.dumps(value, ensure_ascii=False)
            if isinstance(display, str) and len(display) > 100:
                display = display[:100] + "..."
            print(f" {key}: {display}")
    print(f"\n{'='*60}")
    if total > 5:
        print(f"Showing 5 of {total} results.")
        print(f"Full data: https://console.apify.com/storage/datasets/{dataset_id}")
143
+
144
+
145
def run_actor(token, actor_id, input_json, output=None, fmt="json", timeout=600):
    """Convenience pipeline: start an actor, wait for it, fetch results.

    Exits(1) when the run ends in any state other than SUCCEEDED, pointing
    at the Apify console page for the run.
    """
    print(f"Starting actor: {actor_id}")
    run_id, dataset_id = start_actor(token, actor_id, input_json)
    print(f"Run ID: {run_id}")
    print(f"Dataset ID: {dataset_id}")
    final = poll_until_complete(token, run_id, timeout=timeout)
    if final == "SUCCEEDED":
        download_results(token, dataset_id, output, fmt)
        return
    print(f"ERROR: Actor run {final}", file=sys.stderr)
    print(f"Details: https://console.apify.com/actors/runs/{run_id}", file=sys.stderr)
    sys.exit(1)
157
+
158
+
159
def get_results(token, run_id):
    """Print up to 5000 chars of the dataset items for a finished run."""
    items = api_get(f"{BASE}/actor-runs/{run_id}/dataset/items?token={token}")
    if not items:
        return
    print(json.dumps(items, indent=2, ensure_ascii=False)[:5000])
164
+
165
+
166
def get_status(token, run_id):
    """Print up to 2000 chars of the raw run-status JSON."""
    info = api_get(f"{BASE}/actor-runs/{run_id}?token={token}")
    if not info:
        return
    print(json.dumps(info, indent=2, ensure_ascii=False)[:2000])
171
+
172
+
173
def store_search(token, query):
    """Search the Apify actor store and print up to 10 matches."""
    from urllib.parse import quote
    data = api_get(f"{BASE}/store?token={token}&limit=10&search={quote(query)}")
    if not data:
        return
    matches = data.get("data", {}).get("items", data if isinstance(data, list) else [])
    for entry in matches[:10]:
        print(f'📦 {entry.get("title", entry.get("name", ""))}')
        print(f' ID: {entry.get("id", entry.get("actorId", ""))}')
        summary = entry.get("description", "")[:150]
        if summary:
            print(f" {summary}")
        print()
186
+
187
+
188
def main():
    """CLI entry point — dispatch on the first positional argument.

    Commands: run, linkedin, instagram, results, status, store-search.
    Prints the module usage and exits(1) on a missing or unknown command.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)
    token = init()
    cmd = sys.argv[1]

    if cmd == "run":
        if len(sys.argv) < 4:
            # Consistency fix: usage errors go to stderr like every other command.
            print("Usage: apify.py run <actor_id> '<json_input>' [--output file] [--format csv|json]", file=sys.stderr)
            sys.exit(1)
        actor_id = sys.argv[2]
        input_json = sys.argv[3]
        # Parse optional --output / --format flags; unrecognized tokens are skipped.
        output = None
        fmt = "json"
        i = 4
        while i < len(sys.argv):
            if sys.argv[i] == "--output" and i + 1 < len(sys.argv):
                output = sys.argv[i + 1]
                i += 2
            elif sys.argv[i] == "--format" and i + 1 < len(sys.argv):
                fmt = sys.argv[i + 1]
                i += 2
            else:
                i += 1
        run_actor(token, actor_id, input_json, output, fmt)

    elif cmd == "linkedin":
        url = sys.argv[2] if len(sys.argv) > 2 else ""
        if not url:
            print("Usage: apify.py linkedin <profile_url>", file=sys.stderr)
            sys.exit(1)
        run_actor(token, "supreme_coder/linkedin-profile-scraper",
                  json.dumps({"urls": [{"url": url}]}))

    elif cmd == "instagram":
        username = sys.argv[2] if len(sys.argv) > 2 else ""
        if not username:
            print("Usage: apify.py instagram <username>", file=sys.stderr)
            sys.exit(1)
        run_actor(token, "apify/instagram-profile-scraper",
                  json.dumps({"usernames": [username]}))

    elif cmd == "results":
        run_id = sys.argv[2] if len(sys.argv) > 2 else ""
        if not run_id:
            print("Usage: apify.py results <run_id>", file=sys.stderr)
            sys.exit(1)
        get_results(token, run_id)

    elif cmd == "status":
        run_id = sys.argv[2] if len(sys.argv) > 2 else ""
        if not run_id:
            print("Usage: apify.py status <run_id>", file=sys.stderr)
            sys.exit(1)
        get_status(token, run_id)

    elif cmd == "store-search":
        query = " ".join(sys.argv[2:])
        if not query:
            print("Usage: apify.py store-search <query>", file=sys.stderr)
            sys.exit(1)
        store_search(token, query)

    else:
        print(f"Unknown command: {cmd}", file=sys.stderr)
        print(__doc__)
        sys.exit(1)
257
+
258
+
259
+ if __name__ == "__main__":
260
+ main()
@@ -0,0 +1,101 @@
1
+ #!/usr/bin/env python3
2
+ """Bright Data MCP wrapper — scrape, search, search-geo, search-yandex. Stdlib only.
3
+
4
+ Uses mcp-client.py for MCP JSON-RPC calls.
5
+ """
6
+
7
+ import json
8
+ import os
9
+ import subprocess
10
+ import sys
11
+
12
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
13
+ from _http import get_key, get_workspace
14
+
15
+
16
def init():
    """Return the Bright Data MCP endpoint URL (env var or file fallback)."""
    workspace, _, _ = get_workspace()
    fallback = os.path.join(workspace, "scripts", "brightdata-mcp-url.txt")
    return get_key(
        "BRIGHTDATA_MCP_URL",
        file_fallback=fallback,
        help_url="https://brightdata.com/products/web-scraper/mcp",
    )
21
+
22
+
23
def mcp_call(mcp_url, tool_name, arguments):
    """Invoke an MCP tool through the bundled mcp-client.py helper.

    Forwards the child process's stdout/stderr and returns its exit code.
    *arguments* is JSON-encoded before being passed on the command line.
    """
    client = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mcp-client.py")
    proc = subprocess.run(
        [sys.executable, client, mcp_url, tool_name, json.dumps(arguments)],
        capture_output=True,
        text=True,
        timeout=120,
    )
    if proc.stdout:
        print(proc.stdout)
    if proc.stderr:
        print(proc.stderr, file=sys.stderr)
    return proc.returncode
37
+
38
+
39
def list_tools(mcp_url):
    """Print the MCP server's tool list via mcp-client.py --list-tools."""
    client = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mcp-client.py")
    proc = subprocess.run(
        [sys.executable, client, mcp_url, "--list-tools"],
        capture_output=True,
        text=True,
        timeout=30,
    )
    if proc.stdout:
        print(proc.stdout)
    if proc.stderr:
        print(proc.stderr, file=sys.stderr)
51
+
52
+
53
def main():
    """CLI entry point for the Bright Data MCP wrapper."""
    if len(sys.argv) < 2:
        print("Usage: brightdata.py {tools|scrape|scrape-batch|search|search-geo|search-yandex} <args>")
        print()
        for line in (
            " tools - list available MCP tools",
            " scrape <url> - any URL → markdown (bypasses CAPTCHA)",
            " scrape-batch <url1> <url2> - batch scrape up to 10 URLs",
            " search <query> - Google search via Bright Data",
            " search-geo <cc> <query> - geo-targeted search",
            " search-yandex <query> - Yandex search",
        ):
            print(line)
        sys.exit(1)

    mcp_url = init()
    command = sys.argv[1]
    rest = sys.argv[2:]

    if command == "tools":
        list_tools(mcp_url)
    elif command == "scrape":
        target = rest[0] if rest else ""
        if not target:
            print("Usage: brightdata.py scrape <url>", file=sys.stderr)
            sys.exit(1)
        mcp_call(mcp_url, "scrape_as_markdown", {"url": target})
    elif command == "scrape-batch":
        if not rest:
            print("Usage: brightdata.py scrape-batch <url1> <url2>...", file=sys.stderr)
            sys.exit(1)
        mcp_call(mcp_url, "scrape_as_markdown_batch", {"urls": rest})
    elif command == "search":
        mcp_call(mcp_url, "web_data_search_engine", {"query": " ".join(rest)})
    elif command == "search-geo":
        if len(sys.argv) < 4:
            print("Usage: brightdata.py search-geo <country_code> <query>", file=sys.stderr)
            sys.exit(1)
        mcp_call(mcp_url, "web_data_search_engine",
                 {"query": " ".join(rest[1:]), "country": rest[0]})
    elif command == "search-yandex":
        mcp_call(mcp_url, "web_data_search_engine",
                 {"query": " ".join(rest), "engine": "yandex"})
    else:
        print(f"Unknown: {command}", file=sys.stderr)
        sys.exit(1)
98
+
99
+
100
+ if __name__ == "__main__":
101
+ main()
@@ -0,0 +1,141 @@
1
+ #!/usr/bin/env python3
2
+ """OSINT Toolkit Diagnostic — checks available API keys, CLI tools, and capabilities."""
3
+
4
+ import os
5
+ import shutil
6
+ import sys
7
+
8
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
9
+ from _http import get_workspace
10
+
11
+
12
def check_key(env_var, file_path=None):
    """Report whether an API key is configured.

    Returns (True, source_label) when *env_var* is set or *file_path*
    exists on disk; otherwise (False, "").
    """
    if os.environ.get(env_var):
        return True, "(env)"
    if file_path and os.path.isfile(file_path):
        return True, f"(file: {os.path.basename(file_path)})"
    return False, ""
19
+
20
+
21
def main():
    """Print a full diagnostic report of the OSINT toolkit's configuration.

    Sections: API tokens, CLI tools and bundled scripts, internal
    intelligence sources, and a derived capability summary.  Output-only;
    never exits non-zero regardless of what is missing.
    """
    workspace, skill_dir, scripts_dir = get_workspace()

    print("=== OSINT TOOLKIT DIAGNOSTIC ===")
    print()

    # 1. API Tokens — each entry is (env_var, optional fallback file, signup URL).
    print("📡 API Tokens:")
    keys = [
        ("APIFY_API_TOKEN", os.path.join(workspace, "scripts", "apify-api-token.txt"),
         "https://console.apify.com/account/integrations"),
        ("JINA_API_KEY", os.path.join(workspace, "scripts", "jina-api-key.txt"),
         "https://jina.ai/api-key"),
        ("PERPLEXITY_API_KEY", None, "https://perplexity.ai/settings/api"),
        ("PARALLEL_API_KEY", os.path.join(workspace, "scripts", "parallel-api-key.txt"),
         "https://platform.parallel.ai"),
        ("EXA_API_KEY", None, "https://dashboard.exa.ai"),
        ("TAVILY_API_KEY", None, "https://app.tavily.com/home"),
        ("BRIGHTDATA_MCP_URL", os.path.join(workspace, "scripts", "brightdata-mcp-url.txt"),
         "https://brightdata.com/products/web-scraper/mcp"),
    ]
    for env_var, file_path, url in keys:
        found, source = check_key(env_var, file_path)
        if found:
            print(f" ✅ {env_var} {source}")
        else:
            print(f" ❌ {env_var} — get one at {url}")
    print()

    # 2. CLI Tools — (name, alternative name) pairs probed via shutil.which.
    print("🔧 CLI Tools:")
    tools = [
        ("python3", "python"),
        ("node", None),
        ("jq", None),
        ("curl", None),
        ("git", None),
    ]
    for tool_name, alt_name in tools:
        path = shutil.which(tool_name) or (shutil.which(alt_name) if alt_name else None)
        if path:
            print(f" ✅ {tool_name}")
        else:
            print(f" ❌ {tool_name}")

    # Check run_actor.js — legacy Node runner shipped alongside the Python scripts.
    run_actor_js = os.path.join(scripts_dir, "run_actor.js")
    if os.path.isfile(run_actor_js):
        print(f" ✅ run_actor.js (embedded, 55+ actors)")
    else:
        print(f" ❌ run_actor.js (missing)")

    # Check Python scripts bundled in this skill's scripts/ directory.
    py_scripts = ["perplexity.py", "tavily.py", "exa.py", "jina.py",
                  "parallel.py", "apify.py", "brightdata.py", "volley.py"]
    for script in py_scripts:
        path = os.path.join(scripts_dir, script)
        if os.path.isfile(path):
            print(f" ✅ {script}")
        else:
            print(f" ⚠️ {script} (not found)")
    print()

    # 3. Internal Intelligence — presumably optional sibling skills/vaults
    # in the workspace; verified only by file existence, not by running them.
    print("📱 Internal Intelligence:")
    tg_path = os.path.join(workspace, "skills", "telegram", "scripts", "tg.py")
    if os.path.isfile(tg_path):
        print(" ✅ tg.py (Telegram history/search)")
    else:
        print(" ❌ tg.py (no Telegram access)")

    # himalaya may be on PATH or installed to ~/.local/bin.
    himalaya = shutil.which("himalaya")
    himalaya_local = os.path.expanduser("~/.local/bin/himalaya")
    if himalaya or os.path.isfile(himalaya_local):
        print(" ✅ himalaya (email search)")
    else:
        print(" ❌ himalaya (no email access)")

    # CRM vault: count .md cards directly inside vault/crm (non-recursive).
    vault_crm = os.path.join(workspace, "vault", "crm")
    if os.path.isdir(vault_crm):
        count = sum(1 for f in os.listdir(vault_crm) if f.endswith(".md"))
        print(f" ✅ vault/crm ({count} cards)")
    else:
        print(" ❌ vault/crm (no CRM vault)")
    print()

    # 4. Capability Summary — re-checks each key and maps it onto the
    # user-facing capabilities it unlocks.
    print("📊 Capabilities:")
    apify_ok = check_key("APIFY_API_TOKEN", os.path.join(workspace, "scripts", "apify-api-token.txt"))[0]
    bright_ok = check_key("BRIGHTDATA_MCP_URL", os.path.join(workspace, "scripts", "brightdata-mcp-url.txt"))[0]
    jina_ok = check_key("JINA_API_KEY", os.path.join(workspace, "scripts", "jina-api-key.txt"))[0]
    perp_ok = check_key("PERPLEXITY_API_KEY")[0]
    tavily_ok = check_key("TAVILY_API_KEY")[0]
    exa_ok = check_key("EXA_API_KEY")[0]
    parallel_ok = check_key("PARALLEL_API_KEY", os.path.join(workspace, "scripts", "parallel-api-key.txt"))[0]

    # (available_flag, human-readable capability name)
    caps = [
        (apify_ok, "LinkedIn scraping (Apify)"),
        (apify_ok, "Instagram scraping (Apify)"),
        (apify_ok, "TikTok scraping (Apify)"),
        (apify_ok, "YouTube scraping (Apify)"),
        (apify_ok, "Contact enrichment (Apify)"),
        (apify_ok, "Google Maps (Apify)"),
        (bright_ok, "Facebook scraping (Bright Data)"),
        (bright_ok, "CAPTCHA bypass (Bright Data)"),
        (jina_ok, "Deep search (Jina)"),
        (perp_ok, "Quick answers (Perplexity Sonar)"),
        (perp_ok, "Deep research (Perplexity Deep)"),
        (tavily_ok, "Agent search (Tavily)"),
        (exa_ok, "Semantic search (Exa)"),
        (exa_ok, "People/Company search (Exa)"),
        (parallel_ok, "AI search (Parallel)"),
    ]
    for available, name in caps:
        print(f" {'✅' if available else '❌'} {name}")
    print()
    print("=== END DIAGNOSTIC ===")
138
+
139
+
140
+ if __name__ == "__main__":
141
+ main()
@@ -0,0 +1,79 @@
1
+ #!/usr/bin/env python3
2
+ """Exa AI — semantic search, company research, people search, crawl. Stdlib only."""
3
+
4
+ import json
5
+ import os
6
+ import sys
7
+
8
+ sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
9
+ from _http import api_post, get_key, truncate
10
+
11
+ BASE = "https://api.exa.ai"
12
+
13
+
14
def init():
    """Fetch the Exa API key (exits with help text when missing)."""
    key = get_key("EXA_API_KEY", help_url="https://dashboard.exa.ai")
    return key
16
+
17
+
18
def search(api_key, query, category=None, max_chars=500, icon="🔗"):
    """Run an Exa semantic search and print up to 10 results.

    Each result shows title, URL, and a snippet truncated to *max_chars*
    characters; *icon* prefixes every title line.
    """
    request = {
        "query": query,
        "type": "auto",
        "numResults": 10,
        "contents": {"text": {"maxCharacters": max_chars}},
    }
    if category:
        request["category"] = category
    response = api_post(f"{BASE}/search", request, headers={"x-api-key": api_key})
    if not response:
        return
    for hit in response.get("results", [])[:10]:
        print(f'{icon} {hit.get("title", "")}')
        print(f' {hit.get("url", "")}')
        snippet = truncate(hit.get("text", ""), max_chars)
        if snippet:
            print(f" {snippet}")
        print()
39
+
40
+
41
def crawl(api_key, url):
    """Fetch page contents for *url* via Exa /contents and print them.

    Prints title, URL, and up to 3000 chars of extracted text per result.
    """
    response = api_post(
        f"{BASE}/contents",
        {"urls": [url], "text": {"maxCharacters": 5000}},
        headers={"x-api-key": api_key},
    )
    if not response:
        return
    for page in response.get("results", []):
        print(f'📄 {page.get("title", "")}')
        print(f' {page.get("url", "")}')
        print(page.get("text", "")[:3000])
+
53
+
54
def main():
    """CLI entry point: exa.py {search|company|people|crawl|deep} <query>."""
    if len(sys.argv) < 3:
        print("Usage: exa.py search|company|people|crawl|deep <query>")
        sys.exit(1)
    api_key = init()
    cmd, query = sys.argv[1], " ".join(sys.argv[2:])

    if cmd == "search":
        search(api_key, query)
        return
    if cmd == "company":
        search(api_key, f"{query} company information about",
               category="company", max_chars=1000, icon="🏢")
        return
    if cmd == "people":
        search(api_key, query, category="personal site", icon="👤")
        return
    if cmd == "crawl":
        crawl(api_key, query)
        return
    if cmd == "deep":
        # Deep research is not exposed through this plain REST wrapper.
        print("🔬 Exa Deep Research — use MCP or dashboard")
        print(" MCP: https://mcp.exa.ai/mcp?tools=deep_researcher_start,deep_researcher_check")
        return
    print(f"Unknown: {cmd} (use search|company|people|crawl|deep)", file=sys.stderr)
    sys.exit(1)
76
+
77
+
78
+ if __name__ == "__main__":
79
+ main()