bmad-plus 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +75 -0
- package/README.md +482 -0
- package/osint-agent-package/README.md +88 -0
- package/osint-agent-package/SETUP_KEYS.md +108 -0
- package/osint-agent-package/agents/osint-investigator.md +80 -0
- package/osint-agent-package/install.ps1 +87 -0
- package/osint-agent-package/install.sh +76 -0
- package/osint-agent-package/skills/bmad-osint-investigate/SKILL.md +147 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/SKILL.md +452 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/assets/dossier-template.md +116 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/content-extraction.md +100 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/enrichment-databases-fr.md +148 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/platforms.md +130 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/psychoprofile.md +69 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/references/tools.md +281 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/_http.py +101 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/apify.py +260 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/brightdata.py +101 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/diagnose.py +141 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/exa.py +79 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/jina.py +71 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/mcp-client.py +136 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/parallel.py +85 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/perplexity.py +102 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/tavily.py +72 -0
- package/osint-agent-package/skills/bmad-osint-investigate/osint/scripts/volley.py +208 -0
- package/osint-agent-package/skills/bmad-osint-investigator/SKILL.md +15 -0
- package/package.json +51 -0
- package/readme-international/README.de.md +392 -0
- package/readme-international/README.es.md +484 -0
- package/readme-international/README.fr.md +482 -0
- package/src/bmad-plus/agents/agent-architect-dev/SKILL.md +96 -0
- package/src/bmad-plus/agents/agent-architect-dev/bmad-skill-manifest.yaml +13 -0
- package/src/bmad-plus/agents/agent-maker/SKILL.md +201 -0
- package/src/bmad-plus/agents/agent-maker/bmad-skill-manifest.yaml +13 -0
- package/src/bmad-plus/agents/agent-orchestrator/SKILL.md +137 -0
- package/src/bmad-plus/agents/agent-orchestrator/bmad-skill-manifest.yaml +13 -0
- package/src/bmad-plus/agents/agent-quality/SKILL.md +83 -0
- package/src/bmad-plus/agents/agent-quality/bmad-skill-manifest.yaml +13 -0
- package/src/bmad-plus/agents/agent-shadow/SKILL.md +71 -0
- package/src/bmad-plus/agents/agent-shadow/bmad-skill-manifest.yaml +13 -0
- package/src/bmad-plus/agents/agent-strategist/SKILL.md +80 -0
- package/src/bmad-plus/agents/agent-strategist/bmad-skill-manifest.yaml +13 -0
- package/src/bmad-plus/data/role-triggers.yaml +209 -0
- package/src/bmad-plus/module-help.csv +10 -0
- package/src/bmad-plus/module.yaml +174 -0
- package/src/bmad-plus/skills/bmad-plus-autopilot/SKILL.md +99 -0
- package/src/bmad-plus/skills/bmad-plus-parallel/SKILL.md +93 -0
- package/src/bmad-plus/skills/bmad-plus-sync/SKILL.md +69 -0
- package/tools/bmad-plus-npx.js +33 -0
- package/tools/cli/bmad-plus-cli.js +50 -0
- package/tools/cli/commands/install.js +437 -0
- package/tools/cli/commands/uninstall.js +70 -0
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
"""Shared HTTP utilities for OSINT scripts — stdlib only, zero dependencies."""
|
|
2
|
+
|
|
3
|
+
import http.client
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import ssl
|
|
7
|
+
import sys
|
|
8
|
+
from urllib.parse import urlparse, urlencode, quote
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def https_request(method, url, headers=None, body=None, timeout=120):
    """Make an HTTPS request and return (status, headers, body_str).

    Handles both http and https URLs.

    Args:
        method: HTTP verb, e.g. "GET" or "POST".
        url: full URL; scheme defaults to https when missing.
        headers: optional dict merged over the default User-Agent header.
        body: optional request body (str or bytes).
        timeout: socket timeout in seconds.

    Returns:
        (status_code, response_headers_dict, decoded_body_str).
    """
    parsed = urlparse(url)
    scheme = parsed.scheme or "https"
    host = parsed.hostname
    port = parsed.port or (443 if scheme == "https" else 80)
    path = parsed.path or "/"
    if parsed.query:
        path += "?" + parsed.query

    if scheme == "https":
        ctx = ssl.create_default_context()
        conn = http.client.HTTPSConnection(host, port, timeout=timeout, context=ctx)
    else:
        conn = http.client.HTTPConnection(host, port, timeout=timeout)

    hdrs = {"User-Agent": "osint-skill/3.2-python"}
    if headers:
        hdrs.update(headers)

    # Fix: close the connection even when request/read raises (previously a
    # failed read leaked the socket because close() was only on the happy path).
    try:
        conn.request(method, path, body=body, headers=hdrs)
        resp = conn.getresponse()
        data = resp.read().decode("utf-8", errors="replace")
        status = resp.status
        resp_headers = dict(resp.getheaders())
    finally:
        conn.close()
    return status, resp_headers, data
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def api_post(url, payload, headers=None, timeout=120):
    """Send a JSON POST and return the decoded response body.

    Prints a diagnostic to stderr and returns None when the server answers
    with HTTP >= 400 or when the body is not valid JSON.
    """
    merged = {"Content-Type": "application/json"}
    merged.update(headers or {})
    encoded = json.dumps(payload) if isinstance(payload, dict) else payload

    status, _, text = https_request("POST", url, headers=merged, body=encoded, timeout=timeout)
    if status >= 400:
        print(f"ERROR: HTTP {status}: {text[:300]}", file=sys.stderr)
        return None
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        print(f"ERROR: Invalid JSON response: {text[:300]}", file=sys.stderr)
        return None
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def api_get(url, headers=None, timeout=120):
    """Send a GET request and return the decoded JSON response.

    Returns None (with a stderr diagnostic) on HTTP >= 400.  A body that is
    not valid JSON is returned as {"raw": <first 5000 chars>} instead.
    """
    status, _, text = https_request("GET", url, headers=headers, timeout=timeout)
    if status >= 400:
        print(f"ERROR: HTTP {status}: {text[:300]}", file=sys.stderr)
        return None
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Not JSON — hand back the raw text, truncated, rather than failing.
        return {"raw": text[:5000]}
|
|
71
|
+
|
|
72
|
+
|
|
73
|
+
def get_key(env_var, file_fallback=None, required=True, help_url=""):
    """Resolve an API key: environment variable first, then a one-line file.

    When neither source yields a value and *required* is True, prints an
    error (plus *help_url* if given) and exits the process; otherwise
    returns "".
    """
    from_env = os.environ.get(env_var, "")
    if from_env:
        return from_env

    if file_fallback and os.path.isfile(file_fallback):
        with open(file_fallback, "r") as fh:
            return fh.readline().strip()

    if not required:
        return ""
    print(f"ERROR: {env_var} not set.", file=sys.stderr)
    if help_url:
        print(f"Get one at: {help_url}", file=sys.stderr)
    sys.exit(1)
|
|
87
|
+
|
|
88
|
+
|
|
89
|
+
def get_workspace():
    """Derive (workspace_root, skill_dir, scripts_dir) from this file's path.

    scripts_dir is this file's directory, skill_dir its parent, and the
    workspace root is two directories above the skill directory.
    """
    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    skill_dir = os.path.dirname(scripts_dir)
    root = os.path.dirname(os.path.dirname(skill_dir))
    return root, skill_dir, scripts_dir
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def truncate(text, max_len=200):
    """Shorten *text* to at most *max_len* characters, appending "..." if cut.

    Falsy input (None, "") yields "".
    """
    if not text:
        return ""
    if len(text) <= max_len:
        return text
    return text[:max_len] + "..."
|
|
@@ -0,0 +1,260 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Apify API — universal actor runner + LinkedIn/Instagram shortcuts.
|
|
3
|
+
|
|
4
|
+
Replaces: apify.sh, run-actor.sh, run_actor.js
|
|
5
|
+
Stdlib only, zero dependencies.
|
|
6
|
+
|
|
7
|
+
Usage:
|
|
8
|
+
python apify.py run <actor_id> '<json_input>' [--output file] [--format csv|json]
|
|
9
|
+
python apify.py linkedin <profile_url>
|
|
10
|
+
python apify.py instagram <username>
|
|
11
|
+
python apify.py results <run_id>
|
|
12
|
+
python apify.py status <run_id>
|
|
13
|
+
python apify.py store-search <query>
|
|
14
|
+
"""
|
|
15
|
+
|
|
16
|
+
import json
|
|
17
|
+
import os
|
|
18
|
+
import sys
|
|
19
|
+
import time
|
|
20
|
+
|
|
21
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
22
|
+
from _http import api_post, api_get, get_key, get_workspace
|
|
23
|
+
|
|
24
|
+
BASE = "https://api.apify.com/v2"
|
|
25
|
+
USER_AGENT = "osint-skill/3.2-python"
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
def init():
    """Locate the Apify API token and return it, or exit with guidance.

    Order: APIFY_TOKEN env var, APIFY_API_TOKEN env var, then a one-line
    scripts/apify-api-token.txt file under the workspace root.
    """
    token = os.environ.get("APIFY_TOKEN") or os.environ.get("APIFY_API_TOKEN")
    if token:
        return token

    root, _, _ = get_workspace()
    token_file = os.path.join(root, "scripts", "apify-api-token.txt")
    if os.path.isfile(token_file):
        with open(token_file) as fh:
            token = fh.readline().strip()
    if token:
        return token

    for msg in (
        "ERROR: No Apify token found.",
        "Set APIFY_API_TOKEN env var, or put token in scripts/apify-api-token.txt",
        "Get one at: https://console.apify.com/account/integrations",
    ):
        print(msg, file=sys.stderr)
    sys.exit(1)
|
|
42
|
+
|
|
43
|
+
|
|
44
|
+
def start_actor(token, actor_id, input_json):
    """Launch an Apify actor run; return (run_id, default_dataset_id).

    Exits the process on malformed JSON input or a failed API call.
    """
    slug = actor_id.replace("/", "~")  # API path form of "user/actor"
    url = f"{BASE}/acts/{slug}/runs?token={token}"

    if isinstance(input_json, str):
        try:
            payload = json.loads(input_json)
        except json.JSONDecodeError as e:
            print(f"ERROR: Invalid JSON input: {e}", file=sys.stderr)
            sys.exit(1)
    else:
        payload = input_json

    response = api_post(url, payload)
    if not response:
        sys.exit(1)
    info = response.get("data", response)
    return info.get("id"), info.get("defaultDatasetId")
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def poll_until_complete(token, run_id, timeout=600, interval=5):
    """Poll actor run until complete. Returns final status.

    Args:
        token: Apify API token.
        run_id: ID of the actor run to watch.
        timeout: max seconds to wait locally before returning "TIMED-OUT"
            (the remote run may keep executing).
        interval: seconds to sleep between polls.
    """
    url = f"{BASE}/actor-runs/{run_id}?token={token}"
    start_time = time.time()
    last_status = None  # print a status line only when the status changes

    while True:
        data = api_get(url)
        if not data:
            print("ERROR: Failed to get run status", file=sys.stderr)
            sys.exit(1)
        # Response is usually wrapped as {"data": {...}}; fall back to top level.
        status = data.get("data", data).get("status", "UNKNOWN")
        if status != last_status:
            print(f"Status: {status}")
            last_status = status
        # Terminal states — stop polling and report.
        if status in ("SUCCEEDED", "FAILED", "ABORTED", "TIMED-OUT"):
            return status
        elapsed = time.time() - start_time
        if elapsed > timeout:
            # Local deadline reached; caller treats this like a failed run.
            print(f"WARNING: Timeout after {timeout}s, actor still running")
            return "TIMED-OUT"
        time.sleep(interval)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
def download_results(token, dataset_id, output_path=None, fmt="json"):
    """Download actor results. If no output_path, display top 5.

    Args:
        token: Apify API token.
        dataset_id: dataset to fetch items from.
        output_path: when given, write all items to this file; otherwise
            pretty-print the first 5 items to stdout.
        fmt: "json" (default) or anything else for CSV output.
    """
    url = f"{BASE}/datasets/{dataset_id}/items?token={token}&format=json"
    data = api_get(url)
    if not data:
        return
    # Normalize: the items endpoint returns a list; wrap a single object.
    items = data if isinstance(data, list) else [data]

    if output_path:
        if fmt == "json":
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(items, f, indent=2, ensure_ascii=False)
        else:
            # CSV — hand-rolled writer: header from the FIRST item's keys,
            # long strings truncated to 200 chars, nested values JSON-encoded,
            # RFC-4180-style quoting via doubled double-quotes.
            if items:
                fields = list(items[0].keys())
                lines = [",".join(fields)]
                for row in items:
                    vals = []
                    for k in fields:
                        v = row.get(k, "")
                        if isinstance(v, str) and len(v) > 200:
                            v = v[:200] + "..."
                        elif isinstance(v, (list, dict)):
                            v = json.dumps(v, ensure_ascii=False)
                        if v is None:
                            v = ""
                        v = str(v)
                        # Quote fields containing a delimiter, quote, or newline.
                        if "," in v or '"' in v or "\n" in v:
                            v = f'"{v.replace(chr(34), chr(34)+chr(34))}"'
                        vals.append(v)
                    lines.append(",".join(vals))
                with open(output_path, "w", encoding="utf-8") as f:
                    f.write("\n".join(lines))
        print(f"Saved to: {output_path}")
        print(f"Records: {len(items)}")
    else:
        # Display top 5
        total = len(items)
        if total == 0:
            print("\nNo results found.")
            return
        print(f"\n{'='*60}")
        print(f"TOP 5 RESULTS (of {total} total)")
        print("=" * 60)
        for i, item in enumerate(items[:5]):
            print(f"\n--- Result {i+1} ---")
            for key, value in item.items():
                # Clip each field for terminal display (100 chars max).
                display = value
                if isinstance(value, str) and len(value) > 100:
                    display = value[:100] + "..."
                elif isinstance(value, (list, dict)):
                    s = json.dumps(value, ensure_ascii=False)
                    display = s[:100] + "..." if len(s) > 100 else s
                print(f" {key}: {display}")
        print(f"\n{'='*60}")
        if total > 5:
            print(f"Showing 5 of {total} results.")
            print(f"Full data: https://console.apify.com/storage/datasets/{dataset_id}")
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def run_actor(token, actor_id, input_json, output=None, fmt="json", timeout=600):
    """Run an actor end to end: start it, wait for completion, fetch results.

    Exits the process when the run finishes in any non-success state.
    """
    print(f"Starting actor: {actor_id}")
    run_id, dataset_id = start_actor(token, actor_id, input_json)
    print(f"Run ID: {run_id}")
    print(f"Dataset ID: {dataset_id}")

    final = poll_until_complete(token, run_id, timeout=timeout)
    if final == "SUCCEEDED":
        download_results(token, dataset_id, output, fmt)
        return
    print(f"ERROR: Actor run {final}", file=sys.stderr)
    print(f"Details: https://console.apify.com/actors/runs/{run_id}", file=sys.stderr)
    sys.exit(1)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def get_results(token, run_id):
    """Print (up to 5000 chars of) the dataset items of a finished run."""
    payload = api_get(f"{BASE}/actor-runs/{run_id}/dataset/items?token={token}")
    if not payload:
        return
    print(json.dumps(payload, indent=2, ensure_ascii=False)[:5000])
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def get_status(token, run_id):
    """Print (up to 2000 chars of) the status record for a run."""
    payload = api_get(f"{BASE}/actor-runs/{run_id}?token={token}")
    if not payload:
        return
    print(json.dumps(payload, indent=2, ensure_ascii=False)[:2000])
|
|
171
|
+
|
|
172
|
+
|
|
173
|
+
def store_search(token, query):
    """Search the Apify actor store and print up to 10 matching actors."""
    from urllib.parse import quote
    data = api_get(f"{BASE}/store?token={token}&limit=10&search={quote(query)}")
    if not data:
        return
    # The store endpoint normally answers {"data": {"items": [...]}}.  The
    # original expression called data.get(...) unconditionally, so a bare
    # list response raised AttributeError even though its own fallback
    # value showed that shape was anticipated — branch on the type first.
    if isinstance(data, list):
        items = data
    else:
        items = data.get("data", {}).get("items", [])
    for item in items[:10]:
        print(f'📦 {item.get("title", item.get("name", ""))}')
        print(f' ID: {item.get("id", item.get("actorId", ""))}')
        desc = item.get("description", "")[:150]
        if desc:
            print(f" {desc}")
        print()
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def main():
    """CLI entry point: parse sys.argv and dispatch to a subcommand.

    Subcommands: run, linkedin, instagram, results, status, store-search
    (see module docstring for usage).  Exits non-zero on bad arguments.
    """
    if len(sys.argv) < 2:
        print(__doc__)
        sys.exit(1)
    token = init()  # resolves the Apify token or exits
    cmd = sys.argv[1]

    if cmd == "run":
        if len(sys.argv) < 4:
            print("Usage: apify.py run <actor_id> '<json_input>' [--output file] [--format csv|json]")
            sys.exit(1)
        actor_id = sys.argv[2]
        input_json = sys.argv[3]
        # Parse optional args (hand-rolled: unknown tokens are skipped)
        output = None
        fmt = "json"
        i = 4
        while i < len(sys.argv):
            if sys.argv[i] == "--output" and i + 1 < len(sys.argv):
                output = sys.argv[i + 1]
                i += 2
            elif sys.argv[i] == "--format" and i + 1 < len(sys.argv):
                fmt = sys.argv[i + 1]
                i += 2
            else:
                i += 1
        run_actor(token, actor_id, input_json, output, fmt)

    elif cmd == "linkedin":
        # Shortcut: wraps the LinkedIn profile scraper actor.
        url = sys.argv[2] if len(sys.argv) > 2 else ""
        if not url:
            print("Usage: apify.py linkedin <profile_url>", file=sys.stderr)
            sys.exit(1)
        run_actor(token, "supreme_coder/linkedin-profile-scraper",
                  json.dumps({"urls": [{"url": url}]}))

    elif cmd == "instagram":
        # Shortcut: wraps the Instagram profile scraper actor.
        username = sys.argv[2] if len(sys.argv) > 2 else ""
        if not username:
            print("Usage: apify.py instagram <username>", file=sys.stderr)
            sys.exit(1)
        run_actor(token, "apify/instagram-profile-scraper",
                  json.dumps({"usernames": [username]}))

    elif cmd == "results":
        run_id = sys.argv[2] if len(sys.argv) > 2 else ""
        if not run_id:
            print("Usage: apify.py results <run_id>", file=sys.stderr)
            sys.exit(1)
        get_results(token, run_id)

    elif cmd == "status":
        run_id = sys.argv[2] if len(sys.argv) > 2 else ""
        if not run_id:
            print("Usage: apify.py status <run_id>", file=sys.stderr)
            sys.exit(1)
        get_status(token, run_id)

    elif cmd == "store-search":
        query = " ".join(sys.argv[2:])
        if not query:
            print("Usage: apify.py store-search <query>", file=sys.stderr)
            sys.exit(1)
        store_search(token, query)

    else:
        print(f"Unknown command: {cmd}", file=sys.stderr)
        print(__doc__)
        sys.exit(1)
|
|
257
|
+
|
|
258
|
+
|
|
259
|
+
# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Bright Data MCP wrapper — scrape, search, search-geo, search-yandex. Stdlib only.
|
|
3
|
+
|
|
4
|
+
Uses mcp-client.py for MCP JSON-RPC calls.
|
|
5
|
+
"""
|
|
6
|
+
|
|
7
|
+
import json
|
|
8
|
+
import os
|
|
9
|
+
import subprocess
|
|
10
|
+
import sys
|
|
11
|
+
|
|
12
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
13
|
+
from _http import get_key, get_workspace
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def init():
    """Return the Bright Data MCP endpoint URL (env var or workspace file)."""
    root = get_workspace()[0]
    fallback = os.path.join(root, "scripts", "brightdata-mcp-url.txt")
    return get_key(
        "BRIGHTDATA_MCP_URL",
        file_fallback=fallback,
        help_url="https://brightdata.com/products/web-scraper/mcp",
    )
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def mcp_call(mcp_url, tool_name, arguments):
    """Invoke an MCP tool by shelling out to the sibling mcp-client.py.

    Relays the child's stdout/stderr and returns its exit code.

    Args:
        mcp_url: MCP server endpoint URL.
        tool_name: name of the MCP tool to call.
        arguments: dict of tool arguments, passed as JSON.
    """
    scripts_dir = os.path.dirname(os.path.abspath(__file__))
    mcp_client = os.path.join(scripts_dir, "mcp-client.py")
    args_json = json.dumps(arguments)
    try:
        result = subprocess.run(
            [sys.executable, mcp_client, mcp_url, tool_name, args_json],
            capture_output=True, text=True, timeout=120
        )
    except subprocess.TimeoutExpired:
        # Fix: the 120s timeout previously propagated TimeoutExpired and
        # crashed the CLI with a traceback; report it and fail cleanly.
        print(f"ERROR: MCP call '{tool_name}' timed out after 120s", file=sys.stderr)
        return 1
    if result.stdout:
        print(result.stdout)
    if result.stderr:
        print(result.stderr, file=sys.stderr)
    return result.returncode
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
def list_tools(mcp_url):
    """Ask the sibling mcp-client.py for the server's tool list and relay it."""
    client_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "mcp-client.py")
    proc = subprocess.run(
        [sys.executable, client_path, mcp_url, "--list-tools"],
        capture_output=True, text=True, timeout=30,
    )
    if proc.stdout:
        print(proc.stdout)
    if proc.stderr:
        print(proc.stderr, file=sys.stderr)
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def main():
    """CLI entry point for the Bright Data MCP wrapper.

    Subcommands: tools, scrape, scrape-batch, search, search-geo,
    search-yandex.  Exits non-zero on missing/unknown arguments.
    """
    if len(sys.argv) < 2:
        print("Usage: brightdata.py {tools|scrape|scrape-batch|search|search-geo|search-yandex} <args>")
        print()
        print(" tools - list available MCP tools")
        print(" scrape <url> - any URL → markdown (bypasses CAPTCHA)")
        print(" scrape-batch <url1> <url2> - batch scrape up to 10 URLs")
        print(" search <query> - Google search via Bright Data")
        print(" search-geo <cc> <query> - geo-targeted search")
        print(" search-yandex <query> - Yandex search")
        sys.exit(1)

    mcp_url = init()  # resolves the MCP endpoint or exits
    cmd = sys.argv[1]

    if cmd == "tools":
        list_tools(mcp_url)
    elif cmd == "scrape":
        url = sys.argv[2] if len(sys.argv) > 2 else ""
        if not url:
            print("Usage: brightdata.py scrape <url>", file=sys.stderr)
            sys.exit(1)
        mcp_call(mcp_url, "scrape_as_markdown", {"url": url})
    elif cmd == "scrape-batch":
        urls = sys.argv[2:]
        if not urls:
            print("Usage: brightdata.py scrape-batch <url1> <url2>...", file=sys.stderr)
            sys.exit(1)
        mcp_call(mcp_url, "scrape_as_markdown_batch", {"urls": urls})
    elif cmd == "search":
        # Remaining argv tokens are joined into a single query string.
        query = " ".join(sys.argv[2:])
        mcp_call(mcp_url, "web_data_search_engine", {"query": query})
    elif cmd == "search-geo":
        if len(sys.argv) < 4:
            print("Usage: brightdata.py search-geo <country_code> <query>", file=sys.stderr)
            sys.exit(1)
        geo = sys.argv[2]
        query = " ".join(sys.argv[3:])
        mcp_call(mcp_url, "web_data_search_engine", {"query": query, "country": geo})
    elif cmd == "search-yandex":
        query = " ".join(sys.argv[2:])
        mcp_call(mcp_url, "web_data_search_engine", {"query": query, "engine": "yandex"})
    else:
        print(f"Unknown: {cmd}", file=sys.stderr)
        sys.exit(1)
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""OSINT Toolkit Diagnostic — checks available API keys, CLI tools, and capabilities."""
|
|
3
|
+
|
|
4
|
+
import os
|
|
5
|
+
import shutil
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
9
|
+
from _http import get_workspace
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def check_key(env_var, file_path=None):
    """Report whether a credential is available via env var or fallback file.

    Returns:
        (found, source_note): source_note is "(env)" when the environment
        variable is set, "(file: <basename>)" when the fallback file exists,
        or "" when the key is missing.
    """
    if os.environ.get(env_var):
        # Fix: was a placeholder-free f-string (f"(env)"); plain literal.
        return True, "(env)"
    if file_path and os.path.isfile(file_path):
        return True, f"(file: {os.path.basename(file_path)})"
    return False, ""
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def main():
    """Print a four-section diagnostic report of the OSINT toolkit.

    Sections: configured API tokens, CLI tools and helper scripts,
    internal intelligence sources, and a derived capability summary.
    """
    workspace, skill_dir, scripts_dir = get_workspace()

    print("=== OSINT TOOLKIT DIAGNOSTIC ===")
    print()

    # 1. API Tokens — each entry: (env var, optional fallback file, signup URL)
    print("📡 API Tokens:")
    keys = [
        ("APIFY_API_TOKEN", os.path.join(workspace, "scripts", "apify-api-token.txt"),
         "https://console.apify.com/account/integrations"),
        ("JINA_API_KEY", os.path.join(workspace, "scripts", "jina-api-key.txt"),
         "https://jina.ai/api-key"),
        ("PERPLEXITY_API_KEY", None, "https://perplexity.ai/settings/api"),
        ("PARALLEL_API_KEY", os.path.join(workspace, "scripts", "parallel-api-key.txt"),
         "https://platform.parallel.ai"),
        ("EXA_API_KEY", None, "https://dashboard.exa.ai"),
        ("TAVILY_API_KEY", None, "https://app.tavily.com/home"),
        ("BRIGHTDATA_MCP_URL", os.path.join(workspace, "scripts", "brightdata-mcp-url.txt"),
         "https://brightdata.com/products/web-scraper/mcp"),
    ]
    for env_var, file_path, url in keys:
        found, source = check_key(env_var, file_path)
        if found:
            print(f" ✅ {env_var} {source}")
        else:
            print(f" ❌ {env_var} — get one at {url}")
    print()

    # 2. CLI Tools — (name, optional alternate name to try on PATH)
    print("🔧 CLI Tools:")
    tools = [
        ("python3", "python"),
        ("node", None),
        ("jq", None),
        ("curl", None),
        ("git", None),
    ]
    for tool_name, alt_name in tools:
        path = shutil.which(tool_name) or (shutil.which(alt_name) if alt_name else None)
        if path:
            print(f" ✅ {tool_name}")
        else:
            print(f" ❌ {tool_name}")

    # Check run_actor.js
    run_actor_js = os.path.join(scripts_dir, "run_actor.js")
    if os.path.isfile(run_actor_js):
        print(f" ✅ run_actor.js (embedded, 55+ actors)")
    else:
        print(f" ❌ run_actor.js (missing)")

    # Check Python scripts
    py_scripts = ["perplexity.py", "tavily.py", "exa.py", "jina.py",
                  "parallel.py", "apify.py", "brightdata.py", "volley.py"]
    for script in py_scripts:
        path = os.path.join(scripts_dir, script)
        if os.path.isfile(path):
            print(f" ✅ {script}")
        else:
            print(f" ⚠️ {script} (not found)")
    print()

    # 3. Internal Intelligence — sibling skills/tools in the workspace
    print("📱 Internal Intelligence:")
    tg_path = os.path.join(workspace, "skills", "telegram", "scripts", "tg.py")
    if os.path.isfile(tg_path):
        print(" ✅ tg.py (Telegram history/search)")
    else:
        print(" ❌ tg.py (no Telegram access)")

    # himalaya may be on PATH or installed under ~/.local/bin
    himalaya = shutil.which("himalaya")
    himalaya_local = os.path.expanduser("~/.local/bin/himalaya")
    if himalaya or os.path.isfile(himalaya_local):
        print(" ✅ himalaya (email search)")
    else:
        print(" ❌ himalaya (no email access)")

    vault_crm = os.path.join(workspace, "vault", "crm")
    if os.path.isdir(vault_crm):
        count = sum(1 for f in os.listdir(vault_crm) if f.endswith(".md"))
        print(f" ✅ vault/crm ({count} cards)")
    else:
        print(" ❌ vault/crm (no CRM vault)")
    print()

    # 4. Capability Summary — re-check each key and map it to capabilities
    print("📊 Capabilities:")
    apify_ok = check_key("APIFY_API_TOKEN", os.path.join(workspace, "scripts", "apify-api-token.txt"))[0]
    bright_ok = check_key("BRIGHTDATA_MCP_URL", os.path.join(workspace, "scripts", "brightdata-mcp-url.txt"))[0]
    jina_ok = check_key("JINA_API_KEY", os.path.join(workspace, "scripts", "jina-api-key.txt"))[0]
    perp_ok = check_key("PERPLEXITY_API_KEY")[0]
    tavily_ok = check_key("TAVILY_API_KEY")[0]
    exa_ok = check_key("EXA_API_KEY")[0]
    parallel_ok = check_key("PARALLEL_API_KEY", os.path.join(workspace, "scripts", "parallel-api-key.txt"))[0]

    caps = [
        (apify_ok, "LinkedIn scraping (Apify)"),
        (apify_ok, "Instagram scraping (Apify)"),
        (apify_ok, "TikTok scraping (Apify)"),
        (apify_ok, "YouTube scraping (Apify)"),
        (apify_ok, "Contact enrichment (Apify)"),
        (apify_ok, "Google Maps (Apify)"),
        (bright_ok, "Facebook scraping (Bright Data)"),
        (bright_ok, "CAPTCHA bypass (Bright Data)"),
        (jina_ok, "Deep search (Jina)"),
        (perp_ok, "Quick answers (Perplexity Sonar)"),
        (perp_ok, "Deep research (Perplexity Deep)"),
        (tavily_ok, "Agent search (Tavily)"),
        (exa_ok, "Semantic search (Exa)"),
        (exa_ok, "People/Company search (Exa)"),
        (parallel_ok, "AI search (Parallel)"),
    ]
    for available, name in caps:
        print(f" {'✅' if available else '❌'} {name}")
    print()
    print("=== END DIAGNOSTIC ===")
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
# Run the diagnostic only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|
|
@@ -0,0 +1,79 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
"""Exa AI — semantic search, company research, people search, crawl. Stdlib only."""
|
|
3
|
+
|
|
4
|
+
import json
|
|
5
|
+
import os
|
|
6
|
+
import sys
|
|
7
|
+
|
|
8
|
+
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
|
|
9
|
+
from _http import api_post, get_key, truncate
|
|
10
|
+
|
|
11
|
+
BASE = "https://api.exa.ai"
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def init():
    """Return the Exa API key; exits with a help URL when it is unset."""
    key = get_key("EXA_API_KEY", help_url="https://dashboard.exa.ai")
    return key
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
def search(api_key, query, category=None, max_chars=500, icon="🔗"):
    """Run an Exa /search query and pretty-print up to 10 hits.

    Args:
        api_key: Exa API key.
        query: search query text.
        category: optional Exa result category filter.
        max_chars: cap on the snippet length requested and displayed.
        icon: prefix emoji for each printed title.
    """
    body = {
        "query": query,
        "type": "auto",
        "numResults": 10,
        "contents": {"text": {"maxCharacters": max_chars}},
    }
    if category:
        body["category"] = category

    reply = api_post(f"{BASE}/search", body, headers={"x-api-key": api_key})
    if not reply:
        return
    for hit in reply.get("results", [])[:10]:
        print(f'{icon} {hit.get("title", "")}')
        print(f' {hit.get("url", "")}')
        snippet = truncate(hit.get("text", ""), max_chars)
        if snippet:
            print(f" {snippet}")
        print()
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
def crawl(api_key, url):
    """Fetch one URL through Exa /contents and print its extracted text."""
    reply = api_post(
        f"{BASE}/contents",
        {"urls": [url], "text": {"maxCharacters": 5000}},
        headers={"x-api-key": api_key},
    )
    if not reply:
        return
    for page in reply.get("results", []):
        print(f'📄 {page.get("title", "")}')
        print(f' {page.get("url", "")}')
        print(page.get("text", "")[:3000])
|
|
52
|
+
|
|
53
|
+
|
|
54
|
+
def main():
    """CLI entry point: exa.py <search|company|people|crawl|deep> <query...>."""
    if len(sys.argv) < 3:
        print("Usage: exa.py search|company|people|crawl|deep <query>")
        sys.exit(1)
    api_key = init()  # resolves the Exa key or exits
    cmd = sys.argv[1]
    query = " ".join(sys.argv[2:])
    if cmd == "search":
        search(api_key, query)
    elif cmd == "company":
        # Reuses search() with the company category and a longer snippet.
        search(api_key, f"{query} company information about",
               category="company", max_chars=1000, icon="🏢")
    elif cmd == "people":
        search(api_key, query, category="personal site", icon="👤")
    elif cmd == "crawl":
        crawl(api_key, query)
    elif cmd == "deep":
        # Deep research is not wired up here; point at the MCP endpoint.
        print("🔬 Exa Deep Research — use MCP or dashboard")
        print(" MCP: https://mcp.exa.ai/mcp?tools=deep_researcher_start,deep_researcher_check")
    else:
        print(f"Unknown: {cmd} (use search|company|people|crawl|deep)", file=sys.stderr)
        sys.exit(1)
|
|
76
|
+
|
|
77
|
+
|
|
78
|
+
# Run the CLI only when executed as a script (not on import).
if __name__ == "__main__":
    main()
|