@agentikos/omega-os 0.19.23 → 0.19.24
This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- package/bootstrap/lib/steps.sh +9 -0
- package/omega/Agentik_Engine/omega_engine/__init__.py +1 -1
- package/omega/Agentik_Engine/omega_engine/__pycache__/__init__.cpython-313.pyc +0 -0
- package/omega/Agentik_Engine/omega_engine/__pycache__/cli.cpython-313.pyc +0 -0
- package/omega/Agentik_Engine/omega_engine/cli.py +76 -22
- package/omega/Agentik_Engine/pyproject.toml +1 -1
- package/omega/Agentik_SSOT/VERSION +1 -1
- package/omega/Agentik_SSOT/clis/clis-catalog.yaml +13 -0
- package/package.json +1 -1
package/bootstrap/lib/steps.sh
CHANGED
|
@@ -564,6 +564,15 @@ for entry in missing:
|
|
|
564
564
|
if shutil.which(binary):
|
|
565
565
|
print(f" ok {cli_id:<14} (installed via {attempt})")
|
|
566
566
|
installed_now.append(cli_id)
|
|
567
|
+
# Optional post-install command (e.g. `scrapling install`
|
|
568
|
+
# downloads its browser deps after pip install).
|
|
569
|
+
post = entry.get("post_install") or []
|
|
570
|
+
if post and shutil.which(post[0]):
|
|
571
|
+
rc2, _ = _run(post, timeout=600)
|
|
572
|
+
if rc2 == 0:
|
|
573
|
+
print(f" post-install: {' '.join(post)} ok")
|
|
574
|
+
else:
|
|
575
|
+
print(f" post-install: {' '.join(post)} rc={rc2} (non-fatal)")
|
|
567
576
|
else:
|
|
568
577
|
print(f" fail {cli_id}: {attempt} returned rc={rc}")
|
|
569
578
|
failed_now.append(cli_id)
|
|
Binary file
|
|
Binary file
|
|
@@ -2966,28 +2966,74 @@ def _set_active_provider(provider_id: str) -> None:
|
|
|
2966
2966
|
|
|
2967
2967
|
|
|
2968
2968
|
def cmd_scrape(args: argparse.Namespace) -> int:
|
|
2969
|
-
"""`omega scrape <url> [--
|
|
2970
|
-
|
|
2971
|
-
|
|
2972
|
-
|
|
2973
|
-
|
|
2974
|
-
|
|
2975
|
-
|
|
2976
|
-
|
|
2977
|
-
|
|
2978
|
-
|
|
2979
|
-
|
|
2980
|
-
|
|
2981
|
-
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2969
|
+
"""`omega scrape <url> [--engine cloak|scrapling] [--out file]` — scraper.
|
|
2970
|
+
|
|
2971
|
+
Two engines available since v0.19.24:
|
|
2972
|
+
* ``cloak`` (default) — CloakBrowser, heavy stealth Chromium, 58
|
|
2973
|
+
C++ patches, passes Cloudflare/Turnstile/FingerprintJS. Slow
|
|
2974
|
+
but invincible.
|
|
2975
|
+
* ``scrapling`` — D4Vinci/Scrapling, HTTP-first with optional
|
|
2976
|
+
browser, adaptive element tracking, concurrent crawl with
|
|
2977
|
+
pause/resume. Fast for non-protected sites.
|
|
2978
|
+
|
|
2979
|
+
Output: cleaned text to stdout (or --out FILE). Wraps both engines
|
|
2980
|
+
behind a consistent CLI so any LLM (Claude/Gemini/Codex/OpenCode/
|
|
2981
|
+
Aider) can `omega scrape <url>` via Bash without caring which
|
|
2982
|
+
Python library is doing the work.
|
|
2983
|
+
|
|
2984
|
+
Examples:
|
|
2985
|
+
omega scrape https://example.com # CloakBrowser
|
|
2986
|
+
omega scrape https://example.com --engine scrapling # Scrapling
|
|
2987
|
+
omega scrape https://news.ycombinator.com --engine scrapling --css ".titleline"
|
|
2985
2988
|
"""
|
|
2986
2989
|
import shutil
|
|
2987
2990
|
import subprocess
|
|
2988
2991
|
if not args.url:
|
|
2989
|
-
print("usage: omega scrape <url>
|
|
2992
|
+
print("usage: omega scrape <url> "
|
|
2993
|
+
"[--engine cloak|scrapling] [--out FILE] [--humanize] "
|
|
2994
|
+
"[--proxy URL] [--css SELECTOR]")
|
|
2990
2995
|
return 2
|
|
2996
|
+
engine = getattr(args, "engine", None) or "cloak"
|
|
2997
|
+
if engine == "scrapling":
|
|
2998
|
+
# Scrapling path — uses its own CLI `scrapling extract`.
|
|
2999
|
+
if not shutil.which("scrapling"):
|
|
3000
|
+
print(" scrapling not installed — "
|
|
3001
|
+
"run `omega tool install scrapling` or "
|
|
3002
|
+
"`uv tool install 'scrapling[fetchers]' && scrapling install`")
|
|
3003
|
+
return 2
|
|
3004
|
+
out_file = getattr(args, "out", None)
|
|
3005
|
+
# Scrapling writes to a file (it doesn't print to stdout by
|
|
3006
|
+
# default). If user didn't specify --out, use a tmpfile and
|
|
3007
|
+
# cat it after.
|
|
3008
|
+
import tempfile
|
|
3009
|
+
from pathlib import Path
|
|
3010
|
+
tmp_out = out_file or tempfile.mktemp(suffix=".md")
|
|
3011
|
+
scrapling_cmd = ["scrapling", "extract",
|
|
3012
|
+
"stealthy-fetch" if getattr(args, "humanize", False)
|
|
3013
|
+
else "fetch",
|
|
3014
|
+
args.url, tmp_out]
|
|
3015
|
+
if getattr(args, "css", None):
|
|
3016
|
+
scrapling_cmd += ["--css-selector", args.css]
|
|
3017
|
+
try:
|
|
3018
|
+
proc = subprocess.run(scrapling_cmd, capture_output=True,
|
|
3019
|
+
text=True, timeout=120)
|
|
3020
|
+
except subprocess.TimeoutExpired:
|
|
3021
|
+
print(" scrapling timed out after 120s")
|
|
3022
|
+
return 2
|
|
3023
|
+
if proc.returncode != 0:
|
|
3024
|
+
print(f" scrapling error: {proc.stderr[:400]}")
|
|
3025
|
+
return proc.returncode
|
|
3026
|
+
if not out_file:
|
|
3027
|
+
try:
|
|
3028
|
+
print(Path(tmp_out).read_text())
|
|
3029
|
+
Path(tmp_out).unlink(missing_ok=True)
|
|
3030
|
+
except OSError as exc:
|
|
3031
|
+
print(f" read scrapling output failed: {exc}")
|
|
3032
|
+
return 2
|
|
3033
|
+
else:
|
|
3034
|
+
print(f" wrote → {out_file}")
|
|
3035
|
+
return 0
|
|
3036
|
+
# Default engine = CloakBrowser (kept verbatim from v0.19.23).
|
|
2991
3037
|
# CloakBrowser is installed as a uv tool (`cloakbrowser`) but we need
|
|
2992
3038
|
# to drive it from Python — the canonical path is `python -m
|
|
2993
3039
|
# cloakbrowser` after `pip install cloakbrowser`, OR we invoke the
|
|
@@ -4190,20 +4236,28 @@ def _build_parser() -> argparse.ArgumentParser:
|
|
|
4190
4236
|
help="provider id (omit to print current + available)")
|
|
4191
4237
|
p_sw.set_defaults(fn=cmd_switch)
|
|
4192
4238
|
|
|
4193
|
-
# `omega scrape <url>` — official OmegaOS web scraper (CloakBrowser
|
|
4239
|
+
# `omega scrape <url>` — official OmegaOS web scraper (CloakBrowser
|
|
4240
|
+
# default, Scrapling optional). Both engines pre-installed at step 40
|
|
4241
|
+
# when their catalog entry is `recommended: true`.
|
|
4194
4242
|
p_sc = sub.add_parser(
|
|
4195
4243
|
"scrape",
|
|
4196
|
-
help="
|
|
4197
|
-
"
|
|
4244
|
+
help="scrape a URL — CloakBrowser (default, stealth) or Scrapling "
|
|
4245
|
+
"(--engine scrapling, fast). Output: stdout or --out FILE",
|
|
4198
4246
|
)
|
|
4199
4247
|
p_sc.add_argument("url", nargs="?", default=None,
|
|
4200
4248
|
help="URL to fetch")
|
|
4249
|
+
p_sc.add_argument("--engine", default="cloak",
|
|
4250
|
+
choices=["cloak", "scrapling"],
|
|
4251
|
+
help="which scraper to use (default: cloak = CloakBrowser)")
|
|
4201
4252
|
p_sc.add_argument("--out", default=None,
|
|
4202
4253
|
help="write result to file instead of stdout")
|
|
4203
4254
|
p_sc.add_argument("--humanize", action="store_true",
|
|
4204
|
-
help="mouse curves +
|
|
4255
|
+
help="mouse curves + scroll patterns (cloak) "
|
|
4256
|
+
"or stealthy-fetch mode (scrapling)")
|
|
4205
4257
|
p_sc.add_argument("--proxy", default=None,
|
|
4206
|
-
help="HTTP or SOCKS5 proxy (
|
|
4258
|
+
help="HTTP or SOCKS5 proxy (cloak engine only)")
|
|
4259
|
+
p_sc.add_argument("--css", default=None,
|
|
4260
|
+
help="CSS selector to scope output (scrapling engine only)")
|
|
4207
4261
|
p_sc.set_defaults(fn=cmd_scrape)
|
|
4208
4262
|
p_doc = sub.add_parser("doctor", help="validate the deployment")
|
|
4209
4263
|
p_doc.add_argument("--json", action="store_true",
|
|
@@ -1 +1 @@
|
|
|
1
|
-
0.19.
|
|
1
|
+
0.19.24
|
|
@@ -74,6 +74,19 @@ native:
|
|
|
74
74
|
secrets: []
|
|
75
75
|
recommended: true
|
|
76
76
|
|
|
77
|
+
- id: scrapling
|
|
78
|
+
name: Scrapling (optional fast scraper — HTTP-first, adaptive elements)
|
|
79
|
+
# Complementary to CloakBrowser. Scrapling shines for non-protected
|
|
80
|
+
# sites + multi-page crawls (concurrent + pause/resume + adaptive
|
|
81
|
+
# element tracking that survives page redesigns). CloakBrowser stays
|
|
82
|
+
# the default for hard bot-walls; Scrapling for everything else.
|
|
83
|
+
# `omega scrape --engine scrapling` selects it explicitly.
|
|
84
|
+
install: { uv_tool: "scrapling[fetchers]" }
|
|
85
|
+
binary: scrapling
|
|
86
|
+
secrets: []
|
|
87
|
+
recommended: false
|
|
88
|
+
post_install: ["scrapling", "install"] # downloads browser deps once
|
|
89
|
+
|
|
77
90
|
# --- 2. Printing Press CLIs --------------------------------------------
|
|
78
91
|
# Installed via `npx -y @mvanhorn/printing-press-library install <name>`.
|
|
79
92
|
# Each ships with a local SQLite mirror + Claude Code skill.
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@agentikos/omega-os",
|
|
3
|
-
"version": "0.19.
|
|
3
|
+
"version": "0.19.24",
|
|
4
4
|
"description": "Omega OS — installable agentic operating system with verified-completion orchestration. Event-sourced engine, 8-block rack, autonomous agents, MCP.",
|
|
5
5
|
"bin": {
|
|
6
6
|
"omega-os": "bin/omega-os.js"
|