@agentikos/omega-os 0.19.23 → 0.19.24

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -564,6 +564,15 @@ for entry in missing:
564
564
  if shutil.which(binary):
565
565
  print(f" ok {cli_id:<14} (installed via {attempt})")
566
566
  installed_now.append(cli_id)
567
+ # Optional post-install command (e.g. `scrapling install`
568
+ # downloads its browser deps after pip install).
569
+ post = entry.get("post_install") or []
570
+ if post and shutil.which(post[0]):
571
+ rc2, _ = _run(post, timeout=600)
572
+ if rc2 == 0:
573
+ print(f" post-install: {' '.join(post)} ok")
574
+ else:
575
+ print(f" post-install: {' '.join(post)} rc={rc2} (non-fatal)")
567
576
  else:
568
577
  print(f" fail {cli_id}: {attempt} returned rc={rc}")
569
578
  failed_now.append(cli_id)
@@ -188,7 +188,7 @@ from omega_engine.genesis import (
188
188
  )
189
189
  from omega_engine import plan as plan_v7
190
190
 
191
- __version__ = "0.19.23"
191
+ __version__ = "0.19.24"
192
192
 
193
193
  __all__ = [
194
194
  "__version__",
@@ -2966,28 +2966,74 @@ def _set_active_provider(provider_id: str) -> None:
2966
2966
 
2967
2967
 
2968
2968
  def cmd_scrape(args: argparse.Namespace) -> int:
2969
- """`omega scrape <url> [--out file] [--humanize]` — stealth scraper.
2970
-
2971
- Thin wrapper over CloakBrowser (https://github.com/CloakHQ/CloakBrowser).
2972
- Bypasses Cloudflare/Turnstile/FingerprintJS by patching the browser at
2973
- the C++ level. Default output = markdown to stdout so any LLM CLI can
2974
- pipe it directly into a context. The wrapper is what makes
2975
- CloakBrowser usable as a SHELL CLI for Claude/Gemini/Codex/etc.
2976
-
2977
- Usage:
2978
- omega scrape https://example.com # → markdown to stdout
2979
- omega scrape https://example.com --out page.md # file
2980
- omega scrape https://example.com --humanize # mouse curves + scroll
2981
- omega scrape https://example.com --proxy <url> # SOCKS5/HTTP
2982
-
2983
- Inside any Claude Code chat session: ``omega scrape <url>`` returns the
2984
- cleaned page content. No MCP server, no Playwright dance.
2969
+ """`omega scrape <url> [--engine cloak|scrapling] [--out file]` — scraper.
2970
+
2971
+ Two engines available since v0.19.24:
2972
+ * ``cloak`` (default) CloakBrowser, heavy stealth Chromium, 58
2973
+ C++ patches, passes Cloudflare/Turnstile/FingerprintJS. Slow
2974
+ but invincible.
2975
+ * ``scrapling`` D4Vinci/Scrapling, HTTP-first with optional
2976
+ browser, adaptive element tracking, concurrent crawl with
2977
+ pause/resume. Fast for non-protected sites.
2978
+
2979
+ Output: cleaned text to stdout (or --out FILE). Wraps both engines
2980
+ behind a consistent CLI so any LLM (Claude/Gemini/Codex/OpenCode/
2981
+ Aider) can `omega scrape <url>` via Bash without caring which
2982
+ Python library is doing the work.
2983
+
2984
+ Examples:
2985
+ omega scrape https://example.com # CloakBrowser
2986
+ omega scrape https://example.com --engine scrapling # Scrapling
2987
+ omega scrape https://news.ycombinator.com --engine scrapling --css ".titleline"
2985
2988
  """
2986
2989
  import shutil
2987
2990
  import subprocess
2988
2991
  if not args.url:
2989
- print("usage: omega scrape <url> [--out FILE] [--humanize] [--proxy URL]")
2992
+ print("usage: omega scrape <url> "
2993
+ "[--engine cloak|scrapling] [--out FILE] [--humanize] "
2994
+ "[--proxy URL] [--css SELECTOR]")
2990
2995
  return 2
2996
+ engine = getattr(args, "engine", None) or "cloak"
2997
+ if engine == "scrapling":
2998
+ # Scrapling path — uses its own CLI `scrapling extract`.
2999
+ if not shutil.which("scrapling"):
3000
+ print(" scrapling not installed — "
3001
+ "run `omega tool install scrapling` or "
3002
+ "`uv tool install 'scrapling[fetchers]' && scrapling install`")
3003
+ return 2
3004
+ out_file = getattr(args, "out", None)
3005
+ # Scrapling writes to a file (it doesn't print to stdout by
3006
+ # default). If the user didn't specify --out, write to a temp file
3007
+ # and print its contents afterwards.
3008
+ import tempfile
3009
+ from pathlib import Path
3010
+ tmp_out = out_file or tempfile.mktemp(suffix=".md")
3011
+ scrapling_cmd = ["scrapling", "extract",
3012
+ "stealthy-fetch" if getattr(args, "humanize", False)
3013
+ else "fetch",
3014
+ args.url, tmp_out]
3015
+ if getattr(args, "css", None):
3016
+ scrapling_cmd += ["--css-selector", args.css]
3017
+ try:
3018
+ proc = subprocess.run(scrapling_cmd, capture_output=True,
3019
+ text=True, timeout=120)
3020
+ except subprocess.TimeoutExpired:
3021
+ print(" scrapling timed out after 120s")
3022
+ return 2
3023
+ if proc.returncode != 0:
3024
+ print(f" scrapling error: {proc.stderr[:400]}")
3025
+ return proc.returncode
3026
+ if not out_file:
3027
+ try:
3028
+ print(Path(tmp_out).read_text())
3029
+ Path(tmp_out).unlink(missing_ok=True)
3030
+ except OSError as exc:
3031
+ print(f" read scrapling output failed: {exc}")
3032
+ return 2
3033
+ else:
3034
+ print(f" wrote → {out_file}")
3035
+ return 0
3036
+ # Default engine = CloakBrowser (kept verbatim from v0.19.23).
2991
3037
  # CloakBrowser is installed as a uv tool (`cloakbrowser`) but we need
2992
3038
  # to drive it from Python — the canonical path is `python -m
2993
3039
  # cloakbrowser` after `pip install cloakbrowser`, OR we invoke the
@@ -4190,20 +4236,28 @@ def _build_parser() -> argparse.ArgumentParser:
4190
4236
  help="provider id (omit to print current + available)")
4191
4237
  p_sw.set_defaults(fn=cmd_switch)
4192
4238
 
4193
- # `omega scrape <url>` — official OmegaOS web scraper (CloakBrowser).
4239
+ # `omega scrape <url>` — official OmegaOS web scraper (CloakBrowser
4240
+ # default, Scrapling optional). Both engines pre-installed at step 40
4241
+ # when their catalog entry is `recommended: true`.
4194
4242
  p_sc = sub.add_parser(
4195
4243
  "scrape",
4196
- help="stealth scrape a URL via CloakBrowser bypasses Cloudflare, "
4197
- "Turnstile, FingerprintJS. Output: markdown to stdout (or --out FILE)",
4244
+ help="scrape a URL CloakBrowser (default, stealth) or Scrapling "
4245
+ "(--engine scrapling, fast). Output: stdout or --out FILE",
4198
4246
  )
4199
4247
  p_sc.add_argument("url", nargs="?", default=None,
4200
4248
  help="URL to fetch")
4249
+ p_sc.add_argument("--engine", default="cloak",
4250
+ choices=["cloak", "scrapling"],
4251
+ help="which scraper to use (default: cloak = CloakBrowser)")
4201
4252
  p_sc.add_argument("--out", default=None,
4202
4253
  help="write result to file instead of stdout")
4203
4254
  p_sc.add_argument("--humanize", action="store_true",
4204
- help="mouse curves + keyboard timing + scroll patterns")
4255
+ help="mouse curves + scroll patterns (cloak) "
4256
+ "or stealthy-fetch mode (scrapling)")
4205
4257
  p_sc.add_argument("--proxy", default=None,
4206
- help="HTTP or SOCKS5 proxy (with inline creds)")
4258
+ help="HTTP or SOCKS5 proxy (cloak engine only)")
4259
+ p_sc.add_argument("--css", default=None,
4260
+ help="CSS selector to scope output (scrapling engine only)")
4207
4261
  p_sc.set_defaults(fn=cmd_scrape)
4208
4262
  p_doc = sub.add_parser("doctor", help="validate the deployment")
4209
4263
  p_doc.add_argument("--json", action="store_true",
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "omega-engine"
3
- version = "0.19.23"
3
+ version = "0.19.24"
4
4
  description = "The Omega OS orchestration engine — event-sourced, verified-completion agent graphs."
5
5
  readme = "README.md"
6
6
  requires-python = ">=3.11"
@@ -1 +1 @@
1
- 0.19.23
1
+ 0.19.24
@@ -74,6 +74,19 @@ native:
74
74
  secrets: []
75
75
  recommended: true
76
76
 
77
+ - id: scrapling
78
+ name: Scrapling (optional fast scraper — HTTP-first, adaptive elements)
79
+ # Complementary to CloakBrowser. Scrapling shines for non-protected
80
+ # sites + multi-page crawls (concurrent + pause/resume + adaptive
81
+ # element tracking that survives page redesigns). CloakBrowser stays
82
+ # the default for hard bot-walls; Scrapling for everything else.
83
+ # `omega scrape --engine scrapling` selects it explicitly.
84
+ install: { uv_tool: "scrapling[fetchers]" }
85
+ binary: scrapling
86
+ secrets: []
87
+ recommended: false
88
+ post_install: ["scrapling", "install"] # downloads browser deps once
89
+
77
90
  # --- 2. Printing Press CLIs --------------------------------------------
78
91
  # Installed via `npx -y @mvanhorn/printing-press-library install <name>`.
79
92
  # Each ships with a local SQLite mirror + Claude Code skill.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@agentikos/omega-os",
3
- "version": "0.19.23",
3
+ "version": "0.19.24",
4
4
  "description": "Omega OS — installable agentic operating system with verified-completion orchestration. Event-sourced engine, 8-block rack, autonomous agents, MCP.",
5
5
  "bin": {
6
6
  "omega-os": "bin/omega-os.js"