PyPI - omniscout - Versions diffs - 0.1.0__tar.gz - Mend

omniscout 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (60) hide show

omniscout-0.1.0/.python-version +1 -0
omniscout-0.1.0/CHANGELOG.md +19 -0
omniscout-0.1.0/PKG-INFO +197 -0
omniscout-0.1.0/README.md +156 -0
omniscout-0.1.0/RELEASING.md +73 -0
omniscout-0.1.0/harness/__init__.py +3 -0
omniscout-0.1.0/harness/__main__.py +6 -0
omniscout-0.1.0/harness/app.py +306 -0
omniscout-0.1.0/harness/client.py +94 -0
omniscout-0.1.0/harness/commands/__init__.py +0 -0
omniscout-0.1.0/harness/commands/benchmark.py +186 -0
omniscout-0.1.0/harness/commands/browser.py +597 -0
omniscout-0.1.0/harness/commands/daemon.py +349 -0
omniscout-0.1.0/harness/commands/extract.py +37 -0
omniscout-0.1.0/harness/commands/profile.py +47 -0
omniscout-0.1.0/harness/commands/research.py +44 -0
omniscout-0.1.0/harness/commands/search.py +154 -0
omniscout-0.1.0/harness/commands/session.py +75 -0
omniscout-0.1.0/harness/commands/workflow.py +100 -0
omniscout-0.1.0/harness/config.py +122 -0
omniscout-0.1.0/harness/daemon/__init__.py +10 -0
omniscout-0.1.0/harness/daemon/__main__.py +17 -0
omniscout-0.1.0/harness/daemon/backends/__init__.py +215 -0
omniscout-0.1.0/harness/daemon/backends/extension.py +457 -0
omniscout-0.1.0/harness/daemon/backends/playwright.py +1244 -0
omniscout-0.1.0/harness/daemon/captcha.py +203 -0
omniscout-0.1.0/harness/daemon/events.py +111 -0
omniscout-0.1.0/harness/daemon/history.py +244 -0
omniscout-0.1.0/harness/daemon/lifecycle.py +252 -0
omniscout-0.1.0/harness/daemon/protocol.py +90 -0
omniscout-0.1.0/harness/daemon/server.py +591 -0
omniscout-0.1.0/harness/daemon/sessions_store.py +127 -0
omniscout-0.1.0/harness/daemon/snapshot.py +224 -0
omniscout-0.1.0/harness/engines/__init__.py +0 -0
omniscout-0.1.0/harness/engines/browser.py +397 -0
omniscout-0.1.0/harness/engines/crawler.py +276 -0
omniscout-0.1.0/harness/engines/extractor.py +172 -0
omniscout-0.1.0/harness/engines/research.py +199 -0
omniscout-0.1.0/harness/engines/search/__init__.py +0 -0
omniscout-0.1.0/harness/engines/search/answer.py +102 -0
omniscout-0.1.0/harness/engines/search/answer_balanced.py +200 -0
omniscout-0.1.0/harness/engines/search/answer_cache.py +116 -0
omniscout-0.1.0/harness/engines/search/answer_deep.py +101 -0
omniscout-0.1.0/harness/engines/search/answer_eval.py +69 -0
omniscout-0.1.0/harness/engines/search/answer_fast.py +95 -0
omniscout-0.1.0/harness/engines/search/answer_text.py +366 -0
omniscout-0.1.0/harness/engines/search/ddg.py +128 -0
omniscout-0.1.0/harness/engines/search/embed.py +125 -0
omniscout-0.1.0/harness/engines/search/index.py +152 -0
omniscout-0.1.0/harness/engines/search/pipeline.py +106 -0
omniscout-0.1.0/harness/engines/search/query_classifier.py +59 -0
omniscout-0.1.0/harness/engines/search/rerank.py +38 -0
omniscout-0.1.0/harness/io.py +108 -0
omniscout-0.1.0/harness/logging.py +99 -0
omniscout-0.1.0/harness/models.py +257 -0
omniscout-0.1.0/harness/store/__init__.py +0 -0
omniscout-0.1.0/harness/store/cache.py +112 -0
omniscout-0.1.0/harness/store/sessions.py +155 -0
omniscout-0.1.0/harness/store/workflow.py +135 -0
omniscout-0.1.0/pyproject.toml +79 -0

omniscout-0.1.0/.python-version ADDED Viewed

	@@ -0,0 +1 @@
1	+ 3.11

omniscout-0.1.0/CHANGELOG.md ADDED Viewed

@@ -0,0 +1,19 @@
+# Changelog
+All notable changes to the `omniscout` Python package are documented in this
+file.
+## [0.1.0] - 2026-05-28
+### Added
+- Initial public release scaffolding for PyPI.
+- GitHub Actions CI for Linux/macOS tests, build checks, and install smoke test.
+- TestPyPI and PyPI trusted publishing workflows via GitHub OIDC.
+- Release process documentation in `RELEASING.md`.
+### Changed
+- Renamed distribution package from `harness-harness` to `omniscout`.
+- Set `omniscout` as the primary console entrypoint and kept the `harness` alias.
+- Added project URL metadata for PyPI.

omniscout-0.1.0/PKG-INFO ADDED Viewed

@@ -0,0 +1,197 @@
+Metadata-Version: 2.4
+Name: omniscout
+Version: 0.1.0
+Summary: OmniScout CLI (harness): local-first browser automation, semantic search, and research for AI agents
+Project-URL: Homepage, https://github.com/sriramramnath/omniscout
+Project-URL: Repository, https://github.com/sriramramnath/omniscout
+Project-URL: Issues, https://github.com/sriramramnath/omniscout/issues
+Author: OmniScout
+License: MIT
+Keywords: agent,cli,playwright,research,scraping,search
+Classifier: Development Status :: 3 - Alpha
+Classifier: Environment :: Console
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: MIT License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python :: 3
+Classifier: Programming Language :: Python :: 3.11
+Classifier: Programming Language :: Python :: 3.12
+Requires-Python: >=3.11
+Requires-Dist: aiohttp>=3.9
+Requires-Dist: httpx>=0.27
+Requires-Dist: markdownify>=0.13
+Requires-Dist: nltk>=3.8
+Requires-Dist: platformdirs>=4.2
+Requires-Dist: playwright>=1.45
+Requires-Dist: pydantic>=2.7
+Requires-Dist: qdrant-client>=1.9
+Requires-Dist: rich>=13.7
+Requires-Dist: selectolax>=0.3.21
+Requires-Dist: sentence-transformers>=2.7
+Requires-Dist: sumy>=0.11
+Requires-Dist: tomli>=2.0; python_version < '3.11'
+Requires-Dist: trafilatura>=1.12
+Requires-Dist: typer>=0.12
+Provides-Extra: dev
+Requires-Dist: pytest-asyncio>=0.23; extra == 'dev'
+Requires-Dist: pytest>=8; extra == 'dev'
+Requires-Dist: respx>=0.21; extra == 'dev'
+Requires-Dist: ruff>=0.5; extra == 'dev'
+Description-Content-Type: text/markdown
+# scout — OmniScout CLI
+Local-first browser automation, semantic search, and research for AI agents.
+No cloud APIs, no hosted browser sessions, no MCP, no SDK.
+The CLI is the interface.
+## Install
+Requires Python 3.11+ and Google Chrome (on macOS, typically installed at
+`/Applications/Google Chrome.app`).
+### Recommended: install as a global tool
+```bash
+cd cli
+uv tool install --editable .   # installs scout, omniscout, and harness into ~/.local/bin (on PATH)
+scout install              # verifies Chrome + prefetches embedding model
+```
+After this, `scout` works from any directory. Edits to source files are
+picked up live (editable install).
+`omniscout` and `harness` remain available as compatibility aliases.
+### Alternative: project venv
+If you prefer not to install globally:
+```bash
+cd cli
+uv venv --python 3.11 .venv
+uv pip install -e ".[dev]"
+source .venv/bin/activate
+scout install
+```
+If you don't have Chrome installed, add `--bundled` to also download
+Playwright's bundled Chromium (~190MB).
+`scout install` also prefetches the local sentence-transformers model into
+OmniScout's app data directory so later commands do not need to fetch it again.
+Use `--no-model` to skip model prefetch.
+## Quickstart
+```bash
+# Search the web (DuckDuckGo HTML + local embedding rerank)
+scout search "local-first browser agents"
+# same command via alias:
+omniscout search "local-first browser agents"
+# Extract a URL to clean Markdown
+scout extract https://example.com
+# Capture a screenshot of a real page using your installed Chrome
+scout browser screenshot https://example.com --out page.png
+# Run a multi-step research pipeline (search -> crawl -> extract -> rerank -> summarize)
+scout research "state of local AI agents in 2026"
+# Manage persistent browser profiles (cookies, logins persist across runs)
+scout profile create work
+scout browser open https://news.ycombinator.com --profile work --headful
+# Long-lived browser sessions (other tools can attach via CDP)
+scout session start --headful
+scout session list
+scout session kill --all
+```
+## JSON output (for agents)
+Every command emits structured JSON when invoked with `--json` (or with
+`HARNESS_JSON=1` in the environment). Logs always go to stderr; stdout is
+reserved for the structured result.
+```bash
+HARNESS_JSON=1 scout search "robotics simulators" --limit 5
+```
+## Architecture
+```
+harness/
+  app.py              # Typer root
+  commands/           # CLI sub-commands (thin)
+  engines/
+    browser.py        # Playwright + system Chrome
+    extractor.py      # trafilatura + markdownify
+    crawler.py        # async httpx + Chrome fallback
+    search/
+      ddg.py          # DuckDuckGo HTML
+      embed.py        # sentence-transformers (all-MiniLM-L6-v2)
+      index.py        # embedded Qdrant on-disk
+      rerank.py       # cosine rerank
+      pipeline.py     # ddg | index | hybrid
+    research.py       # full pipeline (search -> crawl -> extract -> rerank -> summarize)
+  store/
+    cache.py          # SQLite + content-hashed HTML cache
+    sessions.py       # SQLite registry of browser sessions
+  models.py           # pydantic result types (the JSON contract)
+```
+On-disk state lives under `~/Library/Application Support/harness/` (macOS) /
+`$XDG_DATA_HOME/harness/` (Linux):
+| Path             | Purpose                                            |
+| ---------------- | -------------------------------------------------- |
+| `profiles/`      | Persistent Chrome user-data-dirs                   |
+| `qdrant/`        | Embedded vector index                              |
+| `sessions.sqlite`| Registry of long-lived browser sessions            |
+| `cache/pages/`   | Content-hashed HTML cache used by extract+crawler |
+Override via `HARNESS_DATA_DIR`, `HARNESS_CONFIG_DIR`, `HARNESS_CACHE_DIR`,
+or settings in `~/Library/Application Support/harness/config.toml`.
+## Configuration
+`config.toml` example:
+```toml
+default_source = "ddg"           # search source default
+search_limit = 10
+research_results = 8
+request_throttle_seconds = 1.0   # per-host throttle in the crawler
+embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
+embedding_local_only = true         # default; never fetch model files at query time
+browser_channel = "chrome"       # uses installed Google Chrome
+# browser_executable = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+summary_sentences = 6
+```
+Set `HARNESS_EMBED_LOCAL_ONLY=0` to allow runtime Hugging Face fetches.
+## Testing
+```bash
+.venv/bin/pytest
+```
+Tests run offline — search, extract, and the research pipeline are all
+exercised against saved HTML fixtures and patched network seams.
+## Why local Chrome?
+Using your system Chrome (channel = "chrome") gives you:
+- Real cookies, login state, extensions, and font rendering
+- No extra ~190MB Chromium download
+- The same user-agent fingerprint as your daily browsing
+- Cleaner integration with `omniscout session start` for long-lived sessions
+  that other tools can attach to over CDP
+If Chrome isn't available, the engine transparently falls back to Playwright's
+bundled Chromium.

omniscout-0.1.0/README.md ADDED Viewed

@@ -0,0 +1,156 @@
+# scout — OmniScout CLI
+Local-first browser automation, semantic search, and research for AI agents.
+No cloud APIs, no hosted browser sessions, no MCP, no SDK.
+The CLI is the interface.
+## Install
+Requires Python 3.11+ and Google Chrome (on macOS, typically installed at
+`/Applications/Google Chrome.app`).
+### Recommended: install as a global tool
+```bash
+cd cli
+uv tool install --editable .   # installs scout, omniscout, and harness into ~/.local/bin (on PATH)
+scout install              # verifies Chrome + prefetches embedding model
+```
+After this, `scout` works from any directory. Edits to source files are
+picked up live (editable install).
+`omniscout` and `harness` remain available as compatibility aliases.
+### Alternative: project venv
+If you prefer not to install globally:
+```bash
+cd cli
+uv venv --python 3.11 .venv
+uv pip install -e ".[dev]"
+source .venv/bin/activate
+scout install
+```
+If you don't have Chrome installed, add `--bundled` to also download
+Playwright's bundled Chromium (~190MB).
+`scout install` also prefetches the local sentence-transformers model into
+OmniScout's app data directory so later commands do not need to fetch it again.
+Use `--no-model` to skip model prefetch.
+## Quickstart
+```bash
+# Search the web (DuckDuckGo HTML + local embedding rerank)
+scout search "local-first browser agents"
+# same command via alias:
+omniscout search "local-first browser agents"
+# Extract a URL to clean Markdown
+scout extract https://example.com
+# Capture a screenshot of a real page using your installed Chrome
+scout browser screenshot https://example.com --out page.png
+# Run a multi-step research pipeline (search -> crawl -> extract -> rerank -> summarize)
+scout research "state of local AI agents in 2026"
+# Manage persistent browser profiles (cookies, logins persist across runs)
+scout profile create work
+scout browser open https://news.ycombinator.com --profile work --headful
+# Long-lived browser sessions (other tools can attach via CDP)
+scout session start --headful
+scout session list
+scout session kill --all
+```
+## JSON output (for agents)
+Every command emits structured JSON when invoked with `--json` (or with
+`HARNESS_JSON=1` in the environment). Logs always go to stderr; stdout is
+reserved for the structured result.
+```bash
+HARNESS_JSON=1 scout search "robotics simulators" --limit 5
+```
+## Architecture
+```
+harness/
+  app.py              # Typer root
+  commands/           # CLI sub-commands (thin)
+  engines/
+    browser.py        # Playwright + system Chrome
+    extractor.py      # trafilatura + markdownify
+    crawler.py        # async httpx + Chrome fallback
+    search/
+      ddg.py          # DuckDuckGo HTML
+      embed.py        # sentence-transformers (all-MiniLM-L6-v2)
+      index.py        # embedded Qdrant on-disk
+      rerank.py       # cosine rerank
+      pipeline.py     # ddg | index | hybrid
+    research.py       # full pipeline (search -> crawl -> extract -> rerank -> summarize)
+  store/
+    cache.py          # SQLite + content-hashed HTML cache
+    sessions.py       # SQLite registry of browser sessions
+  models.py           # pydantic result types (the JSON contract)
+```
+On-disk state lives under `~/Library/Application Support/harness/` (macOS) /
+`$XDG_DATA_HOME/harness/` (Linux):
+| Path             | Purpose                                            |
+| ---------------- | -------------------------------------------------- |
+| `profiles/`      | Persistent Chrome user-data-dirs                   |
+| `qdrant/`        | Embedded vector index                              |
+| `sessions.sqlite`| Registry of long-lived browser sessions            |
+| `cache/pages/`   | Content-hashed HTML cache used by extract+crawler |
+Override via `HARNESS_DATA_DIR`, `HARNESS_CONFIG_DIR`, `HARNESS_CACHE_DIR`,
+or settings in `~/Library/Application Support/harness/config.toml`.
+## Configuration
+`config.toml` example:
+```toml
+default_source = "ddg"           # search source default
+search_limit = 10
+research_results = 8
+request_throttle_seconds = 1.0   # per-host throttle in the crawler
+embedding_model = "sentence-transformers/all-MiniLM-L6-v2"
+embedding_local_only = true         # default; never fetch model files at query time
+browser_channel = "chrome"       # uses installed Google Chrome
+# browser_executable = "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
+summary_sentences = 6
+```
+Set `HARNESS_EMBED_LOCAL_ONLY=0` to allow runtime Hugging Face fetches.
+## Testing
+```bash
+.venv/bin/pytest
+```
+Tests run offline — search, extract, and the research pipeline are all
+exercised against saved HTML fixtures and patched network seams.
+## Why local Chrome?
+Using your system Chrome (channel = "chrome") gives you:
+- Real cookies, login state, extensions, and font rendering
+- No extra ~190MB Chromium download
+- The same user-agent fingerprint as your daily browsing
+- Cleaner integration with `omniscout session start` for long-lived sessions
+  that other tools can attach to over CDP
+If Chrome isn't available, the engine transparently falls back to Playwright's
+bundled Chromium.

omniscout-0.1.0/RELEASING.md ADDED Viewed

@@ -0,0 +1,73 @@
+# Releasing `omniscout` to PyPI
+This project publishes the Python CLI package from `cli/` using GitHub Actions
+OIDC trusted publishing.
+## Package identity
+- Distribution name: `omniscout`
+- Console commands:
+  - `omniscout` (primary)
+  - `scout` (alias)
+  - `harness` (compatibility alias)
+## Trusted publisher configuration
+Configure on **pypi.org** (production):
+- PyPI project name: `omniscout`
+- Owner: `sriramramnath`
+- Repository: `omniscout`
+- Workflow: `pypi-publish.yml`
+- Environment: `pypi`
+If workflow file name or environment changes, update the trusted publisher entry
+to match exactly before publishing.
+TestPyPI is optional and not wired in CI right now. Add a `testpypi-publish.yml`
+workflow and register a matching publisher on test.pypi.org when you want a
+staging index later.
+## Versioning strategy
+Use semantic versioning:
+- `MAJOR`: breaking CLI/JSON contract changes.
+- `MINOR`: backward-compatible features.
+- `PATCH`: backward-compatible fixes/docs/internal improvements.
+Release flow:
+1. Update version in `pyproject.toml` and `harness/__init__.py`.
+2. Add a new section in `CHANGELOG.md`.
+3. Open PR and ensure CI passes.
+4. Create and push tag `vX.Y.Z` (runs CI only).
+5. Run `publish-pypi` workflow manually with `ref=vX.Y.Z` and approve the
+   `pypi` environment.
+## Dependency pinning strategy
+- Runtime dependencies are lower-bounded for compatibility and should remain
+  narrow on major version boundaries where practical.
+- Add upper bounds only when an upstream major release is known to break the
+  CLI.
+- For reproducible local development, use a lock file (`uv.lock`) in contributor
+  workflows; do not publish pinned transitive dependencies in the package
+  metadata unless required.
+## Release notes / changelog discipline
+- Every release must include a `CHANGELOG.md` entry.
+- Keep entries grouped under `Added`, `Changed`, `Fixed`, `Removed`.
+- Link the tag to corresponding GitHub release notes.
+## First release checklist
+1. Confirm `omniscout` is available/reserved on PyPI.
+2. Confirm trusted publisher entry is configured on pypi.org.
+3. Ensure GitHub environment `pypi` has required reviewers (manual approval).
+4. Push tag `vX.Y.Z` and wait for CI to pass.
+5. Run `publish-pypi` workflow manually with `ref=vX.Y.Z` and approve `pypi`.
+6. Verify production install:
+   - `pip install omniscout`
+   - `scout --help`

omniscout-0.1.0/harness/__init__.py ADDED Viewed

@@ -0,0 +1,3 @@
+"""OmniScout `harness` CLI package."""
+__version__ = "0.1.0"

omniscout-0.1.0/harness/__main__.py ADDED Viewed

@@ -0,0 +1,6 @@
+"""Allow `python -m harness` to invoke the CLI."""
+from harness.app import main
+if __name__ == "__main__":
+    main()