PyPI - openkb - Versions diffs - 0.1.2__tar.gz → 0.1.4.dev0__tar.gz - Mend

openkb 0.1.2.tar.gz → 0.1.4.dev0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (49)

openkb-0.1.4.dev0/.env.example +5 -0
openkb-0.1.4.dev0/.github/workflows/publish.yml +58 -0
openkb-0.1.4.dev0/.gitignore +19 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/PKG-INFO +61 -33
{openkb-0.1.2 → openkb-0.1.4.dev0}/README.md +44 -18
openkb-0.1.4.dev0/config.yaml.example +3 -0
openkb-0.1.4.dev0/openkb/agent/_markdown.py +371 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/chat.py +276 -20
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/compiler.py +358 -73
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/query.py +98 -21
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/cli.py +98 -29
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/lint.py +168 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/pyproject.toml +31 -32
openkb-0.1.4.dev0/tests/conftest.py +80 -0
openkb-0.1.4.dev0/tests/test_add_command.py +151 -0
openkb-0.1.4.dev0/tests/test_agent_tools.py +215 -0
openkb-0.1.4.dev0/tests/test_chat_session.py +76 -0
openkb-0.1.4.dev0/tests/test_chat_slash_commands.py +256 -0
openkb-0.1.4.dev0/tests/test_cli.py +160 -0
openkb-0.1.4.dev0/tests/test_compiler.py +1347 -0
openkb-0.1.4.dev0/tests/test_config.py +49 -0
openkb-0.1.4.dev0/tests/test_converter.py +133 -0
openkb-0.1.4.dev0/tests/test_images.py +169 -0
openkb-0.1.4.dev0/tests/test_indexer.py +127 -0
openkb-0.1.4.dev0/tests/test_lint.py +395 -0
openkb-0.1.4.dev0/tests/test_lint_cli.py +75 -0
openkb-0.1.4.dev0/tests/test_linter.py +85 -0
openkb-0.1.4.dev0/tests/test_list_status.py +150 -0
openkb-0.1.4.dev0/tests/test_markdown_renderer.py +38 -0
openkb-0.1.4.dev0/tests/test_query.py +139 -0
openkb-0.1.4.dev0/tests/test_state.py +84 -0
openkb-0.1.4.dev0/tests/test_tree_renderer.py +41 -0
openkb-0.1.4.dev0/tests/test_watcher.py +126 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/LICENSE +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/__init__.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/__main__.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/__init__.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/chat_session.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/linter.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/agent/tools.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/config.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/converter.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/images.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/indexer.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/log.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/schema.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/state.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/tree_renderer.py +0 -0
{openkb-0.1.2 → openkb-0.1.4.dev0}/openkb/watcher.py +0 -0

openkb-0.1.4.dev0/.env.example ADDED Viewed

@@ -0,0 +1,5 @@
+# LLM API key (required) — works with any LiteLLM-supported provider
+# OpenAI:    LLM_API_KEY=sk-...
+# Anthropic: LLM_API_KEY=sk-ant-...
+# Gemini:    LLM_API_KEY=AIza...
+LLM_API_KEY=your-key-here

openkb-0.1.4.dev0/.github/workflows/publish.yml ADDED Viewed

@@ -0,0 +1,58 @@
+name: Publish to PyPI
+# Release flow:
+#   1. Bump `version` in pyproject.toml on main.
+#   2. `git tag -a vX.Y.Z -m "Release X.Y.Z" && git push origin vX.Y.Z`
+#      The tag MUST match pyproject.toml's version exactly (with the `v` prefix).
+#   3. This workflow builds the package, publishes to PyPI via OIDC trusted
+#      publishing, and creates a GitHub Release with auto-generated notes.
+#
+# Do not run `python -m build && twine upload` locally — that bypasses the
+# version check and leaves no GitHub Release. PyPI rejects duplicate
+# version uploads, so if the workflow fails after PyPI publish succeeded,
+# manually create the missing GitHub Release with `gh release create vX.Y.Z`.
+on:
+  push:
+    tags:
+      - "v*"
+jobs:
+  publish:
+    runs-on: ubuntu-latest
+    environment: pypi
+    permissions:
+      id-token: write   # OIDC trusted publishing to PyPI
+      contents: write   # Create GitHub Release
+    steps:
+      - uses: actions/checkout@692973e3d937129bcbf40652eb9f2f61becf3332  # v4.1.7
+      - uses: actions/setup-python@f677139bbe7f9c59b41e40162b753c062f5d49a3  # v5.2.0
+        with:
+          python-version: "3.12"
+      - name: Verify tag matches pyproject.toml version
+        run: |
+          tag="${GITHUB_REF_NAME#v}"
+          pkg_version=$(python -c "import tomllib,pathlib; print(tomllib.loads(pathlib.Path('pyproject.toml').read_text())['project']['version'])")
+          if [ "$tag" != "$pkg_version" ]; then
+            echo "::error::Tag v$tag does not match pyproject.toml version $pkg_version"
+            exit 1
+          fi
+      - name: Install build tools
+        run: pip install build
+      - name: Build package
+        run: python -m build
+      - name: Publish to PyPI
+        uses: pypa/gh-action-pypi-publish@cef221092ed1bacb1cc03d23a2d87d1d172e277b  # release/v1.14.0
+      - name: Create GitHub Release
+        uses: softprops/action-gh-release@b4309332981a82ec1c5618f44dd2e27cc8bfbfda  # v3.0.0
+        with:
+          tag_name: ${{ github.ref_name }}
+          name: ${{ github.ref_name }}
+          generate_release_notes: true
+          files: dist/*

openkb-0.1.4.dev0/.gitignore ADDED Viewed

@@ -0,0 +1,19 @@
+__pycache__/
+*.pyc
+*.egg-info/
+dist/
+build/
+.venv/
+venv/
+*.db
+.DS_Store
+.env
+# Knowledge base test artifacts
+raw/
+wiki/
+.openkb/
+# Local only
+docs/
+.claude/

{openkb-0.1.2 → openkb-0.1.4.dev0}/PKG-INFO RENAMED Viewed

@@ -1,13 +1,14 @@
-Metadata-Version: 2.3
+Metadata-Version: 2.4
 Name: openkb
-Version: 0.1.2
+Version: 0.1.4.dev0
 Summary: OpenKB: Open LLM Knowledge Base, powered by PageIndex
+Project-URL: Repository, https://github.com/VectifyAI/OpenKB
+Project-URL: Homepage, https://github.com/VectifyAI/OpenKB
+Project-URL: Issues, https://github.com/VectifyAI/OpenKB/issues
+Author-email: Kylin <quanqi@pageindex.ai>, Ray <ray@vectify.ai>
 License: Apache-2.0
-Keywords: ai,rag,retrieval,knowledge-base,llm,pageindex,agents,document
-Author: Ray
-Author-email: ray@vectify.ai
-Maintainer: rejojer
-Requires-Python: >=3.10
+License-File: LICENSE
+Keywords: agents,ai,document,knowledge-base,llm,pageindex,rag,retrieval
 Classifier: Development Status :: 3 - Alpha
 Classifier: Intended Audience :: Developers
 Classifier: License :: OSI Approved :: Apache Software License
@@ -17,19 +18,21 @@ Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
-Requires-Dist: click (>=8.0)
+Requires-Python: >=3.10
+Requires-Dist: click>=8.0
 Requires-Dist: json-repair
 Requires-Dist: litellm
 Requires-Dist: markitdown[all]
 Requires-Dist: openai-agents
-Requires-Dist: pageindex (==0.3.0.dev1)
-Requires-Dist: prompt_toolkit (>=3.0)
+Requires-Dist: pageindex==0.3.0.dev1
+Requires-Dist: prompt-toolkit>=3.0
 Requires-Dist: python-dotenv
 Requires-Dist: pyyaml
-Requires-Dist: watchdog (>=3.0)
-Project-URL: Homepage, https://github.com/VectifyAI/OpenKB
-Project-URL: Issues, https://github.com/VectifyAI/OpenKB/issues
-Project-URL: Repository, https://github.com/VectifyAI/OpenKB
+Requires-Dist: rich>=13.0
+Requires-Dist: watchdog>=3.0
+Provides-Extra: dev
+Requires-Dist: pytest; extra == 'dev'
+Requires-Dist: pytest-asyncio; extra == 'dev'
 Description-Content-Type: text/markdown
 <div align="center">
@@ -52,13 +55,13 @@ Description-Content-Type: text/markdown
 The idea is based on a [concept](https://x.com/karpathy/status/2039805659525644595) described by Andrej Karpathy: LLMs generate summaries, concept pages, and cross-references, all maintained automatically. Knowledge compounds over time instead of being re-derived on every query.
-### Why not just traditional RAG?
+### Why not traditional RAG?
 Traditional RAG rediscovers knowledge from scratch on every query. Nothing accumulates. OpenKB compiles knowledge once into a persistent wiki, then keeps it current. Cross-references already exist. Contradictions are flagged. Synthesis reflects everything consumed.
 ### Features
-- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, CSV, text, and more via markitdown
+- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown
 - **Scale to long documents** — Long and complex documents are handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval
 - **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text
 - **Compiled Wiki** — LLM manages and compiles your documents into summaries, concept pages, and cross-links, all kept in sync
@@ -76,7 +79,26 @@ Traditional RAG rediscovers knowledge from scratch on every query. Nothing accum
 pip install openkb
 ```
-### Quick start
+<details>
+<summary><i>Other install options</i></summary>
+- **Latest from GitHub:**
+  ```bash
+  pip install git+https://github.com/VectifyAI/OpenKB.git
+  ```
+- **Install from source** (editable, for development):
+  ```bash
+  git clone https://github.com/VectifyAI/OpenKB.git
+  cd OpenKB
+  pip install -e .
+  ```
+</details>
+### Quick Start
 ```bash
 # 1. Create a directory for your knowledge base
@@ -87,13 +109,12 @@ openkb init
 # 3. Add documents
 openkb add paper.pdf
-openkb add ~/papers/                   # Add a whole directory
-openkb add article.html
+openkb add ~/papers/  # Add a whole directory
 # 4. Ask a question
 openkb query "What are the main findings?"
-# 5. Or start an interactive chat session
+# 5. Or chat interactively
 openkb chat
 ```
@@ -109,7 +130,7 @@ Create a `.env` file with your LLM API key:
 LLM_API_KEY=your_llm_api_key
 ```
-# 🧩 How It Works
+# 🧩 How OpenKB Works
 ### Architecture
@@ -135,7 +156,7 @@ wiki/
  └── reports/            Lint reports
 ```
-### Short vs. long document handling
+### Short vs. Long Document Handling
 | | Short documents | Long documents (PDF ≥ 20 pages) |
 |---|---|---|
@@ -146,7 +167,7 @@ wiki/
 Short docs are read in full by the LLM. Long PDFs are indexed by PageIndex into a hierarchical tree with summaries. The LLM reads the tree instead of the full text, enabling better retrieval from long documents.
-### The wiki compiles knowledge
+### Knowledge Compilation
 When you add a document, the LLM:
@@ -157,16 +178,15 @@ When you add a document, the LLM:
 A single source might touch 10-15 wiki pages. Knowledge accumulates: each document enriches the existing wiki rather than sitting in isolation.
-# 📦 Usage
+# ⚙️ Usage
 ### Commands
 | Command | Description |
 |---|---|
 | `openkb init` | Initialize a new knowledge base (interactive) |
-| `openkb add <file_or_dir>` | Add documents and compile to wiki |
-| `openkb query "question"` | Ask a question against the knowledge base |
-| `openkb query "question" --save` | Ask and save the answer to `wiki/explorations/` |
+| <code>openkb&nbsp;add&nbsp;&lt;file_or_dir&gt;</code> | Add documents and compile to wiki |
+| <code>openkb&nbsp;query&nbsp;"question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
 | `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
 | `openkb watch` | Watch `raw/` and auto-compile new files |
 | `openkb lint` | Run structural + knowledge health checks |
@@ -175,7 +195,7 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
 <!-- | `openkb lint --fix` | Auto-fix what it can | -->
-### Interactive chat
+### Interactive Chat
 `openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context.
@@ -187,7 +207,16 @@ openkb chat --list                # list all sessions
 openkb chat --delete <id>         # delete a session
 ```
-`/help` lists all slash commands: e.g., `/save` exports the transcript, `/clear` starts a fresh session.
+Inside a chat, type `/` to access slash commands (Tab to complete):
+- `/help` — list available commands
+- `/status` — show knowledge base status
+- `/list` — list all documents
+- `/add <path>` — add a document or directory without leaving the chat
+- `/save [name]` — export the transcript to `wiki/explorations/`
+- `/clear` — start a fresh session (the current one stays on disk)
+- `/lint` — run knowledge base lint
+- `/exit` — exit (Ctrl-D also works)
 ### Configuration
@@ -207,7 +236,7 @@ Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/p
 | Anthropic | `anthropic/claude-sonnet-4-6` |
 | Gemini | `gemini/gemini-3.1-pro-preview` |
-### PageIndex integration
+### PageIndex Integration
 Long documents are challenging for LLMs due to context limits, context rot, and summarization loss.
 [PageIndex](https://github.com/VectifyAI/PageIndex) solves this with vectorless, reasoning-based retrieval — building a hierarchical tree index that lets LLMs reason over the index for context-aware retrieval.
@@ -255,7 +284,7 @@ OpenKB's wiki is a directory of Markdown files with `[[wikilinks]]`. Obsidian re
 | Wiki compilation | LLM agent | LLM agent (same) |
 | Q&A | Query over wiki | Wiki + PageIndex retrieval |
-### Tech Stack
+### The Stack
 - [PageIndex](https://github.com/VectifyAI/PageIndex) — Vectorless, reasoning-based document indexing and retrieval
 - [markitdown](https://github.com/microsoft/markitdown) — Universal file-to-markdown conversion
@@ -282,7 +311,7 @@ Apache 2.0. See [LICENSE](LICENSE).
 ### Support Us
-If you find OpenKB useful, give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
+If you find OpenKB useful, please give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
 <div>
@@ -291,4 +320,3 @@ If you find OpenKB useful, give us a star 🌟 — and check out [PageIndex](htt
 [![Contact Us](https://img.shields.io/badge/Contact_Us-3B82F6?style=for-the-badge&logo=envelope&logoColor=white)](https://ii2abc2jejf.typeform.com/to/tK3AXl8T)
 </div>

{openkb-0.1.2 → openkb-0.1.4.dev0}/README.md RENAMED Viewed

@@ -18,13 +18,13 @@
 The idea is based on a [concept](https://x.com/karpathy/status/2039805659525644595) described by Andrej Karpathy: LLMs generate summaries, concept pages, and cross-references, all maintained automatically. Knowledge compounds over time instead of being re-derived on every query.
-### Why not just traditional RAG?
+### Why not traditional RAG?
 Traditional RAG rediscovers knowledge from scratch on every query. Nothing accumulates. OpenKB compiles knowledge once into a persistent wiki, then keeps it current. Cross-references already exist. Contradictions are flagged. Synthesis reflects everything consumed.
 ### Features
-- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, CSV, text, and more via markitdown
+- **Broad format support** — PDF, Word, Markdown, PowerPoint, HTML, Excel, text, and more via markitdown
 - **Scale to long documents** — Long and complex documents are handled via [PageIndex](https://github.com/VectifyAI/PageIndex) tree indexing, enabling accurate, vectorless long-context retrieval
 - **Native multi-modality** — Retrieves and understands figures, tables, and images, not just text
 - **Compiled Wiki** — LLM manages and compiles your documents into summaries, concept pages, and cross-links, all kept in sync
@@ -42,7 +42,26 @@ Traditional RAG rediscovers knowledge from scratch on every query. Nothing accum
 pip install openkb
 ```
-### Quick start
+<details>
+<summary><i>Other install options</i></summary>
+- **Latest from GitHub:**
+  ```bash
+  pip install git+https://github.com/VectifyAI/OpenKB.git
+  ```
+- **Install from source** (editable, for development):
+  ```bash
+  git clone https://github.com/VectifyAI/OpenKB.git
+  cd OpenKB
+  pip install -e .
+  ```
+</details>
+### Quick Start
 ```bash
 # 1. Create a directory for your knowledge base
@@ -53,13 +72,12 @@ openkb init
 # 3. Add documents
 openkb add paper.pdf
-openkb add ~/papers/                   # Add a whole directory
-openkb add article.html
+openkb add ~/papers/  # Add a whole directory
 # 4. Ask a question
 openkb query "What are the main findings?"
-# 5. Or start an interactive chat session
+# 5. Or chat interactively
 openkb chat
 ```
@@ -75,7 +93,7 @@ Create a `.env` file with your LLM API key:
 LLM_API_KEY=your_llm_api_key
 ```
-# 🧩 How It Works
+# 🧩 How OpenKB Works
 ### Architecture
@@ -101,7 +119,7 @@ wiki/
  └── reports/            Lint reports
 ```
-### Short vs. long document handling
+### Short vs. Long Document Handling
 | | Short documents | Long documents (PDF ≥ 20 pages) |
 |---|---|---|
@@ -112,7 +130,7 @@ wiki/
 Short docs are read in full by the LLM. Long PDFs are indexed by PageIndex into a hierarchical tree with summaries. The LLM reads the tree instead of the full text, enabling better retrieval from long documents.
-### The wiki compiles knowledge
+### Knowledge Compilation
 When you add a document, the LLM:
@@ -123,16 +141,15 @@ When you add a document, the LLM:
 A single source might touch 10-15 wiki pages. Knowledge accumulates: each document enriches the existing wiki rather than sitting in isolation.
-# 📦 Usage
+# ⚙️ Usage
 ### Commands
 | Command | Description |
 |---|---|
 | `openkb init` | Initialize a new knowledge base (interactive) |
-| `openkb add <file_or_dir>` | Add documents and compile to wiki |
-| `openkb query "question"` | Ask a question against the knowledge base |
-| `openkb query "question" --save` | Ask and save the answer to `wiki/explorations/` |
+| <code>openkb&nbsp;add&nbsp;&lt;file_or_dir&gt;</code> | Add documents and compile to wiki |
+| <code>openkb&nbsp;query&nbsp;"question"</code> | Ask a question over the knowledge base (use `--save` to save the answer to `wiki/explorations/`) |
 | `openkb chat` | Start an interactive multi-turn chat (use `--resume`, `--list`, `--delete` to manage sessions) |
 | `openkb watch` | Watch `raw/` and auto-compile new files |
 | `openkb lint` | Run structural + knowledge health checks |
@@ -141,7 +158,7 @@ A single source might touch 10-15 wiki pages. Knowledge accumulates: each docume
 <!-- | `openkb lint --fix` | Auto-fix what it can | -->
-### Interactive chat
+### Interactive Chat
 `openkb chat` opens an interactive chat session over your wiki knowledge base. Unlike the one-shot `openkb query`, each turn carries the conversation history, so you can dig into a topic without re-typing context.
@@ -153,7 +170,16 @@ openkb chat --list                # list all sessions
 openkb chat --delete <id>         # delete a session
 ```
-`/help` lists all slash commands: e.g., `/save` exports the transcript, `/clear` starts a fresh session.
+Inside a chat, type `/` to access slash commands (Tab to complete):
+- `/help` — list available commands
+- `/status` — show knowledge base status
+- `/list` — list all documents
+- `/add <path>` — add a document or directory without leaving the chat
+- `/save [name]` — export the transcript to `wiki/explorations/`
+- `/clear` — start a fresh session (the current one stays on disk)
+- `/lint` — run knowledge base lint
+- `/exit` — exit (Ctrl-D also works)
 ### Configuration
@@ -173,7 +199,7 @@ Model names use `provider/model` LiteLLM [format](https://docs.litellm.ai/docs/p
 | Anthropic | `anthropic/claude-sonnet-4-6` |
 | Gemini | `gemini/gemini-3.1-pro-preview` |
-### PageIndex integration
+### PageIndex Integration
 Long documents are challenging for LLMs due to context limits, context rot, and summarization loss.
 [PageIndex](https://github.com/VectifyAI/PageIndex) solves this with vectorless, reasoning-based retrieval — building a hierarchical tree index that lets LLMs reason over the index for context-aware retrieval.
@@ -221,7 +247,7 @@ OpenKB's wiki is a directory of Markdown files with `[[wikilinks]]`. Obsidian re
 | Wiki compilation | LLM agent | LLM agent (same) |
 | Q&A | Query over wiki | Wiki + PageIndex retrieval |
-### Tech Stack
+### The Stack
 - [PageIndex](https://github.com/VectifyAI/PageIndex) — Vectorless, reasoning-based document indexing and retrieval
 - [markitdown](https://github.com/microsoft/markitdown) — Universal file-to-markdown conversion
@@ -248,7 +274,7 @@ Apache 2.0. See [LICENSE](LICENSE).
 ### Support Us
-If you find OpenKB useful, give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
+If you find OpenKB useful, please give us a star 🌟 — and check out [PageIndex](https://github.com/VectifyAI/PageIndex) too!
 <div>

openkb-0.1.4.dev0/config.yaml.example ADDED Viewed

@@ -0,0 +1,3 @@
+model: gpt-5.4                   # LLM model (any LiteLLM-supported provider)
+language: en                     # Wiki output language
+pageindex_threshold: 20          # PDF pages threshold for PageIndex

openkb 0.1.2__tar.gz → 0.1.4.dev0__tar.gz

openkb 0.1.2.tar.gz → 0.1.4.dev0.tar.gz