jehoctor-rag-demo 0.1.1.dev1.tar.gz → 0.2.0.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26)
  1. jehoctor_rag_demo-0.2.0/PKG-INFO +100 -0
  2. jehoctor_rag_demo-0.2.0/README.md +69 -0
  3. jehoctor_rag_demo-0.2.0/pyproject.toml +102 -0
  4. jehoctor_rag_demo-0.2.0/src/rag_demo/__main__.py +31 -0
  5. jehoctor_rag_demo-0.2.0/src/rag_demo/app.py +58 -0
  6. jehoctor_rag_demo-0.2.0/src/rag_demo/app.tcss +0 -0
  7. jehoctor_rag_demo-0.2.0/src/rag_demo/db.py +87 -0
  8. jehoctor_rag_demo-0.2.0/src/rag_demo/dirs.py +14 -0
  9. jehoctor_rag_demo-0.2.0/src/rag_demo/logic.py +287 -0
  10. jehoctor_rag_demo-0.2.0/src/rag_demo/markdown.py +17 -0
  11. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/__init__.py +3 -0
  12. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/_logic_provider.py +43 -0
  13. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/chat.py +315 -0
  14. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/chat.tcss +75 -0
  15. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/config.py +77 -0
  16. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/config.tcss +0 -0
  17. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/help.py +26 -0
  18. jehoctor_rag_demo-0.2.0/src/rag_demo/modes/help.tcss +0 -0
  19. jehoctor_rag_demo-0.2.0/src/rag_demo/py.typed +0 -0
  20. jehoctor_rag_demo-0.2.0/src/rag_demo/widgets/__init__.py +1 -0
  21. jehoctor_rag_demo-0.2.0/src/rag_demo/widgets/escapable_input.py +110 -0
  22. jehoctor_rag_demo-0.1.1.dev1/PKG-INFO +0 -11
  23. jehoctor_rag_demo-0.1.1.dev1/README.md +0 -2
  24. jehoctor_rag_demo-0.1.1.dev1/pyproject.toml +0 -33
  25. jehoctor_rag_demo-0.1.1.dev1/src/rag_demo/__init__.py +0 -2
  26. /jehoctor_rag_demo-0.1.1.dev1/src/rag_demo/py.typed → /jehoctor_rag_demo-0.2.0/src/rag_demo/__init__.py +0 -0
@@ -0,0 +1,100 @@
+ Metadata-Version: 2.3
+ Name: jehoctor-rag-demo
+ Version: 0.2.0
+ Summary: Chat with Wikipedia
+ Author: James Hoctor
+ Author-email: James Hoctor <JEHoctor@protonmail.com>
+ Requires-Dist: aiosqlite==0.21.0
+ Requires-Dist: chromadb>=1.3.4
+ Requires-Dist: datasets>=4.4.1
+ Requires-Dist: httpx>=0.28.1
+ Requires-Dist: huggingface-hub>=0.36.0
+ Requires-Dist: langchain>=1.0.5
+ Requires-Dist: langchain-anthropic>=1.0.2
+ Requires-Dist: langchain-community>=0.4.1
+ Requires-Dist: langchain-huggingface>=1.1.0
+ Requires-Dist: langchain-ollama>=1.0.0
+ Requires-Dist: langchain-openai>=1.0.2
+ Requires-Dist: langgraph-checkpoint-sqlite>=3.0.1
+ Requires-Dist: llama-cpp-python>=0.3.16
+ Requires-Dist: nvidia-ml-py>=13.590.44
+ Requires-Dist: ollama>=0.6.0
+ Requires-Dist: platformdirs>=4.5.0
+ Requires-Dist: psutil>=7.1.3
+ Requires-Dist: py-cpuinfo>=9.0.0
+ Requires-Dist: pydantic>=2.12.4
+ Requires-Dist: pyperclip>=1.11.0
+ Requires-Dist: textual>=6.5.0
+ Requires-Dist: typer>=0.20.0
+ Requires-Python: >=3.12
+ Description-Content-Type: text/markdown
+
+ # RAG-demo
+
+ Chat with (a small portion of) Wikipedia
+
+ ⚠️ RAG functionality is still under development. ⚠️
+
+ ![app screenshot](screenshots/screenshot_062f205a.png "App screenshot (this AI response is not accurate)")
+
+ ## Requirements
+
+ 1. [uv](https://docs.astral.sh/uv/)
+ 2. At least one of the following:
+    - A suitable terminal emulator. In particular, on macOS consider using [iTerm2](https://iterm2.com/) instead of the default Terminal.app ([explanation](https://textual.textualize.io/FAQ/#why-doesnt-textual-look-good-on-macos)). On Linux, you might want to try [kitty](https://sw.kovidgoyal.net/kitty/), [wezterm](https://wezterm.org/), [alacritty](https://alacritty.org/), or [ghostty](https://ghostty.org/) instead of the terminal that came with your DE ([reason](https://darren.codes/posts/textual-copy-paste/)). Windows Terminal should be fine as far as I know.
+    - Any common web browser
+
+ ## Optional stuff that could make your experience better
+
+ 1. [Hugging Face login](https://huggingface.co/docs/huggingface_hub/quick-start#login)
+ 2. API key for your favorite LLM provider (support coming soon)
+ 3. Ollama installed on your system if you have a GPU
+ 4. Run RAG-demo on a more capable (bigger GPU) machine over SSH if you can. It is a terminal app after all.
+
+
+ ## Run from the repository
+
+ First, clone this repository. Then, run one of the options below.
+
+ Run in a terminal:
+ ```bash
+ uv run chat
+ ```
+
+ Or run in a web browser:
+ ```bash
+ uv run textual serve chat
+ ```
+
+ ## Run from the latest version on PyPI
+
+ TODO: test uv automatic torch backend selection:
+ https://docs.astral.sh/uv/guides/integration/pytorch/#automatic-backend-selection
+
+ Run in a terminal:
+ ```bash
+ uvx --from=jehoctor-rag-demo chat
+ ```
+
+ Or run in a web browser:
+ ```bash
+ uvx --from=jehoctor-rag-demo textual serve chat
+ ```
+
+ ## CUDA acceleration via Llama.cpp
+
+ If you have an NVIDIA GPU with CUDA and build tools installed, you might be able to get CUDA acceleration without installing Ollama.
+
+ ```bash
+ CMAKE_ARGS="-DGGML_CUDA=on" uv run chat
+ ```
+
+ ## Metal acceleration via Llama.cpp (on Apple Silicon)
+
+ On an Apple Silicon machine, make sure `uv` runs an ARM interpreter as this should cause it to install Llama.cpp with Metal support.
+
+ ## Ollama on Linux
+
+ Remember that you have to keep Ollama up-to-date manually on Linux.
+ A recent version of Ollama (v0.11.10 or later) is required to run the [embedding model we use](https://ollama.com/library/embeddinggemma).
+ See this FAQ: https://docs.ollama.com/faq#how-can-i-upgrade-ollama.
@@ -0,0 +1,69 @@
+ # RAG-demo
+
+ Chat with (a small portion of) Wikipedia
+
+ ⚠️ RAG functionality is still under development. ⚠️
+
+ ![app screenshot](screenshots/screenshot_062f205a.png "App screenshot (this AI response is not accurate)")
+
+ ## Requirements
+
+ 1. [uv](https://docs.astral.sh/uv/)
+ 2. At least one of the following:
+    - A suitable terminal emulator. In particular, on macOS consider using [iTerm2](https://iterm2.com/) instead of the default Terminal.app ([explanation](https://textual.textualize.io/FAQ/#why-doesnt-textual-look-good-on-macos)). On Linux, you might want to try [kitty](https://sw.kovidgoyal.net/kitty/), [wezterm](https://wezterm.org/), [alacritty](https://alacritty.org/), or [ghostty](https://ghostty.org/) instead of the terminal that came with your DE ([reason](https://darren.codes/posts/textual-copy-paste/)). Windows Terminal should be fine as far as I know.
+    - Any common web browser
+
+ ## Optional stuff that could make your experience better
+
+ 1. [Hugging Face login](https://huggingface.co/docs/huggingface_hub/quick-start#login)
+ 2. API key for your favorite LLM provider (support coming soon)
+ 3. Ollama installed on your system if you have a GPU
+ 4. Run RAG-demo on a more capable (bigger GPU) machine over SSH if you can. It is a terminal app after all.
+
+
+ ## Run from the repository
+
+ First, clone this repository. Then, run one of the options below.
+
+ Run in a terminal:
+ ```bash
+ uv run chat
+ ```
+
+ Or run in a web browser:
+ ```bash
+ uv run textual serve chat
+ ```
+
+ ## Run from the latest version on PyPI
+
+ TODO: test uv automatic torch backend selection:
+ https://docs.astral.sh/uv/guides/integration/pytorch/#automatic-backend-selection
+
+ Run in a terminal:
+ ```bash
+ uvx --from=jehoctor-rag-demo chat
+ ```
+
+ Or run in a web browser:
+ ```bash
+ uvx --from=jehoctor-rag-demo textual serve chat
+ ```
+
+ ## CUDA acceleration via Llama.cpp
+
+ If you have an NVIDIA GPU with CUDA and build tools installed, you might be able to get CUDA acceleration without installing Ollama.
+
+ ```bash
+ CMAKE_ARGS="-DGGML_CUDA=on" uv run chat
+ ```
+
+ ## Metal acceleration via Llama.cpp (on Apple Silicon)
+
+ On an Apple Silicon machine, make sure `uv` runs an ARM interpreter as this should cause it to install Llama.cpp with Metal support.
+
+ ## Ollama on Linux
+
+ Remember that you have to keep Ollama up-to-date manually on Linux.
+ A recent version of Ollama (v0.11.10 or later) is required to run the [embedding model we use](https://ollama.com/library/embeddinggemma).
+ See this FAQ: https://docs.ollama.com/faq#how-can-i-upgrade-ollama.
@@ -0,0 +1,102 @@
+ [project]
+ name = "jehoctor-rag-demo"
+ version = "0.2.0"
+ description = "Chat with Wikipedia"
+ readme = "README.md"
+ authors = [
+     { name = "James Hoctor", email = "JEHoctor@protonmail.com" }
+ ]
+ requires-python = ">=3.12"
+ # TODO: Remove the aiosqlite==0.21.0 pin, currently needed to work around this issue:
+ # https://github.com/langchain-ai/langgraph/issues/6583
+ dependencies = [
+     "aiosqlite==0.21.0",
+     "chromadb>=1.3.4",
+     "datasets>=4.4.1",
+     "httpx>=0.28.1",
+     "huggingface-hub>=0.36.0",
+     "langchain>=1.0.5",
+     "langchain-anthropic>=1.0.2",
+     "langchain-community>=0.4.1",
+     "langchain-huggingface>=1.1.0",
+     "langchain-ollama>=1.0.0",
+     "langchain-openai>=1.0.2",
+     "langgraph-checkpoint-sqlite>=3.0.1",
+     "llama-cpp-python>=0.3.16",
+     "nvidia-ml-py>=13.590.44",
+     "ollama>=0.6.0",
+     "platformdirs>=4.5.0",
+     "psutil>=7.1.3",
+     "py-cpuinfo>=9.0.0",
+     "pydantic>=2.12.4",
+     "pyperclip>=1.11.0",
+     "textual>=6.5.0",
+     "typer>=0.20.0",
+ ]
+
+ [project.scripts]
+ chat = "rag_demo.__main__:main"
+
+ [dependency-groups]
+ dev = [
+     "pytest>=8.4.2",
+     "ruff>=0.14.3",
+     "mypy>=1.18.2",
+     "textual-dev>=1.8.0",
+     "ipython>=9.7.0",
+     "pytest-cov>=7.0.0",
+     "pytest-asyncio>=1.3.0",
+ ]
+
+ [[tool.uv.index]]
+ name = "testpypi"
+ url = "https://test.pypi.org/simple/"
+ publish-url = "https://test.pypi.org/legacy/"
+ explicit = true
+
+ [[tool.uv.index]]
+ name = "llama-cpp-metal"
+ url = "https://abetlen.github.io/llama-cpp-python/whl/metal"
+ explicit = true
+
+ [tool.uv.sources]
+ llama-cpp-python = [
+     { index = "llama-cpp-metal", marker = "platform_machine == 'arm64' and sys_platform == 'darwin'" },
+ ]
+
+ [build-system]
+ requires = ["uv_build>=0.8.0,<0.9"]
+ build-backend = "uv_build"
+
+ [tool.uv.build-backend]
+ module-name = "rag_demo"
+
+ [tool.ruff]
+ line-length = 120
+
+ [tool.ruff.lint]
+ per-file-ignores = { "__init__.py" = ["F401"] }  # Ignore unused-import in all __init__.py files.
+ select = ["ALL"]
+ ignore = [
+     "E501",  # Handled by ruff format (line-too-long)
+     "D100",  # undocumented-public-module
+     "D104",  # undocumented-public-package
+     "D203",  # Conflicts with Google style D211/D212
+     "ANN101",  # Missing type annotation for self
+     "ANN102",  # Missing type annotation for cls
+ ]
+
+ [tool.ruff.lint.pydocstyle]
+ convention = "google"
+
+ [tool.ruff.lint.flake8-boolean-trap]
+ extend-allowed-calls = ["textual.reactive.reactive"]
+
+ [tool.mypy]
+ strict = true
+ show_error_codes = true
+ warn_unused_ignores = true
+ files = ["src/", "tests/"]
+
+ # Enable the pydantic plugin so mypy understands pydantic models.
+ plugins = ["pydantic.mypy"]
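A side note on the `[tool.uv.sources]` entry above: the `marker` value is a standard PEP 508 environment marker, so the Metal wheel index is consulted only on Apple Silicon macOS. As a minimal sketch, the same marker can be evaluated against the current interpreter with the third-party `packaging` library (an assumption; uv evaluates markers itself during resolution):

```python
# Evaluate the environment marker used in [tool.uv.sources] above.
# Illustrative only; this is not part of the rag_demo package.
from packaging.markers import Marker

marker = Marker("platform_machine == 'arm64' and sys_platform == 'darwin'")
print(marker.evaluate())  # True only on Apple Silicon macOS
```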
@@ -0,0 +1,31 @@
+ import time
+
+ # Measure the application start time.
+ APPLICATION_START_TIME = time.time()
+
+ # Disable "module import not at top of file" (aka E402) when importing Typer. This is necessary so that Typer's
+ # initialization is included in the application startup time.
+ import typer  # noqa: E402
+
+
+ def _main(
+     name: str | None = typer.Option(None, help="The name you want the AI to use with you."),
+ ) -> None:
+     """Talk to Wikipedia."""
+     # Import here so that imports run within the typer.run context for prettier stack traces if errors occur.
+     # We ignore PLC0415 because we do not want these imports to be at the top of the module as is usually preferred.
+     from rag_demo.app import RAGDemo  # noqa: PLC0415
+     from rag_demo.logic import Logic  # noqa: PLC0415
+
+     logic = Logic(username=name, application_start_time=APPLICATION_START_TIME)
+     app = RAGDemo(logic)
+     app.run()
+
+
+ def main() -> None:
+     """Entrypoint for the rag demo, specifically the `chat` command."""
+     typer.run(_main)
+
+
+ if __name__ == "__main__":
+     main()
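The ordering above (timestamp first, heavy imports afterwards) is what makes the startup measurement meaningful. A standalone sketch of the same idea, using illustrative names that are not part of `rag_demo`:

```python
import time

APPLICATION_START_TIME = time.time()  # capture before any heavy imports

import typer  # noqa: E402  # deliberately imported after the timestamp


def _main() -> None:
    # By the time this runs, the elapsed time includes Typer's import cost.
    typer.echo(f"startup took {time.time() - APPLICATION_START_TIME:.3f}s")


if __name__ == "__main__":
    typer.run(_main)
```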
@@ -0,0 +1,58 @@
+ from __future__ import annotations
+
+ import asyncio
+ from pathlib import Path
+ from typing import TYPE_CHECKING, ClassVar
+
+ from textual.app import App
+ from textual.binding import Binding
+
+ from rag_demo.modes import ChatScreen, ConfigScreen, HelpScreen
+
+ if TYPE_CHECKING:
+     from rag_demo.logic import Logic, Runtime
+
+
+ class RAGDemo(App):
+     """Main application UI.
+
+     This class is responsible for creating the modes of the application, which are defined in :mod:`rag_demo.modes`.
+     """
+
+     TITLE = "RAG Demo"
+     CSS_PATH = Path(__file__).parent / "app.tcss"
+     BINDINGS: ClassVar = [
+         Binding("z", "switch_mode('chat')", "chat"),
+         Binding("c", "switch_mode('config')", "configure"),
+         Binding("h", "switch_mode('help')", "help"),
+     ]
+     MODES: ClassVar = {
+         "chat": ChatScreen,
+         "config": ConfigScreen,
+         "help": HelpScreen,
+     }
+
+     def __init__(self, logic: Logic) -> None:
+         """Initialize the main app.
+
+         Args:
+             logic (Logic): Object implementing the application logic.
+         """
+         super().__init__()
+         self.logic = logic
+         self._runtime_future: asyncio.Future[Runtime] = asyncio.Future()
+
+     async def on_mount(self) -> None:
+         """Set the initial mode to chat and initialize async parts of the logic."""
+         self.switch_mode("chat")
+         self.run_worker(self._hold_runtime())
+
+     async def _hold_runtime(self) -> None:
+         async with self.logic.runtime(app_like=self) as runtime:
+             self._runtime_future.set_result(runtime)
+             # Pause the task until Textual cancels it when the application closes.
+             await asyncio.Event().wait()
+
+     async def runtime(self) -> Runtime:
+         """Returns the application runtime logic."""
+         return await self._runtime_future
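The `_hold_runtime` worker keeps the `Logic.runtime()` async context manager open for the app's whole lifetime while publishing the `Runtime` object through a future that `runtime()` awaits. A minimal standalone sketch of this future-holding pattern, with illustrative names and no Textual dependency:

```python
import asyncio
import contextlib


async def main() -> None:
    runtime_future: asyncio.Future[str] = asyncio.get_running_loop().create_future()

    async def hold_runtime() -> None:
        # Stand-in for `async with self.logic.runtime(...) as runtime:`.
        runtime_future.set_result("runtime")
        await asyncio.Event().wait()  # park until the holder task is cancelled

    holder = asyncio.create_task(hold_runtime())
    print(await runtime_future)  # consumers await the future, not the task
    holder.cancel()
    with contextlib.suppress(asyncio.CancelledError):
        await holder


asyncio.run(main())
```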
File without changes
@@ -0,0 +1,87 @@
+ from __future__ import annotations
+
+ from typing import TYPE_CHECKING
+
+ import aiosqlite
+
+ if TYPE_CHECKING:
+     from pathlib import Path
+
+
+ class AtomicIDManager:
+     """A database manager for managing thread IDs.
+
+     This was written by Claude, and I fixed it up with feedback from Ruff and Flake8.
+     Maybe one day the app logic database will require something fancier, but this gets the job done now.
+
+     As you can see from the conversation with Claude, this was quite a simple task for it:
+     https://claude.ai/share/227d08ff-96a3-495a-9f56-509a1fd528f7
+     """
+
+     def __init__(self, db_path: str | Path) -> None:
+         """Initialize the database manager."""
+         self.db_path = db_path
+
+     async def initialize(self) -> None:
+         """Initialize the database and create the table if it doesn't exist."""
+         async with aiosqlite.connect(self.db_path) as db:
+             # Enable WAL mode for better concurrent access
+             await db.execute("PRAGMA journal_mode=WAL")
+
+             await db.execute("""
+                 CREATE TABLE IF NOT EXISTS claimed_ids (
+                     id INTEGER PRIMARY KEY
+                 )
+             """)
+             await db.commit()
+
+     async def claim_next_id(self) -> int:
+         """Atomically find the max id, increment it, and claim it. Returns the newly claimed ID.
+
+         This operation is atomic and multiprocess-safe because:
+         1. SQLite serializes writes by default
+         2. We use IMMEDIATE transaction to acquire write lock immediately
+         3. The entire operation happens in a single transaction
+         """
+         async with aiosqlite.connect(self.db_path) as db:
+             # Start an IMMEDIATE transaction to get write lock right away
+             await db.execute("BEGIN IMMEDIATE")
+
+             try:
+                 # Find the current max ID
+                 async with db.execute("SELECT MAX(id) FROM claimed_ids") as cursor:
+                     row = await cursor.fetchone()
+                     max_id = row[0] if row is not None and row[0] is not None else 0
+
+                 # Calculate next ID
+                 next_id = max_id + 1
+
+                 # Insert the new ID
+                 await db.execute("INSERT INTO claimed_ids (id) VALUES (?)", (next_id,))
+
+                 # Commit the transaction
+                 await db.commit()
+
+             except Exception:
+                 await db.rollback()
+                 raise
+
+             else:
+                 return next_id
+
+     async def get_all_claimed_ids(self) -> list[int]:
+         """Retrieve all claimed IDs."""
+         async with (
+             aiosqlite.connect(self.db_path) as db,
+             db.execute("SELECT id FROM claimed_ids ORDER BY id") as cursor,
+         ):
+             rows = await cursor.fetchall()
+             return [row[0] for row in rows]
+
+     async def get_count(self) -> int:
+         """Get the total number of claimed IDs."""
+         async with aiosqlite.connect(self.db_path) as db, db.execute("SELECT COUNT(*) FROM claimed_ids") as cursor:
+             row = await cursor.fetchone()
+             if row is None:
+                 raise ValueError("A SQL COUNT query should always return at least one row")  # noqa: EM101, TRY003
+             return row[0]
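A minimal usage sketch for `AtomicIDManager`; the database path here is hypothetical, and in the app this is wired into the application logic rather than run as a script:

```python
import asyncio

from rag_demo.db import AtomicIDManager


async def demo() -> None:
    manager = AtomicIDManager("threads.sqlite3")  # hypothetical path
    await manager.initialize()
    thread_id = await manager.claim_next_id()  # atomically claims MAX(id) + 1
    print(f"claimed id {thread_id}; total claimed: {await manager.get_count()}")


asyncio.run(demo())
```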
@@ -0,0 +1,14 @@
+ from pathlib import Path
+
+ from platformdirs import PlatformDirs
+
+ _appdirs = PlatformDirs(appname="jehoctor-rag-demo", ensure_exists=True)
+
+
+ def _ensure(dir_: Path) -> Path:
+     dir_.mkdir(parents=True, exist_ok=True)
+     return dir_
+
+
+ DATA_DIR = _appdirs.user_data_path
+ CONFIG_DIR = _appdirs.user_config_path
+ CONFIG_DIR = _appdirs.user_config_path