White-Walker 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (27)
  1. white_walker-0.1.0/LICENSE +21 -0
  2. white_walker-0.1.0/PKG-INFO +174 -0
  3. white_walker-0.1.0/README.md +140 -0
  4. white_walker-0.1.0/White_Walker.egg-info/PKG-INFO +174 -0
  5. white_walker-0.1.0/White_Walker.egg-info/SOURCES.txt +30 -0
  6. white_walker-0.1.0/White_Walker.egg-info/dependency_links.txt +1 -0
  7. white_walker-0.1.0/White_Walker.egg-info/requires.txt +8 -0
  8. white_walker-0.1.0/White_Walker.egg-info/top_level.txt +1 -0
  9. white_walker-0.1.0/pyproject.toml +53 -0
  10. white_walker-0.1.0/setup.cfg +4 -0
  11. white_walker-0.1.0/white_walker/__init__.py +9 -0
  12. white_walker-0.1.0/white_walker/client.py +162 -0
  13. white_walker-0.1.0/white_walker/config.yaml +9 -0
  14. white_walker-0.1.0/white_walker/ingest/__init__.py +2 -0
  15. white_walker-0.1.0/white_walker/ingest/md_ingestor.py +288 -0
  16. white_walker-0.1.0/white_walker/ingest/pdf_ingestor.py +1109 -0
  17. white_walker-0.1.0/white_walker/llm/__init__.py +1 -0
  18. white_walker-0.1.0/white_walker/llm/adapter.py +186 -0
  19. white_walker-0.1.0/white_walker/retrieval/__init__.py +2 -0
  20. white_walker-0.1.0/white_walker/retrieval/agent_retriever.py +85 -0
  21. white_walker-0.1.0/white_walker/retrieval/prompts.py +54 -0
  22. white_walker-0.1.0/white_walker/retrieval/retrieve.py +112 -0
  23. white_walker-0.1.0/white_walker/store/__init__.py +3 -0
  24. white_walker-0.1.0/white_walker/store/base_store.py +36 -0
  25. white_walker-0.1.0/white_walker/store/local_store.py +114 -0
  26. white_walker-0.1.0/white_walker/store/supabase_store.py +102 -0
  27. white_walker-0.1.0/white_walker/utils.py +669 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 NUHASHROXME
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: White-Walker
3
+ Version: 0.1.0
4
+ Summary: Multimodal Agentic RAG with hierarchical tree indexing — index PDFs & Markdown into navigable tree structures, persist in Supabase, and query with LLM-powered retrieval.
5
+ Author: NUHASHROXME
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/YashNuhash/White-Walker
8
+ Project-URL: Repository, https://github.com/YashNuhash/White-Walker
9
+ Project-URL: Issues, https://github.com/YashNuhash/White-Walker/issues
10
+ Keywords: RAG,multimodal,agentic,LLM,retrieval,tree-indexing,supabase,nvidia,pdf,arxiv
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: supabase>=2.0
26
+ Requires-Dist: PyPDF2>=3.0
27
+ Requires-Dist: pdfplumber>=0.9
28
+ Requires-Dist: python-dotenv
29
+ Requires-Dist: httpx
30
+ Requires-Dist: requests
31
+ Requires-Dist: pymupdf
32
+ Requires-Dist: pyyaml
33
+ Dynamic: license-file
34
+
35
+ # 🧊 White Walker
36
+
37
+ [![PyPI version](https://badge.fury.io/py/white-walker.svg)](https://pypi.org/project/white-walker/)
38
+ [![Python](https://img.shields.io/pypi/pyversions/white-walker.svg)](https://pypi.org/project/white-walker/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
40
+
41
+ **Multimodal Agentic RAG** — Index PDFs & Markdown into hierarchical tree structures, persist in Supabase, and query with LLM-powered retrieval.
42
+
43
+ White Walker transforms documents into navigable tree indexes using LLM-based structural analysis, stores them in Supabase (or locally), and answers questions by intelligently traversing the tree to find relevant sections.
44
+
45
+ ---
46
+
47
+ ## ✨ Features
48
+
49
+ - 🌲 **Hierarchical Tree Indexing** — Automatically detects document structure (TOC, sections, subsections) and builds a navigable tree
50
+ - 🗄️ **Supabase Persistence** — Trees, raw pages, and metadata stored in PostgreSQL via Supabase
51
+ - 🤖 **Agentic Retrieval** — LLM navigates the tree to find relevant sections, then synthesizes answers with citations
52
+ - 📄 **PDF & Markdown Support** — Works with academic papers, technical docs, and any structured document
53
+ - ⚡ **Rate-Limited API** — Built-in token-bucket rate limiter for NVIDIA NIM API (38 RPM, configurable)
54
+ - 🔌 **Pluggable LLM Backend** — Default: NVIDIA NIM API, easily extensible
55
+
56
+ ---
57
+
58
+ ## 📦 Installation
59
+
60
+ ```bash
61
+ pip install white-walker
62
+ ```
63
+
64
+ ---
65
+
66
+ ## 🚀 Quick Start
67
+
68
+ ### 1. Set Environment Variables
69
+
70
+ ```bash
71
+ # .env file
72
+ SUPABASE_URL=https://your-project.supabase.co
73
+ SUPABASE_KEY=your-anon-key
74
+ NVIDIA_API_KEY=your-nvidia-nim-api-key
75
+ ```
76
+
77
+ ### 2. Set Up Supabase Tables
78
+
79
+ Run the SQL in `setup_supabase.sql` in your Supabase SQL editor to create the required tables (`rag_documents`, `rag_raw_pages`, `rag_tree_nodes`).
80
+
81
+ ### 3. Index & Query
82
+
83
+ ```python
84
+ from white_walker import WhiteWalkerClient
85
+
86
+ client = WhiteWalkerClient()
87
+
88
+ # Index a PDF
89
+ doc_id = client.index("paper.pdf")
90
+
91
+ # Query the indexed document
92
+ result = client.query(doc_id, "What is the main contribution of this paper?")
93
+
94
+ print(result["answer"])
95
+ print(result["citations"])
96
+ print(result["confidence"]) # "high", "medium", or "low"
97
+ ```
98
+
99
+ ---
100
+
101
+ ## 🏗️ Architecture
102
+
103
+ ```
104
+ PDF / Markdown
105
+
106
+
107
+ ┌─────────────────┐
108
+ │ PDF Ingestor │ Extracts text, detects TOC, builds hierarchical tree
109
+ └────────┬────────┘
110
+
111
+
112
+ ┌─────────────────┐
113
+ │ Tree Indexer │ LLM-based section detection, title verification,
114
+ │ │ page-number alignment, node summarization
115
+ └────────┬────────┘
116
+
117
+
118
+ ┌─────────────────┐
119
+ │ Supabase Store │ Persists tree nodes, raw pages, document metadata
120
+ └────────┬────────┘
121
+
122
+
123
+ ┌─────────────────┐
124
+ │ Agent Retriever │ LLM navigates tree → finds relevant nodes →
125
+ │ │ fetches page content → synthesizes cited answer
126
+ └─────────────────┘
127
+ ```
128
+
129
+ ---
130
+
131
+ ## ⚙️ Configuration
132
+
133
+ White Walker uses `white_walker/config.yaml` for defaults:
134
+
135
+ | Parameter | Default | Description |
136
+ |---|---|---|
137
+ | `model` | `moonshotai/kimi-k2.6` | NVIDIA NIM model for indexing & retrieval |
138
+ | `toc_check_page_num` | `20` | Max pages to scan for table of contents |
139
+ | `max_page_num_each_node` | `10` | Max pages per tree node |
140
+ | `max_token_num_each_node` | `20000` | Max tokens per tree node |
141
+
142
+ Override at initialization:
143
+
144
+ ```python
145
+ client = WhiteWalkerClient(model="meta/llama-3.1-70b-instruct")
146
+ ```
147
+
148
+ ---
149
+
150
+ ## 🔑 Environment Variables
151
+
152
+ | Variable | Required | Description |
153
+ |---|---|---|
154
+ | `NVIDIA_API_KEY` | ✅ | NVIDIA NIM API key for LLM calls |
155
+ | `SUPABASE_URL` | Optional | Supabase project URL (falls back to local storage) |
156
+ | `SUPABASE_KEY` | Optional | Supabase anon/service key |
157
+
158
+ ---
159
+
160
+ ## 📊 Evaluation
161
+
162
+ White Walker includes a RAGAS-based evaluation pipeline for benchmarking against multimodal agentic RAG datasets. See `evaluate_pipeline.py` for details.
163
+
164
+ ---
165
+
166
+ ## 📄 License
167
+
168
+ MIT License — see [LICENSE](LICENSE) for details.
169
+
170
+ ---
171
+
172
+ ## 🙏 Acknowledgments
173
+
174
+ Built on the foundation of [VectifyAI/PageIndex](https://github.com/VectifyAI/PageIndex), re-architected for local processing, Supabase persistence, and pluggable LLM backends.
@@ -0,0 +1,140 @@
1
+ # 🧊 White Walker
2
+
3
+ [![PyPI version](https://badge.fury.io/py/white-walker.svg)](https://pypi.org/project/white-walker/)
4
+ [![Python](https://img.shields.io/pypi/pyversions/white-walker.svg)](https://pypi.org/project/white-walker/)
5
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
6
+
7
+ **Multimodal Agentic RAG** — Index PDFs & Markdown into hierarchical tree structures, persist in Supabase, and query with LLM-powered retrieval.
8
+
9
+ White Walker transforms documents into navigable tree indexes using LLM-based structural analysis, stores them in Supabase (or locally), and answers questions by intelligently traversing the tree to find relevant sections.
10
+
11
+ ---
12
+
13
+ ## ✨ Features
14
+
15
+ - 🌲 **Hierarchical Tree Indexing** — Automatically detects document structure (TOC, sections, subsections) and builds a navigable tree
16
+ - 🗄️ **Supabase Persistence** — Trees, raw pages, and metadata stored in PostgreSQL via Supabase
17
+ - 🤖 **Agentic Retrieval** — LLM navigates the tree to find relevant sections, then synthesizes answers with citations
18
+ - 📄 **PDF & Markdown Support** — Works with academic papers, technical docs, and any structured document
19
+ - ⚡ **Rate-Limited API** — Built-in token-bucket rate limiter for NVIDIA NIM API (38 RPM, configurable)
20
+ - 🔌 **Pluggable LLM Backend** — Default: NVIDIA NIM API, easily extensible
21
+
22
+ ---
23
+
24
+ ## 📦 Installation
25
+
26
+ ```bash
27
+ pip install white-walker
28
+ ```
29
+
30
+ ---
31
+
32
+ ## 🚀 Quick Start
33
+
34
+ ### 1. Set Environment Variables
35
+
36
+ ```bash
37
+ # .env file
38
+ SUPABASE_URL=https://your-project.supabase.co
39
+ SUPABASE_KEY=your-anon-key
40
+ NVIDIA_API_KEY=your-nvidia-nim-api-key
41
+ ```
42
+
43
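+ ### 2. Set Up Supabase Tables (optional)
44
+
45
+ This step is only needed when you use Supabase persistence; without `SUPABASE_URL` and `SUPABASE_KEY`, White Walker falls back to local storage. Run the SQL in `setup_supabase.sql` in your Supabase SQL editor to create the required tables (`rag_documents`, `rag_raw_pages`, `rag_tree_nodes`). Note that `setup_supabase.sql` is not part of the PyPI distribution, so look for it in the [project repository](https://github.com/YashNuhash/White-Walker).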
46
+
47
+ ### 3. Index & Query
48
+
49
+ ```python
50
+ from white_walker import WhiteWalkerClient
51
+
52
+ client = WhiteWalkerClient()
53
+
54
+ # Index a PDF
55
+ doc_id = client.index("paper.pdf")
56
+
57
+ # Query the indexed document
58
+ result = client.query(doc_id, "What is the main contribution of this paper?")
59
+
60
+ print(result["answer"])
61
+ print(result["citations"])
62
+ print(result["confidence"]) # "high", "medium", or "low"
63
+ ```
64
+
65
+ ---
66
+
67
+ ## 🏗️ Architecture
68
+
69
+ ```
70
+   PDF / Markdown
71
+          │
72
+          ▼
73
+ ┌─────────────────┐
74
+ │  PDF Ingestor   │  Extracts text, detects TOC, builds hierarchical tree
75
+ └────────┬────────┘
76
+          │
77
+          ▼
78
+ ┌─────────────────┐
79
+ │  Tree Indexer   │  LLM-based section detection, title verification,
80
+ │                 │  page-number alignment, node summarization
81
+ └────────┬────────┘
82
+          │
83
+          ▼
84
+ ┌─────────────────┐
85
+ │ Supabase Store  │  Persists tree nodes, raw pages, document metadata
86
+ └────────┬────────┘
87
+          │
88
+          ▼
89
+ ┌─────────────────┐
90
+ │ Agent Retriever │  LLM navigates tree → finds relevant nodes →
91
+ │                 │  fetches page content → synthesizes cited answer
92
+ └─────────────────┘
93
+ ```
94
+
95
+ ---
96
+
97
+ ## ⚙️ Configuration
98
+
99
+ White Walker uses `white_walker/config.yaml` for defaults:
100
+
101
+ | Parameter | Default | Description |
102
+ |---|---|---|
103
+ | `model` | `moonshotai/kimi-k2.6` | NVIDIA NIM model for indexing & retrieval |
104
+ | `toc_check_page_num` | `20` | Max pages to scan for table of contents |
105
+ | `max_page_num_each_node` | `10` | Max pages per tree node |
106
+ | `max_token_num_each_node` | `20000` | Max tokens per tree node |
107
+
108
+ Override at initialization:
109
+
110
+ ```python
111
+ client = WhiteWalkerClient(model="meta/llama-3.1-70b-instruct")
112
+ ```
113
+
114
+ ---
115
+
116
+ ## 🔑 Environment Variables
117
+
118
+ | Variable | Required | Description |
119
+ |---|---|---|
120
+ | `NVIDIA_API_KEY` | ✅ | NVIDIA NIM API key for LLM calls |
121
+ | `SUPABASE_URL` | Optional | Supabase project URL (falls back to local storage) |
122
+ | `SUPABASE_KEY` | Optional | Supabase anon/service key |
123
+
124
+ ---
125
+
126
+ ## 📊 Evaluation
127
+
128
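+ White Walker includes a RAGAS-based evaluation pipeline for benchmarking against multimodal agentic RAG datasets. See `evaluate_pipeline.py` for details; this script is not part of the PyPI distribution, so look for it in the [project repository](https://github.com/YashNuhash/White-Walker).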
129
+
130
+ ---
131
+
132
+ ## 📄 License
133
+
134
+ MIT License — see [LICENSE](LICENSE) for details.
135
+
136
+ ---
137
+
138
+ ## 🙏 Acknowledgments
139
+
140
+ Built on the foundation of [VectifyAI/PageIndex](https://github.com/VectifyAI/PageIndex), re-architected for local processing, Supabase persistence, and pluggable LLM backends.
@@ -0,0 +1,174 @@
1
+ Metadata-Version: 2.4
2
+ Name: White-Walker
3
+ Version: 0.1.0
4
+ Summary: Multimodal Agentic RAG with hierarchical tree indexing — index PDFs & Markdown into navigable tree structures, persist in Supabase, and query with LLM-powered retrieval.
5
+ Author: NUHASHROXME
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/YashNuhash/White-Walker
8
+ Project-URL: Repository, https://github.com/YashNuhash/White-Walker
9
+ Project-URL: Issues, https://github.com/YashNuhash/White-Walker/issues
10
+ Keywords: RAG,multimodal,agentic,LLM,retrieval,tree-indexing,supabase,nvidia,pdf,arxiv
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Programming Language :: Python :: 3
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
21
+ Classifier: Topic :: Software Development :: Libraries :: Python Modules
22
+ Requires-Python: >=3.9
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: supabase>=2.0
26
+ Requires-Dist: PyPDF2>=3.0
27
+ Requires-Dist: pdfplumber>=0.9
28
+ Requires-Dist: python-dotenv
29
+ Requires-Dist: httpx
30
+ Requires-Dist: requests
31
+ Requires-Dist: pymupdf
32
+ Requires-Dist: pyyaml
33
+ Dynamic: license-file
34
+
35
+ # 🧊 White Walker
36
+
37
+ [![PyPI version](https://badge.fury.io/py/white-walker.svg)](https://pypi.org/project/white-walker/)
38
+ [![Python](https://img.shields.io/pypi/pyversions/white-walker.svg)](https://pypi.org/project/white-walker/)
39
+ [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](LICENSE)
40
+
41
+ **Multimodal Agentic RAG** — Index PDFs & Markdown into hierarchical tree structures, persist in Supabase, and query with LLM-powered retrieval.
42
+
43
+ White Walker transforms documents into navigable tree indexes using LLM-based structural analysis, stores them in Supabase (or locally), and answers questions by intelligently traversing the tree to find relevant sections.
44
+
45
+ ---
46
+
47
+ ## ✨ Features
48
+
49
+ - 🌲 **Hierarchical Tree Indexing** — Automatically detects document structure (TOC, sections, subsections) and builds a navigable tree
50
+ - 🗄️ **Supabase Persistence** — Trees, raw pages, and metadata stored in PostgreSQL via Supabase
51
+ - 🤖 **Agentic Retrieval** — LLM navigates the tree to find relevant sections, then synthesizes answers with citations
52
+ - 📄 **PDF & Markdown Support** — Works with academic papers, technical docs, and any structured document
53
+ - ⚡ **Rate-Limited API** — Built-in token-bucket rate limiter for NVIDIA NIM API (38 RPM, configurable)
54
+ - 🔌 **Pluggable LLM Backend** — Default: NVIDIA NIM API, easily extensible
55
+
56
+ ---
57
+
58
+ ## 📦 Installation
59
+
60
+ ```bash
61
+ pip install white-walker
62
+ ```
63
+
64
+ ---
65
+
66
+ ## 🚀 Quick Start
67
+
68
+ ### 1. Set Environment Variables
69
+
70
+ ```bash
71
+ # .env file
72
+ SUPABASE_URL=https://your-project.supabase.co
73
+ SUPABASE_KEY=your-anon-key
74
+ NVIDIA_API_KEY=your-nvidia-nim-api-key
75
+ ```
76
+
77
+ ### 2. Set Up Supabase Tables
78
+
79
+ Run the SQL in `setup_supabase.sql` in your Supabase SQL editor to create the required tables (`rag_documents`, `rag_raw_pages`, `rag_tree_nodes`).
80
+
81
+ ### 3. Index & Query
82
+
83
+ ```python
84
+ from white_walker import WhiteWalkerClient
85
+
86
+ client = WhiteWalkerClient()
87
+
88
+ # Index a PDF
89
+ doc_id = client.index("paper.pdf")
90
+
91
+ # Query the indexed document
92
+ result = client.query(doc_id, "What is the main contribution of this paper?")
93
+
94
+ print(result["answer"])
95
+ print(result["citations"])
96
+ print(result["confidence"]) # "high", "medium", or "low"
97
+ ```
98
+
99
+ ---
100
+
101
+ ## 🏗️ Architecture
102
+
103
+ ```
104
+ PDF / Markdown
105
+
106
+
107
+ ┌─────────────────┐
108
+ │ PDF Ingestor │ Extracts text, detects TOC, builds hierarchical tree
109
+ └────────┬────────┘
110
+
111
+
112
+ ┌─────────────────┐
113
+ │ Tree Indexer │ LLM-based section detection, title verification,
114
+ │ │ page-number alignment, node summarization
115
+ └────────┬────────┘
116
+
117
+
118
+ ┌─────────────────┐
119
+ │ Supabase Store │ Persists tree nodes, raw pages, document metadata
120
+ └────────┬────────┘
121
+
122
+
123
+ ┌─────────────────┐
124
+ │ Agent Retriever │ LLM navigates tree → finds relevant nodes →
125
+ │ │ fetches page content → synthesizes cited answer
126
+ └─────────────────┘
127
+ ```
128
+
129
+ ---
130
+
131
+ ## ⚙️ Configuration
132
+
133
+ White Walker uses `white_walker/config.yaml` for defaults:
134
+
135
+ | Parameter | Default | Description |
136
+ |---|---|---|
137
+ | `model` | `moonshotai/kimi-k2.6` | NVIDIA NIM model for indexing & retrieval |
138
+ | `toc_check_page_num` | `20` | Max pages to scan for table of contents |
139
+ | `max_page_num_each_node` | `10` | Max pages per tree node |
140
+ | `max_token_num_each_node` | `20000` | Max tokens per tree node |
141
+
142
+ Override at initialization:
143
+
144
+ ```python
145
+ client = WhiteWalkerClient(model="meta/llama-3.1-70b-instruct")
146
+ ```
147
+
148
+ ---
149
+
150
+ ## 🔑 Environment Variables
151
+
152
+ | Variable | Required | Description |
153
+ |---|---|---|
154
+ | `NVIDIA_API_KEY` | ✅ | NVIDIA NIM API key for LLM calls |
155
+ | `SUPABASE_URL` | Optional | Supabase project URL (falls back to local storage) |
156
+ | `SUPABASE_KEY` | Optional | Supabase anon/service key |
157
+
158
+ ---
159
+
160
+ ## 📊 Evaluation
161
+
162
+ White Walker includes a RAGAS-based evaluation pipeline for benchmarking against multimodal agentic RAG datasets. See `evaluate_pipeline.py` for details.
163
+
164
+ ---
165
+
166
+ ## 📄 License
167
+
168
+ MIT License — see [LICENSE](LICENSE) for details.
169
+
170
+ ---
171
+
172
+ ## 🙏 Acknowledgments
173
+
174
+ Built on the foundation of [VectifyAI/PageIndex](https://github.com/VectifyAI/PageIndex), re-architected for local processing, Supabase persistence, and pluggable LLM backends.
@@ -0,0 +1,30 @@
1
+ LICENSE
2
+ README.md
3
+ pyproject.toml
4
+ White_Walker.egg-info/PKG-INFO
5
+ White_Walker.egg-info/SOURCES.txt
6
+ White_Walker.egg-info/dependency_links.txt
7
+ White_Walker.egg-info/requires.txt
8
+ White_Walker.egg-info/top_level.txt
9
+ white_walker/__init__.py
10
+ white_walker/client.py
11
+ white_walker/config.yaml
12
+ white_walker/utils.py
13
+ white_walker.egg-info/PKG-INFO
14
+ white_walker.egg-info/SOURCES.txt
15
+ white_walker.egg-info/dependency_links.txt
16
+ white_walker.egg-info/requires.txt
17
+ white_walker.egg-info/top_level.txt
18
+ white_walker/ingest/__init__.py
19
+ white_walker/ingest/md_ingestor.py
20
+ white_walker/ingest/pdf_ingestor.py
21
+ white_walker/llm/__init__.py
22
+ white_walker/llm/adapter.py
23
+ white_walker/retrieval/__init__.py
24
+ white_walker/retrieval/agent_retriever.py
25
+ white_walker/retrieval/prompts.py
26
+ white_walker/retrieval/retrieve.py
27
+ white_walker/store/__init__.py
28
+ white_walker/store/base_store.py
29
+ white_walker/store/local_store.py
30
+ white_walker/store/supabase_store.py
@@ -0,0 +1,8 @@
1
+ supabase>=2.0
2
+ PyPDF2>=3.0
3
+ pdfplumber>=0.9
4
+ python-dotenv
5
+ httpx
6
+ requests
7
+ pymupdf
8
+ pyyaml
@@ -0,0 +1 @@
1
+ white_walker
@@ -0,0 +1,53 @@
1
+ [build-system]
2
+ requires = ["setuptools>=61.0"]
3
+ build-backend = "setuptools.build_meta"
4
+
5
+ [project]
6
+ name = "White-Walker"
7
+ version = "0.1.0"
8
+ description = "Multimodal Agentic RAG with hierarchical tree indexing — index PDFs & Markdown into navigable tree structures, persist in Supabase, and query with LLM-powered retrieval."
9
+ readme = "README.md"
10
+ license = {text = "MIT"}
11
+ requires-python = ">=3.9"
12
+ authors = [
13
+ {name = "NUHASHROXME"},
14
+ ]
15
+ keywords = [
16
+ "RAG", "multimodal", "agentic", "LLM", "retrieval",
17
+ "tree-indexing", "supabase", "nvidia", "pdf", "arxiv",
18
+ ]
19
+ classifiers = [
20
+ "Development Status :: 3 - Alpha",
21
+ "Intended Audience :: Developers",
22
+ "Intended Audience :: Science/Research",
23
+ "License :: OSI Approved :: MIT License",
24
+ "Programming Language :: Python :: 3",
25
+ "Programming Language :: Python :: 3.9",
26
+ "Programming Language :: Python :: 3.10",
27
+ "Programming Language :: Python :: 3.11",
28
+ "Programming Language :: Python :: 3.12",
29
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
30
+ "Topic :: Software Development :: Libraries :: Python Modules",
31
+ ]
32
+ dependencies = [
33
+ "supabase>=2.0",
34
+ "PyPDF2>=3.0",
35
+ "pdfplumber>=0.9",
36
+ "python-dotenv",
37
+ "httpx",
38
+ "requests",
39
+ "pymupdf",
40
+ "pyyaml",
41
+ ]
42
+
43
+ [project.urls]
44
+ Homepage = "https://github.com/YashNuhash/White-Walker"
45
+ Repository = "https://github.com/YashNuhash/White-Walker"
46
+ Issues = "https://github.com/YashNuhash/White-Walker/issues"
47
+
48
+ [tool.setuptools.packages.find]
49
+ where = ["."]
50
+ include = ["white_walker*"]
51
+
52
+ [tool.setuptools.package-data]
53
+ white_walker = ["config.yaml"]
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
@@ -0,0 +1,9 @@
1
+ from .client import WhiteWalkerClient
2
+ from .store.supabase_store import SupabaseTreeStore
3
+ from .store.local_store import LocalStore
4
+ from .ingest.pdf_ingestor import white_walker_index
5
+ from .ingest.md_ingestor import md_to_tree
6
+ from .llm.adapter import register_local_model
7
+
8
+ __version__ = "0.1.0"
9
+