deeplightrag 1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (59) hide show
  1. deeplightrag-1.0.6/.env.example +13 -0
  2. deeplightrag-1.0.6/.gitignore +184 -0
  3. deeplightrag-1.0.6/LICENSE +21 -0
  4. deeplightrag-1.0.6/MANIFEST.in +18 -0
  5. deeplightrag-1.0.6/PKG-INFO +121 -0
  6. deeplightrag-1.0.6/README.md +33 -0
  7. deeplightrag-1.0.6/main.py +212 -0
  8. deeplightrag-1.0.6/pyproject.toml +353 -0
  9. deeplightrag-1.0.6/requirements-macos.txt +10 -0
  10. deeplightrag-1.0.6/requirements.txt +53 -0
  11. deeplightrag-1.0.6/setup.cfg +4 -0
  12. deeplightrag-1.0.6/src/deeplightrag/__init__.py +47 -0
  13. deeplightrag-1.0.6/src/deeplightrag/cli.py +177 -0
  14. deeplightrag-1.0.6/src/deeplightrag/core.py +778 -0
  15. deeplightrag-1.0.6/src/deeplightrag/graph/__init__.py +9 -0
  16. deeplightrag-1.0.6/src/deeplightrag/graph/dual_layer.py +480 -0
  17. deeplightrag-1.0.6/src/deeplightrag/graph/entity_relationship.py +1268 -0
  18. deeplightrag-1.0.6/src/deeplightrag/graph/visual_spatial.py +654 -0
  19. deeplightrag-1.0.6/src/deeplightrag/interfaces.py +74 -0
  20. deeplightrag-1.0.6/src/deeplightrag/ner/__init__.py +17 -0
  21. deeplightrag-1.0.6/src/deeplightrag/ner/enhanced_ner_pipeline.py +473 -0
  22. deeplightrag-1.0.6/src/deeplightrag/ner/entity_filter.py +276 -0
  23. deeplightrag-1.0.6/src/deeplightrag/ner/entity_processor.py +400 -0
  24. deeplightrag-1.0.6/src/deeplightrag/ner/entity_schema.py +189 -0
  25. deeplightrag-1.0.6/src/deeplightrag/ner/entity_type_mapper.py +345 -0
  26. deeplightrag-1.0.6/src/deeplightrag/ner/gliner_ner.py +1020 -0
  27. deeplightrag-1.0.6/src/deeplightrag/ner/text_classifier.py +304 -0
  28. deeplightrag-1.0.6/src/deeplightrag/ner/visual_aware_ner.py +743 -0
  29. deeplightrag-1.0.6/src/deeplightrag/ner/working_enhanced_ner.py +328 -0
  30. deeplightrag-1.0.6/src/deeplightrag/ner/working_gliner_ner.py +293 -0
  31. deeplightrag-1.0.6/src/deeplightrag/ocr/__init__.py +4 -0
  32. deeplightrag-1.0.6/src/deeplightrag/ocr/clean_ocr.py +360 -0
  33. deeplightrag-1.0.6/src/deeplightrag/ocr/deepseek_model.py +416 -0
  34. deeplightrag-1.0.6/src/deeplightrag/ocr/deepseek_ocr.py +11 -0
  35. deeplightrag-1.0.6/src/deeplightrag/ocr/deepseek_output_parser.py +301 -0
  36. deeplightrag-1.0.6/src/deeplightrag/ocr/enhanced_ocr.py +679 -0
  37. deeplightrag-1.0.6/src/deeplightrag/ocr/geometry.py +34 -0
  38. deeplightrag-1.0.6/src/deeplightrag/ocr/processor.py +434 -0
  39. deeplightrag-1.0.6/src/deeplightrag/ocr/visual_embedding_extractor.py +271 -0
  40. deeplightrag-1.0.6/src/deeplightrag/ocr/visual_feature_extractor.py +1616 -0
  41. deeplightrag-1.0.6/src/deeplightrag/ocr/visual_token.py +96 -0
  42. deeplightrag-1.0.6/src/deeplightrag/retrieval/__init__.py +5 -0
  43. deeplightrag-1.0.6/src/deeplightrag/retrieval/adaptive_retriever.py +991 -0
  44. deeplightrag-1.0.6/src/deeplightrag/retrieval/multimodal_visual_retriever.py +900 -0
  45. deeplightrag-1.0.6/src/deeplightrag/retrieval/query_classifier.py +255 -0
  46. deeplightrag-1.0.6/src/deeplightrag/retrieval/toon_formatter.py +65 -0
  47. deeplightrag-1.0.6/src/deeplightrag/retrieval/visual_aware_retriever.py +498 -0
  48. deeplightrag-1.0.6/src/deeplightrag/retrieval/visual_similarity.py +645 -0
  49. deeplightrag-1.0.6/src/deeplightrag/utils/__init__.py +3 -0
  50. deeplightrag-1.0.6/src/deeplightrag/utils/component_factory.py +115 -0
  51. deeplightrag-1.0.6/src/deeplightrag/utils/config_manager.py +154 -0
  52. deeplightrag-1.0.6/src/deeplightrag/utils/device.py +74 -0
  53. deeplightrag-1.0.6/src/deeplightrag/utils/helpers.py +127 -0
  54. deeplightrag-1.0.6/src/deeplightrag.egg-info/PKG-INFO +121 -0
  55. deeplightrag-1.0.6/src/deeplightrag.egg-info/SOURCES.txt +57 -0
  56. deeplightrag-1.0.6/src/deeplightrag.egg-info/dependency_links.txt +1 -0
  57. deeplightrag-1.0.6/src/deeplightrag.egg-info/entry_points.txt +2 -0
  58. deeplightrag-1.0.6/src/deeplightrag.egg-info/requires.txt +64 -0
  59. deeplightrag-1.0.6/src/deeplightrag.egg-info/top_level.txt +1 -0
@@ -0,0 +1,13 @@
1
+ # Environment Variables Template
2
+ # Copy this file to .env and fill in your actual values
3
+ # NEVER commit .env to git!
4
+
5
+ # LLM API Keys (Optional - only if you want to use LLM features)
6
+ OPENAI_API_KEY=your_openai_key_here
7
+ ANTHROPIC_API_KEY=your_anthropic_key_here
8
+ GOOGLE_API_KEY=your_gemini_key_here
9
+ COHERE_API_KEY=your_cohere_key_here
10
+
11
+ # Other Configuration
12
+ DEEPLIGHTRAG_STORAGE_DIR=./deeplightrag_data
13
+ DEEPLIGHTRAG_DEVICE=auto # auto, cpu, cuda, mps
@@ -0,0 +1,184 @@
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ *.manifest
32
+ *.spec
33
+
34
+ # Installer logs
35
+ pip-log.txt
36
+ pip-delete-this-directory.txt
37
+
38
+ # Unit test / coverage reports
39
+ htmlcov/
40
+ .tox/
41
+ .nox/
42
+ .coverage
43
+ .coverage.*
44
+ .cache
45
+ nosetests.xml
46
+ coverage.xml
47
+ *.cover
48
+ *.py,cover
49
+ .hypothesis/
50
+ .pytest_cache/
51
+
52
+ # Translations
53
+ *.mo
54
+ *.pot
55
+
56
+ # Django stuff:
57
+ *.log
58
+ local_settings.py
59
+ db.sqlite3
60
+ db.sqlite3-journal
61
+
62
+ # Flask stuff:
63
+ instance/
64
+ .webassets-cache
65
+
66
+ # Scrapy stuff:
67
+ .scrapy
68
+
69
+ # Sphinx documentation
70
+ docs/_build/
71
+
72
+ # PyBuilder
73
+ target/
74
+
75
+ # Jupyter Notebook
76
+ .ipynb_checkpoints
77
+
78
+ # IPython
79
+ profile_default/
80
+ ipython_config.py
81
+
82
+ # pyenv
83
+ .python-version
84
+
85
+ # pipenv
86
+ Pipfile.lock
87
+
88
+ # PEP 582
89
+ __pypackages__/
90
+
91
+ # Celery stuff
92
+ celerybeat-schedule
93
+ celerybeat.pid
94
+
95
+ # SageMath parsed files
96
+ *.sage.py
97
+
98
+ # Environments
99
+ .env
100
+ .venv
101
+ env/
102
+ venv/
103
+ ENV/
104
+ env.bak/
105
+ venv.bak/
106
+
107
+ # Spyder project settings
108
+ .spyderproject
109
+ .spyproject
110
+
111
+ # Rope project settings
112
+ .ropeproject
113
+
114
+ # mkdocs documentation
115
+ /site
116
+
117
+ # mypy
118
+ .mypy_cache/
119
+ .dmypy.json
120
+ dmypy.json
121
+
122
+ # Pyre type checker
123
+ .pyre/
124
+
125
+ # IDE
126
+ .vscode/
127
+ .idea/
128
+ *.swp
129
+ *.swo
130
+ *~
131
+ .DS_Store
132
+ *.sublime-project
133
+ *.sublime-workspace
134
+ .vim/
135
+
136
+ # Project specific
137
+ deeplightrag_data/
138
+ config/
139
+ *.yaml.local
140
+ *.env.local
141
+ *.env.*.local
142
+
143
+ # Build artifacts
144
+ dist/
145
+ build/
146
+ *.egg-info/
147
+ .eggs/
148
+
149
+ # Coverage reports
150
+ htmlcov/
151
+ .coverage
152
+ coverage.xml
153
+
154
+ # Test artifacts
155
+ .pytest_cache/
156
+ .tox/
157
+ .nox/
158
+
159
+ # IDE and editor files
160
+ .vscode/
161
+ .idea/
162
+ *.code-workspace
163
+ *.iml
164
+
165
+ # OS
166
+ .DS_Store
167
+ Thumbs.db
168
+
169
+ # Temporary files
170
+ *.tmp
171
+ *.bak
172
+ *.swp
173
+ *.swo
174
+ *~
175
+
176
+ # Local test files
177
+ test_*.py
178
+ *_test.py
179
+ config.yaml
180
+
181
+ # Environment files with secrets
182
+ .env
183
+ *.env
184
+ config.yaml
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 Visual-Graph RAG Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,18 @@
1
+ # Include documentation
2
+ include README.md
3
+ include LICENSE
4
+ include CHANGELOG.md
5
+
6
+ # Include configuration files
7
+ include pyproject.toml
8
+ include requirements.txt
9
+ include requirements-macos.txt
10
+
11
+ # Include documentation
12
+ recursive-include docs *.md *.rst *.txt
13
+
14
+ # Exclude unnecessary files
15
+ global-exclude *.py[cod] __pycache__ *.so *.dylib .DS_Store
16
+ prune test*
17
+ prune example*
18
+ prune .git*
@@ -0,0 +1,121 @@
1
+ Metadata-Version: 2.4
2
+ Name: deeplightrag
3
+ Version: 1.0.6
4
+ Summary: DeepLightRAG: High-performance Document Indexing and Retrieval System (use with any LLM)
5
+ Author-email: Phuong Nguyen <nhphuong.code@gmail.com>
6
+ Maintainer-email: Phuong Nguyen <nhphuong.code@gmail.com>
7
+ License: MIT
8
+ Project-URL: Homepage, https://github.com/png261/DeepLightRag
9
+ Project-URL: Repository, https://github.com/png261/DeepLightRag
10
+ Project-URL: Bug Tracker, https://github.com/png261/DeepLightRag/issues
11
+ Project-URL: Changelog, https://github.com/png261/DeepLightRag/releases
12
+ Keywords: rag,retrieval,augmented,generation,ocr,vision,graph,nlp,llm,deepseek,document-processing
13
+ Classifier: Development Status :: 4 - Beta
14
+ Classifier: Environment :: Console
15
+ Classifier: Intended Audience :: Developers
16
+ Classifier: Intended Audience :: Science/Research
17
+ Classifier: License :: OSI Approved :: MIT License
18
+ Classifier: Operating System :: OS Independent
19
+ Classifier: Programming Language :: Python
20
+ Classifier: Programming Language :: Python :: 3
21
+ Classifier: Programming Language :: Python :: 3.9
22
+ Classifier: Programming Language :: Python :: 3.10
23
+ Classifier: Programming Language :: Python :: 3.11
24
+ Classifier: Programming Language :: Python :: 3.12
25
+ Classifier: Programming Language :: Python :: 3.13
26
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
27
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
28
+ Classifier: Topic :: Text Processing
29
+ Classifier: Typing :: Typed
30
+ Requires-Python: >=3.9
31
+ Description-Content-Type: text/markdown
32
+ License-File: LICENSE
33
+ Requires-Dist: numpy>=1.24.0
34
+ Requires-Dist: networkx>=3.0
35
+ Requires-Dist: Pillow>=10.0.0
36
+ Requires-Dist: PyYAML>=6.0
37
+ Requires-Dist: tqdm>=4.65.0
38
+ Requires-Dist: typing-extensions>=4.0.0; python_version < "3.10"
39
+ Requires-Dist: pdf2image>=1.16.0
40
+ Requires-Dist: PyMuPDF>=1.23.0
41
+ Requires-Dist: easyocr>=1.7.0
42
+ Requires-Dist: torch>=2.0.0
43
+ Requires-Dist: torchvision>=0.15.0
44
+ Requires-Dist: transformers>=4.40.0
45
+ Requires-Dist: accelerate>=0.24.0
46
+ Requires-Dist: sentence-transformers>=2.2.0
47
+ Requires-Dist: setfit>=1.0.0
48
+ Requires-Dist: gliner>=0.1.12
49
+ Requires-Dist: faiss-cpu>=1.7.4
50
+ Requires-Dist: toon-python>=0.1.2
51
+ Provides-Extra: gpu
52
+ Requires-Dist: bitsandbytes>=0.41.0; extra == "gpu"
53
+ Provides-Extra: macos
54
+ Requires-Dist: mlx>=0.21.0; extra == "macos"
55
+ Requires-Dist: mlx-lm>=0.19.0; extra == "macos"
56
+ Requires-Dist: mlx-vlm>=0.0.3; extra == "macos"
57
+ Provides-Extra: llm
58
+ Requires-Dist: google-generativeai>=0.3.0; extra == "llm"
59
+ Requires-Dist: openai>=1.0.0; extra == "llm"
60
+ Requires-Dist: anthropic>=0.25.0; extra == "llm"
61
+ Provides-Extra: advanced-re
62
+ Requires-Dist: opennre>=1.1.0; extra == "advanced-re"
63
+ Provides-Extra: web
64
+ Requires-Dist: streamlit>=1.30.0; extra == "web"
65
+ Requires-Dist: plotly>=5.18.0; extra == "web"
66
+ Requires-Dist: pandas>=2.0.0; extra == "web"
67
+ Provides-Extra: dev
68
+ Requires-Dist: pytest>=7.4.0; extra == "dev"
69
+ Requires-Dist: pytest-cov>=4.1.0; extra == "dev"
70
+ Requires-Dist: pytest-xdist>=3.3.0; extra == "dev"
71
+ Requires-Dist: pytest-timeout>=2.1.0; extra == "dev"
72
+ Requires-Dist: pytest-mock>=3.11.0; extra == "dev"
73
+ Requires-Dist: black>=23.9.0; extra == "dev"
74
+ Requires-Dist: ruff>=0.0.290; extra == "dev"
75
+ Requires-Dist: mypy>=1.5.0; extra == "dev"
76
+ Requires-Dist: pre-commit>=3.3.0; extra == "dev"
77
+ Requires-Dist: build>=0.10.0; extra == "dev"
78
+ Requires-Dist: twine>=4.0.0; extra == "dev"
79
+ Requires-Dist: wheel>=0.41.0; extra == "dev"
80
+ Provides-Extra: docs
81
+ Requires-Dist: sphinx>=7.1.0; extra == "docs"
82
+ Requires-Dist: sphinx-rtd-theme>=1.3.0; extra == "docs"
83
+ Requires-Dist: sphinx-autodoc-typehints>=1.24.0; extra == "docs"
84
+ Requires-Dist: myst-parser>=1.0.0; extra == "docs"
85
+ Provides-Extra: all
86
+ Requires-Dist: deeplightrag[advanced-re,dev,docs,gpu,llm,web]; extra == "all"
87
+ Dynamic: license-file
88
+
89
+ # DeepLightRAG
90
+
91
+ DeepLightRAG is a high-performance document indexing and retrieval system designed to work with any Large Language Model (LLM). It features a dual-layer graph architecture (Visual-Spatial and Entity-Relationship) to provide context-aware, visually grounded retrieval.
92
+
93
+ ## Features
94
+
95
+ - **Dual-Layer Graph**: Combines visual layout awareness with semantic entity relationships.
96
+ - **Visual-Grounded Retrieval**: Retrieves not just text, but visual regions and their spatial context.
97
+ - **Robust OCR**: Integrates DeepSeek-OCR with an EasyOCR fallback for reliable text extraction.
98
+ - **Advanced NER**: Uses GLiNER for zero-shot entity recognition.
99
+ - **Flexible LLM Support**: Compatible with OpenAI, Google Gemini, Anthropic, and local LLMs via MLX/Ollama.
100
+
101
+ ## Installation
102
+
103
+ ```bash
104
+ pip install deeplightrag
105
+ ```
106
+
107
+ ## Usage
108
+
109
+ Index a document:
110
+ ```bash
111
+ deeplightrag index document.pdf
112
+ ```
113
+
114
+ Query the index:
115
+ ```bash
116
+ deeplightrag query "What is the main topic?"
117
+ ```
118
+
119
+ ## License
120
+
121
+ MIT License
@@ -0,0 +1,33 @@
1
+ # DeepLightRAG
2
+
3
+ DeepLightRAG is a high-performance document indexing and retrieval system designed to work with any Large Language Model (LLM). It features a dual-layer graph architecture (Visual-Spatial and Entity-Relationship) to provide context-aware, visually grounded retrieval.
4
+
5
+ ## Features
6
+
7
+ - **Dual-Layer Graph**: Combines visual layout awareness with semantic entity relationships.
8
+ - **Visual-Grounded Retrieval**: Retrieves not just text, but visual regions and their spatial context.
9
+ - **Robust OCR**: Integrates DeepSeek-OCR with an EasyOCR fallback for reliable text extraction.
10
+ - **Advanced NER**: Uses GLiNER for zero-shot entity recognition.
11
+ - **Flexible LLM Support**: Compatible with OpenAI, Google Gemini, Anthropic, and local LLMs via MLX/Ollama.
12
+
13
+ ## Installation
14
+
15
+ ```bash
16
+ pip install deeplightrag
17
+ ```
18
+
19
+ ## Usage
20
+
21
+ Index a document:
22
+ ```bash
23
+ deeplightrag index document.pdf
24
+ ```
25
+
26
+ Query the index:
27
+ ```bash
28
+ deeplightrag query "What is the main topic?"
29
+ ```
30
+
31
+ ## License
32
+
33
+ MIT License
@@ -0,0 +1,212 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ DeepLightRAG CLI Interface
4
+ Main entry point for the system
5
+ """
6
+
7
+ import argparse
8
+ import sys
9
+ from pathlib import Path
10
+ import yaml
11
+ import json
12
+
13
+ from src.deeplightrag import DeepLightRAG
14
+
15
+
16
def load_config(config_path: str = "config.yaml") -> dict:
    """Load configuration from a YAML file, with automatic Kaggle detection.

    When a Kaggle environment is detected and ``config_kaggle.yaml`` exists
    in the current directory, that file is used instead of ``config_path``.
    If ``torch`` is importable and CUDA is available, the ``device`` entry of
    the ``ocr``, ``ner`` and ``relation_extraction`` sections (when present)
    is set to ``"cuda"``.

    Args:
        config_path: Path to the YAML configuration file.

    Returns:
        The parsed configuration, or an empty dict when the file does not
        exist or is empty.
    """
    import os

    # Auto-detect Kaggle environment and use the appropriate config
    is_kaggle = (
        os.path.exists("/kaggle")
        or "KAGGLE_KERNEL_RUN_TYPE" in os.environ
        or "/kaggle/" in os.getcwd()
    )

    if is_kaggle and Path("config_kaggle.yaml").exists():
        config_path = "config_kaggle.yaml"
        print(f"🔍 Detected Kaggle environment, using {config_path}")

    if not Path(config_path).exists():
        return {}

    with open(config_path, "r", encoding="utf-8") as f:
        # safe_load() yields None for an empty document; normalise that to an
        # empty dict so callers always receive a mapping.
        config = yaml.safe_load(f) or {}

    # Auto-configure GPU settings (best effort: torch is optional here)
    try:
        import torch
    except ImportError:
        pass
    else:
        if torch.cuda.is_available():
            print(f"🎮 GPU detected: {torch.cuda.get_device_name(0)}")
            # Only touch sections that are real mappings; a bare ``ocr:`` key
            # parses to None and cannot take an item assignment.
            sections = config if isinstance(config, dict) else {}
            for section in ("ocr", "ner", "relation_extraction"):
                if isinstance(sections.get(section), dict):
                    sections[section]["device"] = "cuda"

    return config
53
+
54
+
55
def cmd_index(args, rag_system):
    """Index a PDF document and optionally dump the results to a JSON file.

    Args:
        args: Parsed CLI arguments; reads ``pdf``, ``doc_id``, ``no_save``
            and ``output``.
        rag_system: Object exposing ``index_document(pdf, document_id=...,
            save_to_disk=...)``.

    Returns:
        Whatever ``rag_system.index_document`` returned.
    """
    print(f"Indexing document: {args.pdf}")
    results = rag_system.index_document(
        args.pdf, document_id=args.doc_id, save_to_disk=not args.no_save
    )

    if args.output:
        with open(args.output, "w") as f:
            # default=str mirrors cmd_query: values json cannot encode
            # natively are stringified instead of aborting the dump halfway.
            json.dump(results, f, indent=2, default=str)
        print(f"\nResults saved to {args.output}")

    return results
68
+
69
+
70
def cmd_query(args, rag_system):
    """Run one retrieval for ``args.question`` and print a summary.

    Loads ``args.load_doc`` first when it is set, retrieves with
    ``args.level`` as the level override, prints the context followed by the
    retrieval metadata, and writes the result to ``args.output`` as JSON when
    that option is given.

    Returns:
        The result object returned by ``rag_system.retrieve``.
    """
    document = args.load_doc
    if document:
        rag_system.load_document(document)

    result = rag_system.retrieve(args.question, override_level=args.level)

    rule = "=" * 60
    print(f"\n{rule}")
    print("RETRIEVED CONTEXT")
    print(rule)
    context = result.get("context", "No context")
    print(f"\n{context}\n")
    print(rule)

    level_name = result.get("level_name", "N/A")
    level_number = result.get("query_level", "N/A")
    print(f"Query Level: {level_name} (Level {level_number})")

    used = result.get("tokens_used", "N/A")
    budget = result.get("token_budget", "N/A")
    print(f"Tokens Used: {used}/{budget}")

    # The remaining metadata lines all share the same "Label: value" shape.
    for label, key in (
        ("Entities Found", "entities_found"),
        ("Nodes Retrieved", "nodes_retrieved"),
        ("Retrieval Time", "retrieval_time"),
    ):
        print(f"{label}: {result.get(key, 'N/A')}")

    print("\n💡 Use this context with your own LLM for generation!")

    destination = args.output
    if destination:
        with open(destination, "w") as out_file:
            json.dump(result, out_file, indent=2, default=str)
        print(f"\nResults saved to {destination}")

    return result
95
+
96
+
97
def cmd_interactive(args, rag_system):
    """Run an interactive retrieval loop on stdin/stdout.

    Loads ``args.load_doc`` first when it is set. Each non-empty input line
    is either a command (``!stats``, ``!level N``, ``!level``), an exit word
    (``quit``/``exit``/``q``), or a question passed to
    ``rag_system.retrieve``. The loop ends on an exit word, on Ctrl-C, or
    when stdin reaches end-of-file.

    Args:
        args: Parsed CLI arguments; reads ``load_doc``.
        rag_system: Object exposing ``load_document``, ``retrieve`` and
            ``get_statistics``.
    """
    if args.load_doc:
        rag_system.load_document(args.load_doc)

    print("\n" + "=" * 60)
    print("DeepLightRAG Interactive Retrieval Mode")
    print("=" * 60)
    print("Type your questions (or 'quit' to exit)")
    print("Commands: !stats, !level N")
    print("💡 Context retrieved - use with your own LLM for generation")
    print()

    override_level = None

    while True:
        try:
            question = input("\nQuestion: ").strip()

            if not question:
                continue

            if question.lower() in ["quit", "exit", "q"]:
                print("Goodbye!")
                break

            # Handle commands
            if question.startswith("!"):
                if question == "!stats":
                    stats = rag_system.get_statistics()
                    print(json.dumps(stats, indent=2, default=str))
                elif question.startswith("!level"):
                    parts = question.split()
                    if len(parts) == 2:
                        # A non-integer level raises ValueError, which the
                        # generic handler below reports without exiting.
                        override_level = int(parts[1])
                        print(f"Query level override set to {override_level}")
                    else:
                        override_level = None
                        print("Query level override cleared")
                else:
                    print("Unknown command")
                continue

            # Regular retrieval
            result = rag_system.retrieve(question, override_level=override_level)

            print(f"\n{'='*60}")
            print("CONTEXT")
            print(f"{'='*60}")
            print(f"{result.get('context', 'No context')[:500]}...")
            print(f"{'='*60}")
            print(
                f"Level: {result.get('level_name', 'N/A')} ({result.get('query_level', 'N/A')}) | "
                f"Tokens: {result.get('tokens_used', 'N/A')}/{result.get('token_budget', 'N/A')} | "
                f"Entities: {result.get('entities_found', 'N/A')}"
            )

        except KeyboardInterrupt:
            print("\n\nInterrupted. Goodbye!")
            break
        except EOFError:
            # stdin is exhausted (Ctrl-D or piped input). Every further
            # input() call would raise again, so leave the loop instead of
            # reporting the same error forever.
            print("\nGoodbye!")
            break
        except Exception as e:
            # Keep the session alive on per-question failures.
            print(f"Error: {e}")
159
+
160
+
161
def main():
    """Parse the command line, build the RAG system and run the subcommand.

    Prints the help text and exits with status 1 when no subcommand is given.
    """
    cli = argparse.ArgumentParser(
        description="DeepLightRAG: Efficient Document-based RAG System"
    )
    cli.add_argument("--config", default="config.yaml", help="Path to configuration file")
    cli.add_argument(
        "--storage",
        default="./deeplightrag_data",
        help="Storage directory for graphs and indices",
    )

    commands = cli.add_subparsers(dest="command", help="Command to execute")

    # "index": build the graphs/indices for one PDF
    index_cmd = commands.add_parser("index", help="Index a PDF document")
    index_cmd.add_argument("pdf", help="Path to PDF file")
    index_cmd.add_argument("--doc-id", help="Document identifier")
    index_cmd.add_argument("--no-save", action="store_true", help="Don't save to disk")
    index_cmd.add_argument("--output", help="Save results to JSON file")

    # "query": one-shot retrieval
    query_cmd = commands.add_parser("query", help="Retrieve context for a query")
    query_cmd.add_argument("question", help="Question to ask")
    query_cmd.add_argument("--load-doc", help="Load specific document")
    query_cmd.add_argument("--level", type=int, help="Override query level (1-5)")
    query_cmd.add_argument("--output", help="Save results to JSON file")

    # "interactive": retrieval REPL
    repl_cmd = commands.add_parser("interactive", help="Interactive retrieval mode")
    repl_cmd.add_argument("--load-doc", help="Load specific document")

    args = cli.parse_args()

    if not args.command:
        cli.print_help()
        sys.exit(1)

    # argparse only accepts the three registered subcommands, so the lookup
    # below cannot miss.
    handlers = {
        "index": cmd_index,
        "query": cmd_query,
        "interactive": cmd_interactive,
    }

    settings = load_config(args.config)
    rag_system = DeepLightRAG(config=settings, storage_dir=args.storage)
    handlers[args.command](args, rag_system)


if __name__ == "__main__":
    main()