flexible-graphrag 0.4.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- flexible_graphrag-0.4.0/Dockerfile +89 -0
- flexible_graphrag-0.4.0/PKG-INFO +135 -0
- flexible_graphrag-0.4.0/README.md +3 -0
- flexible_graphrag-0.4.0/backend.py +1858 -0
- flexible_graphrag-0.4.0/check_elasticsearch.py +76 -0
- flexible_graphrag-0.4.0/cleanup.py +655 -0
- flexible_graphrag-0.4.0/cmis_util.py +69 -0
- flexible_graphrag-0.4.0/config.py +574 -0
- flexible_graphrag-0.4.0/document_processor.py +817 -0
- flexible_graphrag-0.4.0/env-sample.txt +595 -0
- flexible_graphrag-0.4.0/factories.py +1463 -0
- flexible_graphrag-0.4.0/flexible_graphrag.egg-info/PKG-INFO +135 -0
- flexible_graphrag-0.4.0/flexible_graphrag.egg-info/SOURCES.txt +74 -0
- flexible_graphrag-0.4.0/flexible_graphrag.egg-info/dependency_links.txt +1 -0
- flexible_graphrag-0.4.0/flexible_graphrag.egg-info/entry_points.txt +2 -0
- flexible_graphrag-0.4.0/flexible_graphrag.egg-info/requires.txt +119 -0
- flexible_graphrag-0.4.0/flexible_graphrag.egg-info/top_level.txt +18 -0
- flexible_graphrag-0.4.0/hybrid_system.py +2764 -0
- flexible_graphrag-0.4.0/incremental_system.py +237 -0
- flexible_graphrag-0.4.0/incremental_updates/__init__.py +17 -0
- flexible_graphrag-0.4.0/incremental_updates/config_manager.py +260 -0
- flexible_graphrag-0.4.0/incremental_updates/datasource-queries.sql +303 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/README.md +532 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/__init__.py +45 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/alfresco_broadcaster.py +274 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/alfresco_detector.py +1551 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/azure_blob_detector.py +780 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/base.py +111 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/box_detector.py +634 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/factory.py +62 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/filesystem_detector.py +535 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/gcs_detector.py +830 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/google_drive_detector.py +794 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/msgraph_detector.py +911 -0
- flexible_graphrag-0.4.0/incremental_updates/detectors/s3_detector.py +700 -0
- flexible_graphrag-0.4.0/incremental_updates/diagnostic_queries.sql +177 -0
- flexible_graphrag-0.4.0/incremental_updates/engine.py +997 -0
- flexible_graphrag-0.4.0/incremental_updates/logging_config.py +178 -0
- flexible_graphrag-0.4.0/incremental_updates/orchestrator.py +418 -0
- flexible_graphrag-0.4.0/incremental_updates/path_utils.py +23 -0
- flexible_graphrag-0.4.0/incremental_updates/s3_helpers.py +452 -0
- flexible_graphrag-0.4.0/incremental_updates/schema.sql +81 -0
- flexible_graphrag-0.4.0/incremental_updates/state_manager.py +353 -0
- flexible_graphrag-0.4.0/ingest/__init__.py +13 -0
- flexible_graphrag-0.4.0/ingest/factory.py +82 -0
- flexible_graphrag-0.4.0/ingest/manager.py +229 -0
- flexible_graphrag-0.4.0/install.py +52 -0
- flexible_graphrag-0.4.0/main.py +1468 -0
- flexible_graphrag-0.4.0/neptune_database_wrapper.py +131 -0
- flexible_graphrag-0.4.0/observability/__init__.py +25 -0
- flexible_graphrag-0.4.0/observability/custom_hooks.py +252 -0
- flexible_graphrag-0.4.0/observability/metrics.py +172 -0
- flexible_graphrag-0.4.0/observability/telemetry_openlit.py +163 -0
- flexible_graphrag-0.4.0/observability/telemetry_setup.py +222 -0
- flexible_graphrag-0.4.0/post_ingestion_state.py +586 -0
- flexible_graphrag-0.4.0/pyproject.toml +247 -0
- flexible_graphrag-0.4.0/requirements.txt +135 -0
- flexible_graphrag-0.4.0/setup.cfg +4 -0
- flexible_graphrag-0.4.0/sources/__init__.py +38 -0
- flexible_graphrag-0.4.0/sources/alfresco.py +856 -0
- flexible_graphrag-0.4.0/sources/azure_blob.py +267 -0
- flexible_graphrag-0.4.0/sources/base.py +89 -0
- flexible_graphrag-0.4.0/sources/box.py +244 -0
- flexible_graphrag-0.4.0/sources/cmis.py +356 -0
- flexible_graphrag-0.4.0/sources/filesystem.py +217 -0
- flexible_graphrag-0.4.0/sources/gcs.py +274 -0
- flexible_graphrag-0.4.0/sources/google_drive.py +278 -0
- flexible_graphrag-0.4.0/sources/onedrive.py +313 -0
- flexible_graphrag-0.4.0/sources/passthrough_extractor.py +257 -0
- flexible_graphrag-0.4.0/sources/s3.py +342 -0
- flexible_graphrag-0.4.0/sources/sharepoint.py +556 -0
- flexible_graphrag-0.4.0/sources/web.py +105 -0
- flexible_graphrag-0.4.0/sources/wikipedia.py +389 -0
- flexible_graphrag-0.4.0/sources/youtube.py +219 -0
- flexible_graphrag-0.4.0/start.py +24 -0
- flexible_graphrag-0.4.0/uv.toml +53 -0
|
@@ -0,0 +1,89 @@
|
|
|
1
|
+
# Base Image Options:
|
|
2
|
+
#
|
|
3
|
+
# IMPORTANT: Image size mainly impacts:
|
|
4
|
+
# - Docker image distribution/download time (pushing/pulling from registry)
|
|
5
|
+
# - Storage space on disk
|
|
6
|
+
# - Initial docker build time (first time only)
|
|
7
|
+
#
|
|
8
|
+
# Image size does NOT significantly impact:
|
|
9
|
+
# - Development workflow or startup speed after first build
|
|
10
|
+
# - Runtime performance (all options perform the same once running)
|
|
11
|
+
# - Docker container start time (negligible difference)
|
|
12
|
+
#
|
|
13
|
+
# Option 1: python:3.12 (CURRENT - ~1GB, most compatible)
|
|
14
|
+
# - Full Debian-based image with gcc, g++, make, python-dev pre-installed
|
|
15
|
+
# - Required for building C extensions (pystemmer, etc.) on all platforms
|
|
16
|
+
# - Fixes build failures on both Linux and macOS (especially Apple Silicon)
|
|
17
|
+
# - Final image size: ~1GB
|
|
18
|
+
# - Guaranteed compatibility
|
|
19
|
+
FROM python:3.12
|
|
20
|
+
|
|
21
|
+
# Option 2: python:3.12-slim (~200MB smaller but requires build tools)
|
|
22
|
+
# Uncomment below and comment out python:3.12 above to use slim
|
|
23
|
+
# - Debian-based but minimal, needs build tools installed manually
|
|
24
|
+
# - Base image: ~150MB, final image: ~800MB (after adding build tools + dependencies)
|
|
25
|
+
# - Saves ~200MB compared to full image
|
|
26
|
+
# - Use this if you're distributing images and want smaller downloads
|
|
27
|
+
# - Note: Requires installing build tools (gcc, g++, make, python3-dev) so C extensions can compile
|
|
28
|
+
# FROM python:3.12-slim
|
|
29
|
+
# RUN apt-get update && apt-get install -y \
|
|
30
|
+
# gcc g++ make python3-dev \
|
|
31
|
+
# curl \
|
|
32
|
+
# && rm -rf /var/lib/apt/lists/*
|
|
33
|
+
|
|
34
|
+
# Option 3: python:3.12-alpine (~50MB base, smallest but compatibility issues)
|
|
35
|
+
# Uncomment below and comment out python:3.12 above to use Alpine
|
|
36
|
+
# - Alpine uses musl libc instead of glibc, which causes compatibility issues
|
|
37
|
+
# - Base image: ~50MB, final image: ~200-300MB
|
|
38
|
+
# - Many Python packages with C extensions fail to build on Alpine
|
|
39
|
+
# - Python 3.13-alpine would NOT help - the musl libc issue exists in all Alpine versions
|
|
40
|
+
# - Not recommended for projects with complex dependencies like this one
|
|
41
|
+
# FROM python:3.12-alpine
|
|
42
|
+
# RUN apk add --no-cache \
|
|
43
|
+
# gcc musl-dev python3-dev \
|
|
44
|
+
# libffi-dev openssl-dev \
|
|
45
|
+
# curl \
|
|
46
|
+
# && rm -rf /var/cache/apk/*
|
|
47
|
+
|
|
48
|
+
WORKDIR /app
|
|
49
|
+
|
|
50
|
+
# Install system dependencies (for Option 1: python:3.12)
|
|
51
|
+
# Only curl is needed since build tools are already included in the full image
|
|
52
|
+
RUN apt-get update && apt-get install -y \
|
|
53
|
+
curl \
|
|
54
|
+
&& rm -rf /var/lib/apt/lists/*
|
|
55
|
+
|
|
56
|
+
# Note: If using Option 2 (slim), uncomment the gcc/g++/make/python3-dev lines in Option 2 above
|
|
57
|
+
# Note: If using Option 3 (alpine), use the apk commands in Option 3 above instead of apt-get
|
|
58
|
+
|
|
59
|
+
# Install uv for faster package installation
|
|
60
|
+
RUN pip install uv
|
|
61
|
+
|
|
62
|
+
# Copy pyproject.toml and uv.toml first for better caching
|
|
63
|
+
COPY pyproject.toml uv.toml ./
|
|
64
|
+
|
|
65
|
+
# Method 1: Install using pyproject.toml (CURRENT - modern approach)
|
|
66
|
+
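# This installs the package in editable mode with all dependencies (only pyproject.toml and uv.toml are present at this point; application code is copied in a later step)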
|
|
67
|
+
RUN uv pip install --system -e .
|
|
68
|
+
|
|
69
|
+
# Method 2: Install using requirements.txt (OLD - commented out)
|
|
70
|
+
# Uncomment the lines below and comment out the pyproject.toml method above
|
|
71
|
+
# if you want to use the legacy requirements.txt approach
|
|
72
|
+
# COPY requirements.txt .
|
|
73
|
+
# RUN uv pip install --system -r requirements.txt
|
|
74
|
+
|
|
75
|
+
# Copy application code
|
|
76
|
+
COPY . .
|
|
77
|
+
|
|
78
|
+
# Create necessary directories
|
|
79
|
+
RUN mkdir -p uploads sample_docs
|
|
80
|
+
|
|
81
|
+
# Expose port
|
|
82
|
+
EXPOSE 8000
|
|
83
|
+
|
|
84
|
+
# Health check
|
|
85
|
+
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
|
|
86
|
+
CMD curl -f http://localhost:8000/health || exit 1
|
|
87
|
+
|
|
88
|
+
# Run the application with the standard asyncio loop (uvloop conflicts with nest_asyncio)
|
|
89
|
+
CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--loop", "asyncio"]
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: flexible-graphrag
|
|
3
|
+
Version: 0.4.0
|
|
4
|
+
Summary: Flexible GraphRAG system supporting multiple LLM providers, graph databases, vector stores, and data sources
|
|
5
|
+
Author: Steve Reiner
|
|
6
|
+
License-Expression: Apache-2.0
|
|
7
|
+
Project-URL: Homepage, https://github.com/stevereiner/flexible-graphrag
|
|
8
|
+
Project-URL: Repository, https://github.com/stevereiner/flexible-graphrag
|
|
9
|
+
Project-URL: Documentation, https://github.com/stevereiner/flexible-graphrag/blob/main/README.md
|
|
10
|
+
Keywords: graphrag,rag,llm,knowledge-graph,neo4j,kuzu,vector-database
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
14
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
15
|
+
Requires-Python: <3.14,>=3.12
|
|
16
|
+
Description-Content-Type: text/markdown
|
|
17
|
+
Requires-Dist: fastapi
|
|
18
|
+
Requires-Dist: uvicorn
|
|
19
|
+
Requires-Dist: python-multipart
|
|
20
|
+
Requires-Dist: fastmcp
|
|
21
|
+
Requires-Dist: python-jose[cryptography]
|
|
22
|
+
Requires-Dist: passlib[bcrypt]
|
|
23
|
+
Requires-Dist: python-dotenv
|
|
24
|
+
Requires-Dist: nest-asyncio
|
|
25
|
+
Requires-Dist: wikipedia
|
|
26
|
+
Requires-Dist: cmislib
|
|
27
|
+
Requires-Dist: python-alfresco-api>=1.1.5
|
|
28
|
+
Requires-Dist: docling
|
|
29
|
+
Requires-Dist: llama-parse
|
|
30
|
+
Requires-Dist: neo4j
|
|
31
|
+
Requires-Dist: numpy
|
|
32
|
+
Requires-Dist: rapidfuzz
|
|
33
|
+
Requires-Dist: spacy
|
|
34
|
+
Requires-Dist: openai
|
|
35
|
+
Requires-Dist: ollama
|
|
36
|
+
Requires-Dist: pydantic
|
|
37
|
+
Requires-Dist: pydantic-settings
|
|
38
|
+
Requires-Dist: llama-index-core
|
|
39
|
+
Requires-Dist: llama-index-llms-openai
|
|
40
|
+
Requires-Dist: llama-index-llms-ollama
|
|
41
|
+
Requires-Dist: llama-index-llms-google-genai
|
|
42
|
+
Requires-Dist: llama-index-llms-azure-openai
|
|
43
|
+
Requires-Dist: llama-index-llms-anthropic
|
|
44
|
+
Requires-Dist: llama-index-llms-bedrock-converse
|
|
45
|
+
Requires-Dist: llama-index-llms-groq
|
|
46
|
+
Requires-Dist: llama-index-llms-fireworks
|
|
47
|
+
Requires-Dist: llama-index-embeddings-openai
|
|
48
|
+
Requires-Dist: llama-index-embeddings-ollama
|
|
49
|
+
Requires-Dist: llama-index-embeddings-google-genai
|
|
50
|
+
Requires-Dist: llama-index-embeddings-azure-openai
|
|
51
|
+
Requires-Dist: llama-index-embeddings-bedrock
|
|
52
|
+
Requires-Dist: llama-index-embeddings-fireworks
|
|
53
|
+
Requires-Dist: llama-index-vector-stores-neo4jvector
|
|
54
|
+
Requires-Dist: llama-index-vector-stores-qdrant
|
|
55
|
+
Requires-Dist: llama-index-vector-stores-elasticsearch
|
|
56
|
+
Requires-Dist: llama-index-vector-stores-opensearch
|
|
57
|
+
Requires-Dist: llama-index-vector-stores-chroma
|
|
58
|
+
Requires-Dist: llama-index-vector-stores-milvus
|
|
59
|
+
Requires-Dist: llama-index-vector-stores-weaviate
|
|
60
|
+
Requires-Dist: llama-index-vector-stores-pinecone
|
|
61
|
+
Requires-Dist: llama-index-vector-stores-postgres
|
|
62
|
+
Requires-Dist: llama-index-vector-stores-lancedb
|
|
63
|
+
Requires-Dist: llama-index-graph-stores-neo4j
|
|
64
|
+
Requires-Dist: llama-index-graph-stores-kuzu
|
|
65
|
+
Requires-Dist: llama-index-graph-stores-falkordb
|
|
66
|
+
Requires-Dist: llama-index-graph-stores-memgraph
|
|
67
|
+
Requires-Dist: llama-index-graph-stores-nebula
|
|
68
|
+
Requires-Dist: llama-index-graph-stores-neptune
|
|
69
|
+
Requires-Dist: llama-index-retrievers-bm25
|
|
70
|
+
Requires-Dist: arcadedb-python>=0.4.0
|
|
71
|
+
Requires-Dist: llama-index-graph-stores-arcadedb>=0.4.1
|
|
72
|
+
Requires-Dist: kuzu
|
|
73
|
+
Requires-Dist: elasticsearch
|
|
74
|
+
Requires-Dist: opensearch-py
|
|
75
|
+
Requires-Dist: qdrant-client
|
|
76
|
+
Requires-Dist: chromadb
|
|
77
|
+
Requires-Dist: pymilvus
|
|
78
|
+
Requires-Dist: weaviate-client
|
|
79
|
+
Requires-Dist: pinecone-client
|
|
80
|
+
Requires-Dist: psycopg2-binary
|
|
81
|
+
Requires-Dist: lancedb
|
|
82
|
+
Requires-Dist: pinecone
|
|
83
|
+
Requires-Dist: llama-index-readers-web
|
|
84
|
+
Requires-Dist: llama-index-readers-wikipedia
|
|
85
|
+
Requires-Dist: llama-index-readers-youtube-transcript
|
|
86
|
+
Requires-Dist: youtube_transcript_api
|
|
87
|
+
Requires-Dist: llama-index-readers-s3
|
|
88
|
+
Requires-Dist: llama-index-readers-gcs
|
|
89
|
+
Requires-Dist: llama-index-readers-google
|
|
90
|
+
Requires-Dist: llama-index-readers-azstorage-blob
|
|
91
|
+
Requires-Dist: llama-index-readers-microsoft-onedrive
|
|
92
|
+
Requires-Dist: llama-index-readers-microsoft-sharepoint
|
|
93
|
+
Requires-Dist: llama-index-readers-box
|
|
94
|
+
Requires-Dist: llama-index-readers-file
|
|
95
|
+
Requires-Dist: box-sdk-gen>=1.17.0
|
|
96
|
+
Requires-Dist: s3fs
|
|
97
|
+
Requires-Dist: boto3
|
|
98
|
+
Requires-Dist: google-cloud-storage
|
|
99
|
+
Requires-Dist: google-api-python-client
|
|
100
|
+
Requires-Dist: google-auth-httplib2
|
|
101
|
+
Requires-Dist: google-auth-oauthlib
|
|
102
|
+
Requires-Dist: azure-storage-blob
|
|
103
|
+
Requires-Dist: asyncpg>=0.29.0
|
|
104
|
+
Requires-Dist: watchdog>=4.0.0
|
|
105
|
+
Requires-Dist: google-cloud-pubsub>=2.18.0
|
|
106
|
+
Requires-Dist: azure-storage-blob-changefeed>=12.0.0b5
|
|
107
|
+
Requires-Dist: msgraph-sdk>=1.0.0
|
|
108
|
+
Requires-Dist: azure-identity>=1.14.0
|
|
109
|
+
Provides-Extra: observability
|
|
110
|
+
Requires-Dist: openinference-instrumentation-llama-index; extra == "observability"
|
|
111
|
+
Requires-Dist: opentelemetry-exporter-otlp; extra == "observability"
|
|
112
|
+
Requires-Dist: opentelemetry-sdk; extra == "observability"
|
|
113
|
+
Requires-Dist: opentelemetry-api; extra == "observability"
|
|
114
|
+
Provides-Extra: observability-openlit
|
|
115
|
+
Requires-Dist: openlit; extra == "observability-openlit"
|
|
116
|
+
Requires-Dist: opentelemetry-exporter-otlp; extra == "observability-openlit"
|
|
117
|
+
Requires-Dist: opentelemetry-sdk; extra == "observability-openlit"
|
|
118
|
+
Requires-Dist: opentelemetry-api; extra == "observability-openlit"
|
|
119
|
+
Provides-Extra: observability-dual
|
|
120
|
+
Requires-Dist: openinference-instrumentation-llama-index; extra == "observability-dual"
|
|
121
|
+
Requires-Dist: openlit>=1.36.0; extra == "observability-dual"
|
|
122
|
+
Requires-Dist: opentelemetry-exporter-otlp; extra == "observability-dual"
|
|
123
|
+
Requires-Dist: opentelemetry-sdk; extra == "observability-dual"
|
|
124
|
+
Requires-Dist: opentelemetry-api; extra == "observability-dual"
|
|
125
|
+
Provides-Extra: dev
|
|
126
|
+
Requires-Dist: pytest; extra == "dev"
|
|
127
|
+
Requires-Dist: pytest-asyncio; extra == "dev"
|
|
128
|
+
Requires-Dist: pytest-cov; extra == "dev"
|
|
129
|
+
Requires-Dist: black; extra == "dev"
|
|
130
|
+
Requires-Dist: ruff; extra == "dev"
|
|
131
|
+
Requires-Dist: mypy; extra == "dev"
|
|
132
|
+
|
|
133
|
+
# Flexible GraphRAG
|
|
134
|
+
|
|
135
|
+
See the full documentation in the [project root README.md](https://github.com/stevereiner/flexible-graphrag/blob/main/README.md).
|