flexible-graphrag 0.4.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (76) hide show
  1. flexible_graphrag-0.4.0/Dockerfile +89 -0
  2. flexible_graphrag-0.4.0/PKG-INFO +135 -0
  3. flexible_graphrag-0.4.0/README.md +3 -0
  4. flexible_graphrag-0.4.0/backend.py +1858 -0
  5. flexible_graphrag-0.4.0/check_elasticsearch.py +76 -0
  6. flexible_graphrag-0.4.0/cleanup.py +655 -0
  7. flexible_graphrag-0.4.0/cmis_util.py +69 -0
  8. flexible_graphrag-0.4.0/config.py +574 -0
  9. flexible_graphrag-0.4.0/document_processor.py +817 -0
  10. flexible_graphrag-0.4.0/env-sample.txt +595 -0
  11. flexible_graphrag-0.4.0/factories.py +1463 -0
  12. flexible_graphrag-0.4.0/flexible_graphrag.egg-info/PKG-INFO +135 -0
  13. flexible_graphrag-0.4.0/flexible_graphrag.egg-info/SOURCES.txt +74 -0
  14. flexible_graphrag-0.4.0/flexible_graphrag.egg-info/dependency_links.txt +1 -0
  15. flexible_graphrag-0.4.0/flexible_graphrag.egg-info/entry_points.txt +2 -0
  16. flexible_graphrag-0.4.0/flexible_graphrag.egg-info/requires.txt +119 -0
  17. flexible_graphrag-0.4.0/flexible_graphrag.egg-info/top_level.txt +18 -0
  18. flexible_graphrag-0.4.0/hybrid_system.py +2764 -0
  19. flexible_graphrag-0.4.0/incremental_system.py +237 -0
  20. flexible_graphrag-0.4.0/incremental_updates/__init__.py +17 -0
  21. flexible_graphrag-0.4.0/incremental_updates/config_manager.py +260 -0
  22. flexible_graphrag-0.4.0/incremental_updates/datasource-queries.sql +303 -0
  23. flexible_graphrag-0.4.0/incremental_updates/detectors/README.md +532 -0
  24. flexible_graphrag-0.4.0/incremental_updates/detectors/__init__.py +45 -0
  25. flexible_graphrag-0.4.0/incremental_updates/detectors/alfresco_broadcaster.py +274 -0
  26. flexible_graphrag-0.4.0/incremental_updates/detectors/alfresco_detector.py +1551 -0
  27. flexible_graphrag-0.4.0/incremental_updates/detectors/azure_blob_detector.py +780 -0
  28. flexible_graphrag-0.4.0/incremental_updates/detectors/base.py +111 -0
  29. flexible_graphrag-0.4.0/incremental_updates/detectors/box_detector.py +634 -0
  30. flexible_graphrag-0.4.0/incremental_updates/detectors/factory.py +62 -0
  31. flexible_graphrag-0.4.0/incremental_updates/detectors/filesystem_detector.py +535 -0
  32. flexible_graphrag-0.4.0/incremental_updates/detectors/gcs_detector.py +830 -0
  33. flexible_graphrag-0.4.0/incremental_updates/detectors/google_drive_detector.py +794 -0
  34. flexible_graphrag-0.4.0/incremental_updates/detectors/msgraph_detector.py +911 -0
  35. flexible_graphrag-0.4.0/incremental_updates/detectors/s3_detector.py +700 -0
  36. flexible_graphrag-0.4.0/incremental_updates/diagnostic_queries.sql +177 -0
  37. flexible_graphrag-0.4.0/incremental_updates/engine.py +997 -0
  38. flexible_graphrag-0.4.0/incremental_updates/logging_config.py +178 -0
  39. flexible_graphrag-0.4.0/incremental_updates/orchestrator.py +418 -0
  40. flexible_graphrag-0.4.0/incremental_updates/path_utils.py +23 -0
  41. flexible_graphrag-0.4.0/incremental_updates/s3_helpers.py +452 -0
  42. flexible_graphrag-0.4.0/incremental_updates/schema.sql +81 -0
  43. flexible_graphrag-0.4.0/incremental_updates/state_manager.py +353 -0
  44. flexible_graphrag-0.4.0/ingest/__init__.py +13 -0
  45. flexible_graphrag-0.4.0/ingest/factory.py +82 -0
  46. flexible_graphrag-0.4.0/ingest/manager.py +229 -0
  47. flexible_graphrag-0.4.0/install.py +52 -0
  48. flexible_graphrag-0.4.0/main.py +1468 -0
  49. flexible_graphrag-0.4.0/neptune_database_wrapper.py +131 -0
  50. flexible_graphrag-0.4.0/observability/__init__.py +25 -0
  51. flexible_graphrag-0.4.0/observability/custom_hooks.py +252 -0
  52. flexible_graphrag-0.4.0/observability/metrics.py +172 -0
  53. flexible_graphrag-0.4.0/observability/telemetry_openlit.py +163 -0
  54. flexible_graphrag-0.4.0/observability/telemetry_setup.py +222 -0
  55. flexible_graphrag-0.4.0/post_ingestion_state.py +586 -0
  56. flexible_graphrag-0.4.0/pyproject.toml +247 -0
  57. flexible_graphrag-0.4.0/requirements.txt +135 -0
  58. flexible_graphrag-0.4.0/setup.cfg +4 -0
  59. flexible_graphrag-0.4.0/sources/__init__.py +38 -0
  60. flexible_graphrag-0.4.0/sources/alfresco.py +856 -0
  61. flexible_graphrag-0.4.0/sources/azure_blob.py +267 -0
  62. flexible_graphrag-0.4.0/sources/base.py +89 -0
  63. flexible_graphrag-0.4.0/sources/box.py +244 -0
  64. flexible_graphrag-0.4.0/sources/cmis.py +356 -0
  65. flexible_graphrag-0.4.0/sources/filesystem.py +217 -0
  66. flexible_graphrag-0.4.0/sources/gcs.py +274 -0
  67. flexible_graphrag-0.4.0/sources/google_drive.py +278 -0
  68. flexible_graphrag-0.4.0/sources/onedrive.py +313 -0
  69. flexible_graphrag-0.4.0/sources/passthrough_extractor.py +257 -0
  70. flexible_graphrag-0.4.0/sources/s3.py +342 -0
  71. flexible_graphrag-0.4.0/sources/sharepoint.py +556 -0
  72. flexible_graphrag-0.4.0/sources/web.py +105 -0
  73. flexible_graphrag-0.4.0/sources/wikipedia.py +389 -0
  74. flexible_graphrag-0.4.0/sources/youtube.py +219 -0
  75. flexible_graphrag-0.4.0/start.py +24 -0
  76. flexible_graphrag-0.4.0/uv.toml +53 -0
@@ -0,0 +1,89 @@
1
+ # Base Image Options:
2
+ #
3
+ # IMPORTANT: Image size mainly impacts:
4
+ # - Docker image distribution/download time (pushing/pulling from registry)
5
+ # - Storage space on disk
6
+ # - Initial docker build time (first time only)
7
+ #
8
+ # Image size does NOT significantly impact:
9
+ # - Development workflow or startup speed after first build
10
+ # - Runtime performance (all options perform the same once running)
11
+ # - Docker container start time (negligible difference)
12
+ #
13
+ # Option 1: python:3.12 (CURRENT - ~1GB, most compatible)
14
+ # - Full Debian-based image with gcc, g++, make, python-dev pre-installed
15
+ # - Required for building C extensions (pystemmer, etc.) on all platforms
16
+ # - Fixes build failures on both Linux and macOS (especially Apple Silicon)
17
+ # - Final image size: ~1GB
18
+ # - Guaranteed compatibility
19
+ FROM python:3.12
20
+
21
+ # Option 2: python:3.12-slim (~200MB smaller but requires build tools)
22
+ # Uncomment below and comment out python:3.12 above to use slim
23
+ # - Debian-based but minimal, needs build tools installed manually
24
+ # - Base image: ~150MB, final image: ~800MB (after adding build tools + dependencies)
25
+ # - Saves ~200MB compared to full image
26
+ # - Use this if you're distributing images and want smaller downloads
27
+ # - Note: Requires installing build tools (gcc, g++, make, python3-dev) for C extensions to compile
28
+ # FROM python:3.12-slim
29
+ # RUN apt-get update && apt-get install -y \
30
+ # gcc g++ make python3-dev \
31
+ # curl \
32
+ # && rm -rf /var/lib/apt/lists/*
33
+
34
+ # Option 3: python:3.12-alpine (~50MB base, smallest but compatibility issues)
35
+ # Uncomment below and comment out python:3.12 above to use Alpine
36
+ # - Alpine uses musl libc instead of glibc, which causes compatibility issues
37
+ # - Base image: ~50MB, final image: ~200-300MB
38
+ # - Many Python packages with C extensions fail to build on Alpine
39
+ # - Python 3.13-alpine would NOT help - the musl libc issue exists in all Alpine versions
40
+ # - Not recommended for projects with complex dependencies like this one
41
+ # FROM python:3.12-alpine
42
+ # RUN apk add --no-cache \
43
+ # gcc musl-dev python3-dev \
44
+ # libffi-dev openssl-dev \
45
+ # curl \
46
+ # && rm -rf /var/cache/apk/*
47
+
48
+ WORKDIR /app
49
+
50
+ # Install system dependencies (for Option 1: python:3.12)
51
+ # Only curl is needed (it is used by the HEALTHCHECK below) since build tools are already included
52
+ RUN apt-get update && apt-get install -y \
53
+ curl \
54
+ && rm -rf /var/lib/apt/lists/*
55
+
56
+ # Note: If using Option 2 (slim), uncomment the gcc/g++/make/python3-dev lines in Option 2 above
57
+ # Note: If using Option 3 (alpine), use the apk commands in Option 3 above instead of apt-get
58
+
59
+ # Install uv for faster package installation
60
+ RUN pip install uv
61
+
62
+ # Copy pyproject.toml and uv.toml first for better caching
63
+ COPY pyproject.toml uv.toml ./
64
+
65
+ # Method 1: Install using pyproject.toml (CURRENT - modern approach)
66
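+ # This installs the package in editable mode with its core dependencies (optional extras are not included; only pyproject.toml and uv.toml are present at this step, the application code is copied below)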
67
+ RUN uv pip install --system -e .
68
+
69
+ # Method 2: Install using requirements.txt (OLD - commented out)
70
+ # Uncomment the lines below and comment out the pyproject.toml method above
71
+ # if you want to use the legacy requirements.txt approach
72
+ # COPY requirements.txt .
73
+ # RUN uv pip install --system -r requirements.txt
74
+
75
+ # Copy application code
76
+ COPY . .
77
+
78
+ # Create necessary directories
79
+ RUN mkdir -p uploads sample_docs
80
+
81
+ # Expose port
82
+ EXPOSE 8000
83
+
84
+ # Health check
85
+ HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
86
+ CMD curl -f http://localhost:8000/health || exit 1
87
+
88
+ # Run the application with the standard asyncio event loop (uvloop conflicts with nest_asyncio)
89
+ CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000", "--loop", "asyncio"]
@@ -0,0 +1,135 @@
1
+ Metadata-Version: 2.4
2
+ Name: flexible-graphrag
3
+ Version: 0.4.0
4
+ Summary: Flexible GraphRAG system supporting multiple LLM providers, graph databases, vector stores, and data sources
5
+ Author: Steve Reiner
6
+ License-Expression: Apache-2.0
7
+ Project-URL: Homepage, https://github.com/stevereiner/flexible-graphrag
8
+ Project-URL: Repository, https://github.com/stevereiner/flexible-graphrag
9
+ Project-URL: Documentation, https://github.com/stevereiner/flexible-graphrag/blob/main/README.md
10
+ Keywords: graphrag,rag,llm,knowledge-graph,neo4j,kuzu,vector-database
11
+ Classifier: Development Status :: 4 - Beta
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Programming Language :: Python :: 3.12
14
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
15
+ Requires-Python: <3.14,>=3.12
16
+ Description-Content-Type: text/markdown
17
+ Requires-Dist: fastapi
18
+ Requires-Dist: uvicorn
19
+ Requires-Dist: python-multipart
20
+ Requires-Dist: fastmcp
21
+ Requires-Dist: python-jose[cryptography]
22
+ Requires-Dist: passlib[bcrypt]
23
+ Requires-Dist: python-dotenv
24
+ Requires-Dist: nest-asyncio
25
+ Requires-Dist: wikipedia
26
+ Requires-Dist: cmislib
27
+ Requires-Dist: python-alfresco-api>=1.1.5
28
+ Requires-Dist: docling
29
+ Requires-Dist: llama-parse
30
+ Requires-Dist: neo4j
31
+ Requires-Dist: numpy
32
+ Requires-Dist: rapidfuzz
33
+ Requires-Dist: spacy
34
+ Requires-Dist: openai
35
+ Requires-Dist: ollama
36
+ Requires-Dist: pydantic
37
+ Requires-Dist: pydantic-settings
38
+ Requires-Dist: llama-index-core
39
+ Requires-Dist: llama-index-llms-openai
40
+ Requires-Dist: llama-index-llms-ollama
41
+ Requires-Dist: llama-index-llms-google-genai
42
+ Requires-Dist: llama-index-llms-azure-openai
43
+ Requires-Dist: llama-index-llms-anthropic
44
+ Requires-Dist: llama-index-llms-bedrock-converse
45
+ Requires-Dist: llama-index-llms-groq
46
+ Requires-Dist: llama-index-llms-fireworks
47
+ Requires-Dist: llama-index-embeddings-openai
48
+ Requires-Dist: llama-index-embeddings-ollama
49
+ Requires-Dist: llama-index-embeddings-google-genai
50
+ Requires-Dist: llama-index-embeddings-azure-openai
51
+ Requires-Dist: llama-index-embeddings-bedrock
52
+ Requires-Dist: llama-index-embeddings-fireworks
53
+ Requires-Dist: llama-index-vector-stores-neo4jvector
54
+ Requires-Dist: llama-index-vector-stores-qdrant
55
+ Requires-Dist: llama-index-vector-stores-elasticsearch
56
+ Requires-Dist: llama-index-vector-stores-opensearch
57
+ Requires-Dist: llama-index-vector-stores-chroma
58
+ Requires-Dist: llama-index-vector-stores-milvus
59
+ Requires-Dist: llama-index-vector-stores-weaviate
60
+ Requires-Dist: llama-index-vector-stores-pinecone
61
+ Requires-Dist: llama-index-vector-stores-postgres
62
+ Requires-Dist: llama-index-vector-stores-lancedb
63
+ Requires-Dist: llama-index-graph-stores-neo4j
64
+ Requires-Dist: llama-index-graph-stores-kuzu
65
+ Requires-Dist: llama-index-graph-stores-falkordb
66
+ Requires-Dist: llama-index-graph-stores-memgraph
67
+ Requires-Dist: llama-index-graph-stores-nebula
68
+ Requires-Dist: llama-index-graph-stores-neptune
69
+ Requires-Dist: llama-index-retrievers-bm25
70
+ Requires-Dist: arcadedb-python>=0.4.0
71
+ Requires-Dist: llama-index-graph-stores-arcadedb>=0.4.1
72
+ Requires-Dist: kuzu
73
+ Requires-Dist: elasticsearch
74
+ Requires-Dist: opensearch-py
75
+ Requires-Dist: qdrant-client
76
+ Requires-Dist: chromadb
77
+ Requires-Dist: pymilvus
78
+ Requires-Dist: weaviate-client
79
+ Requires-Dist: pinecone-client
80
+ Requires-Dist: psycopg2-binary
81
+ Requires-Dist: lancedb
82
+ Requires-Dist: pinecone
83
+ Requires-Dist: llama-index-readers-web
84
+ Requires-Dist: llama-index-readers-wikipedia
85
+ Requires-Dist: llama-index-readers-youtube-transcript
86
+ Requires-Dist: youtube_transcript_api
87
+ Requires-Dist: llama-index-readers-s3
88
+ Requires-Dist: llama-index-readers-gcs
89
+ Requires-Dist: llama-index-readers-google
90
+ Requires-Dist: llama-index-readers-azstorage-blob
91
+ Requires-Dist: llama-index-readers-microsoft-onedrive
92
+ Requires-Dist: llama-index-readers-microsoft-sharepoint
93
+ Requires-Dist: llama-index-readers-box
94
+ Requires-Dist: llama-index-readers-file
95
+ Requires-Dist: box-sdk-gen>=1.17.0
96
+ Requires-Dist: s3fs
97
+ Requires-Dist: boto3
98
+ Requires-Dist: google-cloud-storage
99
+ Requires-Dist: google-api-python-client
100
+ Requires-Dist: google-auth-httplib2
101
+ Requires-Dist: google-auth-oauthlib
102
+ Requires-Dist: azure-storage-blob
103
+ Requires-Dist: asyncpg>=0.29.0
104
+ Requires-Dist: watchdog>=4.0.0
105
+ Requires-Dist: google-cloud-pubsub>=2.18.0
106
+ Requires-Dist: azure-storage-blob-changefeed>=12.0.0b5
107
+ Requires-Dist: msgraph-sdk>=1.0.0
108
+ Requires-Dist: azure-identity>=1.14.0
109
+ Provides-Extra: observability
110
+ Requires-Dist: openinference-instrumentation-llama-index; extra == "observability"
111
+ Requires-Dist: opentelemetry-exporter-otlp; extra == "observability"
112
+ Requires-Dist: opentelemetry-sdk; extra == "observability"
113
+ Requires-Dist: opentelemetry-api; extra == "observability"
114
+ Provides-Extra: observability-openlit
115
+ Requires-Dist: openlit; extra == "observability-openlit"
116
+ Requires-Dist: opentelemetry-exporter-otlp; extra == "observability-openlit"
117
+ Requires-Dist: opentelemetry-sdk; extra == "observability-openlit"
118
+ Requires-Dist: opentelemetry-api; extra == "observability-openlit"
119
+ Provides-Extra: observability-dual
120
+ Requires-Dist: openinference-instrumentation-llama-index; extra == "observability-dual"
121
+ Requires-Dist: openlit>=1.36.0; extra == "observability-dual"
122
+ Requires-Dist: opentelemetry-exporter-otlp; extra == "observability-dual"
123
+ Requires-Dist: opentelemetry-sdk; extra == "observability-dual"
124
+ Requires-Dist: opentelemetry-api; extra == "observability-dual"
125
+ Provides-Extra: dev
126
+ Requires-Dist: pytest; extra == "dev"
127
+ Requires-Dist: pytest-asyncio; extra == "dev"
128
+ Requires-Dist: pytest-cov; extra == "dev"
129
+ Requires-Dist: black; extra == "dev"
130
+ Requires-Dist: ruff; extra == "dev"
131
+ Requires-Dist: mypy; extra == "dev"
132
+
133
+ # Flexible GraphRAG
134
+
135
+ See the full documentation in the [project root README.md](https://github.com/stevereiner/flexible-graphrag/blob/main/README.md).
@@ -0,0 +1,3 @@
1
+ # Flexible GraphRAG
2
+
3
+ See the full documentation in the [project root README.md](https://github.com/stevereiner/flexible-graphrag/blob/main/README.md).