grt-lexical-graph 3.17.2.dev0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- grt_lexical_graph-3.17.2.dev0/.gitignore +33 -0
- grt_lexical_graph-3.17.2.dev0/PKG-INFO +190 -0
- grt_lexical_graph-3.17.2.dev0/README.md +168 -0
- grt_lexical_graph-3.17.2.dev0/pyproject.toml +18 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/__init__.py +56 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/config.py +1277 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/errors.py +17 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/__init__.py +11 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/__init__.py +10 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/build_filter.py +135 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/build_filters.py +118 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/build_pipeline.py +321 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/checkpoint.py +247 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/chunk_graph_builder.py +149 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/chunk_node_builder.py +129 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/delete_sources.py +397 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/entity_graph_builder.py +133 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/entity_relation_graph_builder.py +169 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/fact_graph_builder.py +180 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_batch_client.py +335 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_builder.py +68 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_construction.py +176 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_summary_builder.py +122 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/local_entity_rewrites_graph_builder.py +116 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/node_builder.py +148 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/node_builders.py +189 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/null_builder.py +44 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/source_graph_builder.py +138 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/source_node_builder.py +116 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/statement_graph_builder.py +147 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/statement_node_builder.py +218 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/topic_graph_builder.py +94 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/topic_node_builder.py +198 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/vector_batch_client.py +177 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/vector_indexing.py +168 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/version_manager.py +273 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/constants.py +28 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/__init__.py +14 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_config.py +42 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_extractor_base.py +322 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_llm_proposition_extractor_sync.py +93 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_topic_extractor_sync.py +105 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/docs_to_nodes.py +69 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/extraction_pipeline.py +427 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/file_system_tap.py +142 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/id_rewriter.py +182 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/infer_classifications.py +192 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/infer_config.py +35 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/llm_proposition_extractor.py +213 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/pipeline_decorator.py +56 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/preferred_values.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/proposition_extractor.py +249 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/source_doc_parser.py +63 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/topic_extractor.py +216 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/id_generator.py +173 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/__init__.py +7 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/file_based_docs.py +185 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/json_array_reader.py +175 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/__init__.py +86 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/base_reader_provider.py +51 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/llama_index_reader_provider_base.py +41 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/__init__.py +46 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/advanced_pdf_reader_provider.py +87 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/csv_reader_provider.py +56 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/database_reader_provider.py +64 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/directory_reader_provider.py +57 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/document_graph_reader_provider.py +78 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/docx_reader_provider.py +55 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/github_reader_provider.py +76 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/json_reader_provider.py +67 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/markdown_reader_provider.py +60 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/pdf_reader_provider.py +61 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/pptx_reader_provider.py +55 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/s3_directory_reader_provider.py +73 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/streaming_jsonl_reader_provider.py +310 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/structured_data_reader_provider.py +130 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/universal_directory_reader_provider.py +134 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/web_reader_provider.py +45 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/wikipedia_reader_provider.py +91 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/youtube_reader_provider.py +126 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/pydantic_reader_provider_base.py +55 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/reader_provider_base.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/reader_provider_config.py +191 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/reader_provider_config_base.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/s3_file_mixin.py +163 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/validated_reader_provider_base.py +86 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/s3_based_docs.py +518 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/source_documents.py +54 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/model.py +263 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/node_handler.py +60 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/prompts.py +232 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/__init__.py +2 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/batch_inference_utils.py +361 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/fact_utils.py +11 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/hash_utils.py +20 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/metadata_utils.py +31 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/pipeline_utils.py +54 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/topic_utils.py +185 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/lexical_graph_index.py +773 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/lexical_graph_query_engine.py +573 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/logging.py +309 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/metadata.py +451 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/__init__.py +17 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/bedrock_prompt_provider.py +123 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/file_prompt_provider.py +119 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_base.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_config.py +232 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_config_base.py +30 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_factory.py +42 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_registry.py +69 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/s3_prompt_provider.py +95 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/static_prompt_provider.py +54 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/protocols/__init__.py +9 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/protocols/mcp_server.py +277 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/requirements.txt +16 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/__init__.py +7 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/model.py +270 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/__init__.py +9 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/bedrock_context_format.py +126 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/bge_reranker.py +218 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/enrich_source_details.py +194 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/reranker_mixin.py +63 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/sentence_reranker.py +131 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/statement_diversity.py +251 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/statement_enhancement.py +150 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/__init__.py +25 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/clear_chunks.py +64 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/clear_scores.py +54 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/clear_topic_ids.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/dedup_results.py +97 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/disaggregate_results.py +69 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/filter_by_metadata.py +63 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/format_sources.py +188 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/populate_statement_strs.py +97 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/processor_args.py +121 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/processor_base.py +217 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/prune_results.py +57 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/prune_statements.py +50 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/remove_versioning_metadata.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/rerank_statements.py +318 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/rescore_results.py +86 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/simplify_single_topic_results.py +79 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/sort_results.py +63 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/statements_to_strings.py +109 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/truncate_results.py +56 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/truncate_statements.py +87 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/update_chunk_metadata.py +29 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/zero_scores.py +83 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/prompts.py +161 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/__init__.py +12 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_context_provider.py +366 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_from_top_statement_provider.py +120 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_provider.py +133 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_provider_base.py +45 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_vss_provider.py +118 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_nlp_provider.py +37 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_provider.py +81 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_provider_base.py +32 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_vss_provider.py +168 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/pass_thru_keyword_provider.py +18 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/query_mode.py +56 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/__init__.py +19 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/chunk_based_search.py +189 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/chunk_based_semantic_search.py +237 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/chunk_cosine_search.py +93 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/composite_traversal_based_retriever.py +239 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/entity_based_search.py +262 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/entity_context_search.py +153 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/entity_network_search.py +172 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/keyword_ranking_search.py +265 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/query_mode_retriever.py +68 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/rerank_beam_search.py +391 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_beam_search.py +249 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_chunk_beam_search.py +211 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_base_chunk_retriever.py +36 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_base_retriever.py +91 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_chunk_retriever.py +229 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_retriever.py +262 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/statement_cosine_seach.py +123 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/topic_based_search.py +197 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/traversal_based_base_retriever.py +383 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/summary/__init__.py +4 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/summary/graph_summary.py +173 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/__init__.py +2 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/chunk_utils.py +116 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/entity_utils.py +85 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/query_decomposition.py +129 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/statement_utils.py +286 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/vector_utils.py +68 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/__init__.py +6 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/constants.py +18 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/__init__.py +8 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/dummy_graph_store.py +84 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/graph_store.py +537 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/graph_store_factory_method.py +36 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/graph_utils.py +306 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/multi_tenant_graph_store.py +164 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/neo4j_graph_store.py +183 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/neo4j_graph_store_factory.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/neptune_graph_stores.py +547 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/query_tree.py +66 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph_store_factory.py +101 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/__init__.py +9 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/dummy_vector_index.py +124 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/multi_tenant_vector_store.py +72 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/neptune_vector_indexes.py +296 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/opensearch_vector_index_factory.py +59 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/opensearch_vector_indexes.py +1174 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/pg_vector_index_factory.py +50 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/pg_vector_indexes.py +742 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/read_only_vector_store.py +79 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/repair_opensearch_vector_store.py +210 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/s3_vector_index_factory.py +47 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/s3_vector_indexes.py +651 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/vector_index.py +187 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/vector_index_factory_method.py +26 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/vector_store.py +79 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector_store_factory.py +122 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/tenant_id.py +195 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/__init__.py +4 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/arg_utils.py +7 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/bedrock_utils.py +290 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/fm_observability.py +964 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/io_utils.py +120 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/llm_cache.py +164 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/reranker_utils.py +62 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/versioning.py +125 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/visualisation/__init__.py +8 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/visualisation/graph_notebook/__init__.py +4 -0
- grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/visualisation/graph_notebook/graph_notebook_visualisation.py +590 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/conftest.py +43 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/__init__.py +2 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/build_tests/__init__.py +2 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/build_tests/test_node_builder.py +56 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_advanced_pdf_reader_provider.py +18 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_csv_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_database_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_docx_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_github_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_json_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_markdown_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_pdf_reader_provider.py +18 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_pptx_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_s3_directory_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_streaming_jsonl_reader_property.py +1340 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_streaming_jsonl_reader_provider.py +390 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_web_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_wikipedia_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_youtube_reader_provider.py +13 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/test_id_generator.py +493 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/__init__.py +2 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_batch_inference_utils.py +215 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_fact_utils.py +52 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_hash_utils.py +75 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_metadata_utils.py +100 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_topic_utils.py +517 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/storage/vector/test_s3_vector_index_factory.py +52 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/test_metadata.py +12 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/test_tenant_id.py +170 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/__init__.py +2 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_arg_utils.py +11 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_bedrock_embedding_empty_text.py +193 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_io_utils.py +88 -0
- grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_reranker_utils.py +84 -0
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# ignore virtual environment directories
|
|
2
|
+
.venv/
|
|
3
|
+
|
|
4
|
+
# ignore generated files and directories from version control
|
|
5
|
+
/examples/lexical-graph/notebooks/extracted/
|
|
6
|
+
/examples/lexical-graph/notebooks/output/
|
|
7
|
+
/examples/lexical-graph-local-dev/notebooks/output/
|
|
8
|
+
/examples/lexical-graph-hybrid-dev/notebooks/output/
|
|
9
|
+
/examples/lexical-graph-hybrid-dev/notebooks/extracted/
|
|
10
|
+
|
|
11
|
+
# ignore IDE files
|
|
12
|
+
.vscode/
|
|
13
|
+
.idea/
|
|
14
|
+
|
|
15
|
+
# Jupyter Notebook checkpoints
|
|
16
|
+
.ipynb_checkpoints/
|
|
17
|
+
|
|
18
|
+
# cache files
|
|
19
|
+
__pycache__/
|
|
20
|
+
|
|
21
|
+
# Byte-compiled / optimized / DLL files
|
|
22
|
+
*.pyc
|
|
23
|
+
|
|
24
|
+
# hypothesis artifacts
|
|
25
|
+
.hypothesis/
|
|
26
|
+
|
|
27
|
+
# coverage artifact
|
|
28
|
+
.coverage
|
|
29
|
+
|
|
30
|
+
# kiro config
|
|
31
|
+
.config.kiro
|
|
32
|
+
/temp/
|
|
33
|
+
/temp/
|
|
@@ -0,0 +1,190 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: grt-lexical-graph
|
|
3
|
+
Version: 3.17.2.dev0
|
|
4
|
+
Summary: AWS GraphRAG Toolkit, lexical graph
|
|
5
|
+
License-Expression: Apache-2.0
|
|
6
|
+
Requires-Python: >=3.10
|
|
7
|
+
Requires-Dist: anthropic-bedrock==0.8.0
|
|
8
|
+
Requires-Dist: boto3>=1.40.61
|
|
9
|
+
Requires-Dist: botocore>=1.40.61
|
|
10
|
+
Requires-Dist: json2xml==5.2.0
|
|
11
|
+
Requires-Dist: llama-index-core==0.14.17
|
|
12
|
+
Requires-Dist: llama-index-embeddings-bedrock==0.8.0
|
|
13
|
+
Requires-Dist: llama-index-llms-anthropic==0.11.0
|
|
14
|
+
Requires-Dist: llama-index-llms-bedrock-converse==0.14.0
|
|
15
|
+
Requires-Dist: lru-dict==1.3.0
|
|
16
|
+
Requires-Dist: pipe==2.2
|
|
17
|
+
Requires-Dist: python-dotenv==1.1.1
|
|
18
|
+
Requires-Dist: smart-open==7.1.0
|
|
19
|
+
Requires-Dist: spacy==3.8.7
|
|
20
|
+
Requires-Dist: tfidf-matcher==0.3.0
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
|
|
23
|
+
## Lexical Graph
|
|
24
|
+
|
|
25
|
+
The lexical-graph package provides a framework for automating the construction of a [hierarchical lexical graph](../docs/lexical-graph/graph-model.md) from unstructured data, and composing question-answering strategies that query this graph when answering user questions.
|
|
26
|
+
|
|
27
|
+
### Features
|
|
28
|
+
|
|
29
|
+
- Built-in graph store support for [Amazon Neptune Analytics](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html), [Amazon Neptune Database](https://docs.aws.amazon.com/neptune/latest/userguide/intro.html), and [Neo4j](https://neo4j.com/docs/).
|
|
30
|
+
- Built-in vector store support for Neptune Analytics, [Amazon OpenSearch Serverless](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless.html), [Amazon S3 Vectors](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html) and Postgres with the pgvector extension.
|
|
31
|
+
- Built-in support for foundation models (LLMs and embedding models) on [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/).
|
|
32
|
+
- Easily extended to support additional graph and vector stores and model backends.
|
|
33
|
+
- [Multi-tenancy](../docs/lexical-graph/multi-tenancy.md) – multiple separate lexical graphs in the same underlying graph and vector stores.
|
|
34
|
+
- Continuous ingest and [batch extraction](../docs/lexical-graph/batch-extraction.md) (using [Bedrock batch inference](https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference.html)) modes.
|
|
35
|
+
- [Versioned updates](../docs/lexical-graph/versioned-updates.md) for updating source documents and querying the state of the graph and vector stores at a point in time.
|
|
36
|
+
- Quickstart [AWS CloudFormation templates](../examples/lexical-graph/cloudformation-templates/) for Neptune Database, OpenSearch Serverless, and Amazon Aurora Postgres.
|
|
37
|
+
|
|
38
|
+
## Installation
|
|
39
|
+
|
|
40
|
+
The lexical-graph package requires Python 3.10 or greater and [pip](https://pip.pypa.io/en/stable/).
|
|
41
|
+
|
|
42
|
+
Install from the latest release tag:
|
|
43
|
+
|
|
44
|
+
```
|
|
45
|
+
$ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/tags/v3.16.2.zip#subdirectory=lexical-graph
|
|
46
|
+
```
|
|
47
|
+
|
|
48
|
+
Or install from the `main` branch to get the latest changes:
|
|
49
|
+
|
|
50
|
+
```
|
|
51
|
+
$ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/heads/main.zip#subdirectory=lexical-graph
|
|
52
|
+
```
|
|
53
|
+
|
|
54
|
+
If you're running on AWS, you must run your application in an AWS region containing the Amazon Bedrock foundation models used by the lexical graph (see the [configuration](../docs/lexical-graph/configuration.md#graphragconfig) section in the documentation for details on the default models used), and must [enable access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) to these models before running any part of the solution.
|
|
55
|
+
|
|
56
|
+
### Additional dependencies
|
|
57
|
+
|
|
58
|
+
You will need to install additional dependencies for specific graph and vector store backends:
|
|
59
|
+
|
|
60
|
+
#### Amazon OpenSearch Serverless
|
|
61
|
+
|
|
62
|
+
```bash
|
|
63
|
+
$ pip install opensearch-py llama-index-vector-stores-opensearch
|
|
64
|
+
```
|
|
65
|
+
|
|
66
|
+
#### Postgres with pgvector
|
|
67
|
+
|
|
68
|
+
```bash
|
|
69
|
+
$ pip install psycopg2-binary pgvector
|
|
70
|
+
```
|
|
71
|
+
|
|
72
|
+
#### Neo4j
|
|
73
|
+
|
|
74
|
+
``` bash
|
|
75
|
+
$ pip install neo4j
|
|
76
|
+
```
|
|
77
|
+
|
|
78
|
+
### Connection strings
|
|
79
|
+
|
|
80
|
+
Pass a connection string to `GraphStoreFactory.for_graph_store()` or `VectorStoreFactory.for_vector_store()` to select a backend:
|
|
81
|
+
|
|
82
|
+
| Store | Connection string |
|
|
83
|
+
| --- | --- |
|
|
84
|
+
| Neptune Analytics (graph) | `neptune-graph://<graph-id>` |
|
|
85
|
+
| Neptune Database (graph) | `neptune-db://<hostname>` or any hostname ending in `.neptune.amazonaws.com` |
|
|
86
|
+
| Neo4j (graph) | `bolt://`, `bolt+ssc://`, `bolt+s://`, `neo4j://`, `neo4j+ssc://`, or `neo4j+s://` URLs |
|
|
87
|
+
| OpenSearch Serverless (vector) | `aoss://<url>` |
|
|
88
|
+
| Neptune Analytics (vector) | `neptune-graph://<graph-id>` |
|
|
89
|
+
| pgvector (vector) | constructed via `PGVectorIndexFactory` |
|
|
90
|
+
| S3 Vectors (vector) | constructed via `S3VectorIndexFactory` |
|
|
91
|
+
| Dummy / no-op | `None` or any unrecognised string — falls back to `DummyGraphStore` / `DummyVectorIndex` |
|
|
92
|
+
|
|
93
|
+
## Example of use
|
|
94
|
+
|
|
95
|
+
### Indexing
|
|
96
|
+
|
|
97
|
+
```python
|
|
98
|
+
from graphrag_toolkit.lexical_graph import LexicalGraphIndex
|
|
99
|
+
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
|
|
100
|
+
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
|
|
101
|
+
|
|
102
|
+
# requires pip install llama-index-readers-web
|
|
103
|
+
from llama_index.readers.web import SimpleWebPageReader
|
|
104
|
+
|
|
105
|
+
def run_extract_and_build():
|
|
106
|
+
|
|
107
|
+
with (
|
|
108
|
+
GraphStoreFactory.for_graph_store(
|
|
109
|
+
'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
|
|
110
|
+
) as graph_store,
|
|
111
|
+
VectorStoreFactory.for_vector_store(
|
|
112
|
+
'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
|
|
113
|
+
) as vector_store
|
|
114
|
+
):
|
|
115
|
+
|
|
116
|
+
graph_index = LexicalGraphIndex(
|
|
117
|
+
graph_store,
|
|
118
|
+
vector_store
|
|
119
|
+
)
|
|
120
|
+
|
|
121
|
+
doc_urls = [
|
|
122
|
+
'https://docs.aws.amazon.com/neptune/latest/userguide/intro.html',
|
|
123
|
+
'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html',
|
|
124
|
+
'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-features.html',
|
|
125
|
+
'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-vs-neptune-database.html'
|
|
126
|
+
]
|
|
127
|
+
|
|
128
|
+
docs = SimpleWebPageReader(
|
|
129
|
+
html_to_text=True,
|
|
130
|
+
metadata_fn=lambda url:{'url': url}
|
|
131
|
+
).load_data(doc_urls)
|
|
132
|
+
|
|
133
|
+
graph_index.extract_and_build(docs, show_progress=True)
|
|
134
|
+
|
|
135
|
+
if __name__ == '__main__':
|
|
136
|
+
run_extract_and_build()
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
### Querying
|
|
140
|
+
|
|
141
|
+
```python
|
|
142
|
+
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
|
|
143
|
+
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
|
|
144
|
+
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
|
|
145
|
+
|
|
146
|
+
def run_query():
|
|
147
|
+
|
|
148
|
+
with (
|
|
149
|
+
GraphStoreFactory.for_graph_store(
|
|
150
|
+
'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
|
|
151
|
+
) as graph_store,
|
|
152
|
+
VectorStoreFactory.for_vector_store(
|
|
153
|
+
'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
|
|
154
|
+
) as vector_store
|
|
155
|
+
):
|
|
156
|
+
|
|
157
|
+
query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
|
|
158
|
+
graph_store,
|
|
159
|
+
vector_store
|
|
160
|
+
)
|
|
161
|
+
|
|
162
|
+
response = query_engine.query('''What are the differences between Neptune Database
|
|
163
|
+
and Neptune Analytics?''')
|
|
164
|
+
|
|
165
|
+
print(response.response)
|
|
166
|
+
|
|
167
|
+
if __name__ == '__main__':
|
|
168
|
+
run_query()
|
|
169
|
+
```
|
|
170
|
+
|
|
171
|
+
## Documentation
|
|
172
|
+
|
|
173
|
+
- [Overview](../docs/lexical-graph/overview.md)
|
|
174
|
+
- [Graph Model](../docs/lexical-graph/graph-model.md)
|
|
175
|
+
- [Storage Model](../docs/lexical-graph/storage-model.md)
|
|
176
|
+
- [Indexing](../docs/lexical-graph/indexing.md)
|
|
177
|
+
- [Batch Extraction](../docs/lexical-graph/batch-extraction.md)
|
|
178
|
+
- [Configuring Batch Extraction](../docs/lexical-graph/configuring-batch-extraction.md)
|
|
179
|
+
- [Versioned Updates](../docs/lexical-graph/versioned-updates.md)
|
|
180
|
+
- [Querying](../docs/lexical-graph/querying.md)
|
|
181
|
+
- [Traversal-Based Search](../docs/lexical-graph/traversal-based-search.md)
|
|
182
|
+
- [Traversal-Based Search Configuration](../docs/lexical-graph/traversal-based-search-configuration.md)
|
|
183
|
+
- [Configuration](../docs/lexical-graph/configuration.md)
|
|
184
|
+
- [Security](../docs/lexical-graph/security.md)
|
|
185
|
+
- [FAQ](../docs/lexical-graph/faq.md)
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
## License
|
|
189
|
+
|
|
190
|
+
This project is licensed under the Apache-2.0 License.
|
|
@@ -0,0 +1,168 @@
|
|
|
1
|
+
## Lexical Graph
|
|
2
|
+
|
|
3
|
+
The lexical-graph package provides a framework for automating the construction of a [hierarchical lexical graph](../docs/lexical-graph/graph-model.md) from unstructured data, and composing question-answering strategies that query this graph when answering user questions.
|
|
4
|
+
|
|
5
|
+
### Features
|
|
6
|
+
|
|
7
|
+
- Built-in graph store support for [Amazon Neptune Analytics](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html), [Amazon Neptune Database](https://docs.aws.amazon.com/neptune/latest/userguide/intro.html), and [Neo4j](https://neo4j.com/docs/).
|
|
8
|
+
- Built-in vector store support for Neptune Analytics, [Amazon OpenSearch Serverless](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless.html), [Amazon S3 Vectors](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html) and Postgres with the pgvector extension.
|
|
9
|
+
- Built-in support for foundation models (LLMs and embedding models) on [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/).
|
|
10
|
+
- Easily extended to support additional graph and vector stores and model backends.
|
|
11
|
+
- [Multi-tenancy](../docs/lexical-graph/multi-tenancy.md) – multiple separate lexical graphs in the same underlying graph and vector stores.
|
|
12
|
+
- Continuous ingest and [batch extraction](../docs/lexical-graph/batch-extraction.md) (using [Bedrock batch inference](https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference.html)) modes.
|
|
13
|
+
- [Versioned updates](../docs/lexical-graph/versioned-updates.md) for updating source documents and querying the state of the graph and vector stores at a point in time.
|
|
14
|
+
- Quickstart [AWS CloudFormation templates](../examples/lexical-graph/cloudformation-templates/) for Neptune Database, OpenSearch Serverless, and Amazon Aurora Postgres.
|
|
15
|
+
|
|
16
|
+
## Installation
|
|
17
|
+
|
|
18
|
+
The lexical-graph package requires Python 3.10 or greater and [pip](https://pip.pypa.io/en/stable/).
|
|
19
|
+
|
|
20
|
+
Install from the latest release tag:
|
|
21
|
+
|
|
22
|
+
```
|
|
23
|
+
$ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/tags/v3.16.2.zip#subdirectory=lexical-graph
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
Or install from the `main` branch to get the latest changes:
|
|
27
|
+
|
|
28
|
+
```
|
|
29
|
+
$ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/heads/main.zip#subdirectory=lexical-graph
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
If you're running on AWS, you must run your application in an AWS region containing the Amazon Bedrock foundation models used by the lexical graph (see the [configuration](../docs/lexical-graph/configuration.md#graphragconfig) section in the documentation for details on the default models used), and must [enable access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) to these models before running any part of the solution.
|
|
33
|
+
|
|
34
|
+
### Additional dependencies
|
|
35
|
+
|
|
36
|
+
You will need to install additional dependencies for specific graph and vector store backends:
|
|
37
|
+
|
|
38
|
+
#### Amazon OpenSearch Serverless
|
|
39
|
+
|
|
40
|
+
```bash
|
|
41
|
+
$ pip install opensearch-py llama-index-vector-stores-opensearch
|
|
42
|
+
```
|
|
43
|
+
|
|
44
|
+
#### Postgres with pgvector
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
$ pip install psycopg2-binary pgvector
|
|
48
|
+
```
|
|
49
|
+
|
|
50
|
+
#### Neo4j
|
|
51
|
+
|
|
52
|
+
``` bash
|
|
53
|
+
$ pip install neo4j
|
|
54
|
+
```
|
|
55
|
+
|
|
56
|
+
### Connection strings
|
|
57
|
+
|
|
58
|
+
Pass a connection string to `GraphStoreFactory.for_graph_store()` or `VectorStoreFactory.for_vector_store()` to select a backend:
|
|
59
|
+
|
|
60
|
+
| Store | Connection string |
|
|
61
|
+
| --- | --- |
|
|
62
|
+
| Neptune Analytics (graph) | `neptune-graph://<graph-id>` |
|
|
63
|
+
| Neptune Database (graph) | `neptune-db://<hostname>` or any hostname ending in `.neptune.amazonaws.com` |
|
|
64
|
+
| Neo4j (graph) | `bolt://`, `bolt+ssc://`, `bolt+s://`, `neo4j://`, `neo4j+ssc://`, or `neo4j+s://` URLs |
|
|
65
|
+
| OpenSearch Serverless (vector) | `aoss://<url>` |
|
|
66
|
+
| Neptune Analytics (vector) | `neptune-graph://<graph-id>` |
|
|
67
|
+
| pgvector (vector) | constructed via `PGVectorIndexFactory` |
|
|
68
|
+
| S3 Vectors (vector) | constructed via `S3VectorIndexFactory` |
|
|
69
|
+
| Dummy / no-op | `None` or any unrecognised string — falls back to `DummyGraphStore` / `DummyVectorIndex` |
|
|
70
|
+
|
|
71
|
+
## Example of use
|
|
72
|
+
|
|
73
|
+
### Indexing
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
from graphrag_toolkit.lexical_graph import LexicalGraphIndex
|
|
77
|
+
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
|
|
78
|
+
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
|
|
79
|
+
|
|
80
|
+
# requires pip install llama-index-readers-web
|
|
81
|
+
from llama_index.readers.web import SimpleWebPageReader
|
|
82
|
+
|
|
83
|
+
def run_extract_and_build():
|
|
84
|
+
|
|
85
|
+
with (
|
|
86
|
+
GraphStoreFactory.for_graph_store(
|
|
87
|
+
'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
|
|
88
|
+
) as graph_store,
|
|
89
|
+
VectorStoreFactory.for_vector_store(
|
|
90
|
+
'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
|
|
91
|
+
) as vector_store
|
|
92
|
+
):
|
|
93
|
+
|
|
94
|
+
graph_index = LexicalGraphIndex(
|
|
95
|
+
graph_store,
|
|
96
|
+
vector_store
|
|
97
|
+
)
|
|
98
|
+
|
|
99
|
+
doc_urls = [
|
|
100
|
+
'https://docs.aws.amazon.com/neptune/latest/userguide/intro.html',
|
|
101
|
+
'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html',
|
|
102
|
+
'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-features.html',
|
|
103
|
+
'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-vs-neptune-database.html'
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
docs = SimpleWebPageReader(
|
|
107
|
+
html_to_text=True,
|
|
108
|
+
metadata_fn=lambda url:{'url': url}
|
|
109
|
+
).load_data(doc_urls)
|
|
110
|
+
|
|
111
|
+
graph_index.extract_and_build(docs, show_progress=True)
|
|
112
|
+
|
|
113
|
+
if __name__ == '__main__':
|
|
114
|
+
run_extract_and_build()
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
### Querying
|
|
118
|
+
|
|
119
|
+
```python
|
|
120
|
+
from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
|
|
121
|
+
from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
|
|
122
|
+
from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
|
|
123
|
+
|
|
124
|
+
def run_query():
|
|
125
|
+
|
|
126
|
+
with (
|
|
127
|
+
GraphStoreFactory.for_graph_store(
|
|
128
|
+
'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
|
|
129
|
+
) as graph_store,
|
|
130
|
+
VectorStoreFactory.for_vector_store(
|
|
131
|
+
'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
|
|
132
|
+
) as vector_store
|
|
133
|
+
):
|
|
134
|
+
|
|
135
|
+
query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
|
|
136
|
+
graph_store,
|
|
137
|
+
vector_store
|
|
138
|
+
)
|
|
139
|
+
|
|
140
|
+
response = query_engine.query('''What are the differences between Neptune Database
|
|
141
|
+
and Neptune Analytics?''')
|
|
142
|
+
|
|
143
|
+
print(response.response)
|
|
144
|
+
|
|
145
|
+
if __name__ == '__main__':
|
|
146
|
+
run_query()
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
## Documentation
|
|
150
|
+
|
|
151
|
+
- [Overview](../docs/lexical-graph/overview.md)
|
|
152
|
+
- [Graph Model](../docs/lexical-graph/graph-model.md)
|
|
153
|
+
- [Storage Model](../docs/lexical-graph/storage-model.md)
|
|
154
|
+
- [Indexing](../docs/lexical-graph/indexing.md)
|
|
155
|
+
- [Batch Extraction](../docs/lexical-graph/batch-extraction.md)
|
|
156
|
+
- [Configuring Batch Extraction](../docs/lexical-graph/configuring-batch-extraction.md)
|
|
157
|
+
- [Versioned Updates](../docs/lexical-graph/versioned-updates.md)
|
|
158
|
+
- [Querying](../docs/lexical-graph/querying.md)
|
|
159
|
+
- [Traversal-Based Search](../docs/lexical-graph/traversal-based-search.md)
|
|
160
|
+
- [Traversal-Based Search Configuration](../docs/lexical-graph/traversal-based-search-configuration.md)
|
|
161
|
+
- [Configuration](../docs/lexical-graph/configuration.md)
|
|
162
|
+
- [Security](../docs/lexical-graph/security.md)
|
|
163
|
+
- [FAQ](../docs/lexical-graph/faq.md)
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
## License
|
|
167
|
+
|
|
168
|
+
This project is licensed under the Apache-2.0 License.
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["hatchling", "hatch-requirements-txt"]
|
|
3
|
+
build-backend = "hatchling.build"
|
|
4
|
+
|
|
5
|
+
[tool.hatch.build.targets.wheel]
|
|
6
|
+
packages = ["src/graphrag_toolkit"]
|
|
7
|
+
|
|
8
|
+
[project]
|
|
9
|
+
name = "grt-lexical-graph"
|
|
10
|
+
version = "3.17.2.dev0"
|
|
11
|
+
description = "AWS GraphRAG Toolkit, lexical graph"
|
|
12
|
+
readme = "README.md"
|
|
13
|
+
requires-python = ">=3.10"
|
|
14
|
+
dynamic = ["dependencies"]
|
|
15
|
+
license = "Apache-2.0"
|
|
16
|
+
|
|
17
|
+
[tool.hatch.metadata.hooks.requirements_txt]
|
|
18
|
+
files = ["src/graphrag_toolkit/lexical_graph/requirements.txt"]
|
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
|
|
2
|
+
# SPDX-License-Identifier: Apache-2.0
|
|
3
|
+
|
|
4
|
+
import warnings
|
|
5
|
+
from pydantic.warnings import UnsupportedFieldAttributeWarning
|
|
6
|
+
|
|
7
|
+
warnings.filterwarnings('ignore', category=UnsupportedFieldAttributeWarning)
|
|
8
|
+
warnings.filterwarnings('ignore', message="Can't initialize NVML")
|
|
9
|
+
|
|
10
|
+
import asyncio
|
|
11
|
+
import llama_index.core.async_utils
|
|
12
|
+
import logging as l
|
|
13
|
+
|
|
14
|
+
logger = l.getLogger(__name__)
|
|
15
|
+
|
|
16
|
+
def _asyncio_run(coro):
|
|
17
|
+
|
|
18
|
+
l.debug('Patching asyncio_run() to run coroutine on existing event loop to support notebooks.')
|
|
19
|
+
|
|
20
|
+
try:
|
|
21
|
+
loop = asyncio.get_event_loop()
|
|
22
|
+
|
|
23
|
+
return loop.run_until_complete(coro)
|
|
24
|
+
|
|
25
|
+
except RuntimeError as e:
|
|
26
|
+
try:
|
|
27
|
+
return asyncio.run(coro)
|
|
28
|
+
except RuntimeError as e:
|
|
29
|
+
raise RuntimeError(
|
|
30
|
+
"Detected nested async. Please use nest_asyncio.apply() to allow nested event loops."
|
|
31
|
+
"Or, use async entry methods like `aquery()`, `aretriever`, `achat`, etc."
|
|
32
|
+
)
|
|
33
|
+
|
|
34
|
+
# Replace llama_index's asyncio_run with the local _asyncio_run only when an
# event loop is already running at import time (the notebook case named in
# _asyncio_run's debug message).
try:
    loop = asyncio.get_event_loop()
    # is_running is a method and must be called: the bare attribute is a
    # bound method object, which is always truthy.
    if loop.is_running():
        llama_index.core.async_utils.asyncio_run = _asyncio_run
except RuntimeError:
    # No current event loop could be obtained; leave llama_index unpatched.
    pass
|
|
40
|
+
|
|
41
|
+
from .tenant_id import TenantId, DEFAULT_TENANT_ID, DEFAULT_TENANT_NAME, TenantIdType, to_tenant_id
|
|
42
|
+
from .config import GraphRAGConfig as GraphRAGConfig, LLMType, EmbeddingType
|
|
43
|
+
from .errors import ModelError, BatchJobError, IndexError, GraphQueryError, ConfigurationError
|
|
44
|
+
from .logging import set_logging_config, set_advanced_logging_config
|
|
45
|
+
from .lexical_graph_query_engine import LexicalGraphQueryEngine
|
|
46
|
+
from .lexical_graph_index import LexicalGraphIndex
|
|
47
|
+
from .lexical_graph_index import ExtractionConfig, BuildConfig, IndexingConfig
|
|
48
|
+
from .metadata import to_metadata_filter
|
|
49
|
+
from .versioning import add_versioning_info, VersioningConfig, VersioningMode
|
|
50
|
+
from . import utils
|
|
51
|
+
from . import indexing
|
|
52
|
+
from . import retrieval
|
|
53
|
+
from . import storage
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
|