grt-lexical-graph 3.17.2.dev0__tar.gz

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (264)
  1. grt_lexical_graph-3.17.2.dev0/.gitignore +33 -0
  2. grt_lexical_graph-3.17.2.dev0/PKG-INFO +190 -0
  3. grt_lexical_graph-3.17.2.dev0/README.md +168 -0
  4. grt_lexical_graph-3.17.2.dev0/pyproject.toml +18 -0
  5. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/__init__.py +56 -0
  6. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/config.py +1277 -0
  7. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/errors.py +17 -0
  8. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/__init__.py +11 -0
  9. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/__init__.py +10 -0
  10. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/build_filter.py +135 -0
  11. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/build_filters.py +118 -0
  12. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/build_pipeline.py +321 -0
  13. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/checkpoint.py +247 -0
  14. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/chunk_graph_builder.py +149 -0
  15. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/chunk_node_builder.py +129 -0
  16. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/delete_sources.py +397 -0
  17. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/entity_graph_builder.py +133 -0
  18. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/entity_relation_graph_builder.py +169 -0
  19. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/fact_graph_builder.py +180 -0
  20. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_batch_client.py +335 -0
  21. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_builder.py +68 -0
  22. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_construction.py +176 -0
  23. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/graph_summary_builder.py +122 -0
  24. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/local_entity_rewrites_graph_builder.py +116 -0
  25. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/node_builder.py +148 -0
  26. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/node_builders.py +189 -0
  27. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/null_builder.py +44 -0
  28. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/source_graph_builder.py +138 -0
  29. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/source_node_builder.py +116 -0
  30. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/statement_graph_builder.py +147 -0
  31. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/statement_node_builder.py +218 -0
  32. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/topic_graph_builder.py +94 -0
  33. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/topic_node_builder.py +198 -0
  34. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/vector_batch_client.py +177 -0
  35. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/vector_indexing.py +168 -0
  36. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/build/version_manager.py +273 -0
  37. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/constants.py +28 -0
  38. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/__init__.py +14 -0
  39. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_config.py +42 -0
  40. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_extractor_base.py +322 -0
  41. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_llm_proposition_extractor_sync.py +93 -0
  42. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/batch_topic_extractor_sync.py +105 -0
  43. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/docs_to_nodes.py +69 -0
  44. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/extraction_pipeline.py +427 -0
  45. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/file_system_tap.py +142 -0
  46. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/id_rewriter.py +182 -0
  47. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/infer_classifications.py +192 -0
  48. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/infer_config.py +35 -0
  49. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/llm_proposition_extractor.py +213 -0
  50. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/pipeline_decorator.py +56 -0
  51. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/preferred_values.py +26 -0
  52. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/proposition_extractor.py +249 -0
  53. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/source_doc_parser.py +63 -0
  54. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/extract/topic_extractor.py +216 -0
  55. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/id_generator.py +173 -0
  56. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/__init__.py +7 -0
  57. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/file_based_docs.py +185 -0
  58. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/json_array_reader.py +175 -0
  59. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/__init__.py +86 -0
  60. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/base_reader_provider.py +51 -0
  61. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/llama_index_reader_provider_base.py +41 -0
  62. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/__init__.py +46 -0
  63. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/advanced_pdf_reader_provider.py +87 -0
  64. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/csv_reader_provider.py +56 -0
  65. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/database_reader_provider.py +64 -0
  66. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/directory_reader_provider.py +57 -0
  67. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/document_graph_reader_provider.py +78 -0
  68. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/docx_reader_provider.py +55 -0
  69. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/github_reader_provider.py +76 -0
  70. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/json_reader_provider.py +67 -0
  71. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/markdown_reader_provider.py +60 -0
  72. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/pdf_reader_provider.py +61 -0
  73. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/pptx_reader_provider.py +55 -0
  74. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/s3_directory_reader_provider.py +73 -0
  75. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/streaming_jsonl_reader_provider.py +310 -0
  76. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/structured_data_reader_provider.py +130 -0
  77. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/universal_directory_reader_provider.py +134 -0
  78. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/web_reader_provider.py +45 -0
  79. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/wikipedia_reader_provider.py +91 -0
  80. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/youtube_reader_provider.py +126 -0
  81. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/pydantic_reader_provider_base.py +55 -0
  82. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/reader_provider_base.py +26 -0
  83. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/reader_provider_config.py +191 -0
  84. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/reader_provider_config_base.py +26 -0
  85. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/s3_file_mixin.py +163 -0
  86. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/readers/validated_reader_provider_base.py +86 -0
  87. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/s3_based_docs.py +518 -0
  88. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/load/source_documents.py +54 -0
  89. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/model.py +263 -0
  90. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/node_handler.py +60 -0
  91. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/prompts.py +232 -0
  92. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/__init__.py +2 -0
  93. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/batch_inference_utils.py +361 -0
  94. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/fact_utils.py +11 -0
  95. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/hash_utils.py +20 -0
  96. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/metadata_utils.py +31 -0
  97. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/pipeline_utils.py +54 -0
  98. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/indexing/utils/topic_utils.py +185 -0
  99. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/lexical_graph_index.py +773 -0
  100. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/lexical_graph_query_engine.py +573 -0
  101. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/logging.py +309 -0
  102. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/metadata.py +451 -0
  103. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/__init__.py +17 -0
  104. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/bedrock_prompt_provider.py +123 -0
  105. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/file_prompt_provider.py +119 -0
  106. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_base.py +26 -0
  107. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_config.py +232 -0
  108. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_config_base.py +30 -0
  109. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_factory.py +42 -0
  110. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/prompt_provider_registry.py +69 -0
  111. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/s3_prompt_provider.py +95 -0
  112. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/prompts/static_prompt_provider.py +54 -0
  113. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/protocols/__init__.py +9 -0
  114. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/protocols/mcp_server.py +277 -0
  115. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/requirements.txt +16 -0
  116. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/__init__.py +7 -0
  117. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/model.py +270 -0
  118. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/__init__.py +9 -0
  119. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/bedrock_context_format.py +126 -0
  120. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/bge_reranker.py +218 -0
  121. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/enrich_source_details.py +194 -0
  122. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/reranker_mixin.py +63 -0
  123. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/sentence_reranker.py +131 -0
  124. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/statement_diversity.py +251 -0
  125. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/post_processors/statement_enhancement.py +150 -0
  126. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/__init__.py +25 -0
  127. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/clear_chunks.py +64 -0
  128. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/clear_scores.py +54 -0
  129. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/clear_topic_ids.py +26 -0
  130. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/dedup_results.py +97 -0
  131. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/disaggregate_results.py +69 -0
  132. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/filter_by_metadata.py +63 -0
  133. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/format_sources.py +188 -0
  134. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/populate_statement_strs.py +97 -0
  135. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/processor_args.py +121 -0
  136. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/processor_base.py +217 -0
  137. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/prune_results.py +57 -0
  138. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/prune_statements.py +50 -0
  139. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/remove_versioning_metadata.py +26 -0
  140. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/rerank_statements.py +318 -0
  141. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/rescore_results.py +86 -0
  142. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/simplify_single_topic_results.py +79 -0
  143. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/sort_results.py +63 -0
  144. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/statements_to_strings.py +109 -0
  145. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/truncate_results.py +56 -0
  146. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/truncate_statements.py +87 -0
  147. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/update_chunk_metadata.py +29 -0
  148. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/processors/zero_scores.py +83 -0
  149. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/prompts.py +161 -0
  150. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/__init__.py +12 -0
  151. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_context_provider.py +366 -0
  152. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_from_top_statement_provider.py +120 -0
  153. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_provider.py +133 -0
  154. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_provider_base.py +45 -0
  155. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/entity_vss_provider.py +118 -0
  156. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_nlp_provider.py +37 -0
  157. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_provider.py +81 -0
  158. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_provider_base.py +32 -0
  159. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/keyword_vss_provider.py +168 -0
  160. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/pass_thru_keyword_provider.py +18 -0
  161. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/query_context/query_mode.py +56 -0
  162. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/__init__.py +19 -0
  163. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/chunk_based_search.py +189 -0
  164. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/chunk_based_semantic_search.py +237 -0
  165. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/chunk_cosine_search.py +93 -0
  166. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/composite_traversal_based_retriever.py +239 -0
  167. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/entity_based_search.py +262 -0
  168. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/entity_context_search.py +153 -0
  169. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/entity_network_search.py +172 -0
  170. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/keyword_ranking_search.py +265 -0
  171. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/query_mode_retriever.py +68 -0
  172. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/rerank_beam_search.py +391 -0
  173. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_beam_search.py +249 -0
  174. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_chunk_beam_search.py +211 -0
  175. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_base_chunk_retriever.py +36 -0
  176. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_base_retriever.py +91 -0
  177. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_chunk_retriever.py +229 -0
  178. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/semantic_guided_retriever.py +262 -0
  179. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/statement_cosine_seach.py +123 -0
  180. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/topic_based_search.py +197 -0
  181. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/retrievers/traversal_based_base_retriever.py +383 -0
  182. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/summary/__init__.py +4 -0
  183. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/summary/graph_summary.py +173 -0
  184. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/__init__.py +2 -0
  185. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/chunk_utils.py +116 -0
  186. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/entity_utils.py +85 -0
  187. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/query_decomposition.py +129 -0
  188. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/statement_utils.py +286 -0
  189. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/retrieval/utils/vector_utils.py +68 -0
  190. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/__init__.py +6 -0
  191. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/constants.py +18 -0
  192. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/__init__.py +8 -0
  193. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/dummy_graph_store.py +84 -0
  194. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/graph_store.py +537 -0
  195. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/graph_store_factory_method.py +36 -0
  196. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/graph_utils.py +306 -0
  197. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/multi_tenant_graph_store.py +164 -0
  198. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/neo4j_graph_store.py +183 -0
  199. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/neo4j_graph_store_factory.py +26 -0
  200. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/neptune_graph_stores.py +547 -0
  201. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph/query_tree.py +66 -0
  202. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/graph_store_factory.py +101 -0
  203. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/__init__.py +9 -0
  204. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/dummy_vector_index.py +124 -0
  205. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/multi_tenant_vector_store.py +72 -0
  206. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/neptune_vector_indexes.py +296 -0
  207. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/opensearch_vector_index_factory.py +59 -0
  208. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/opensearch_vector_indexes.py +1174 -0
  209. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/pg_vector_index_factory.py +50 -0
  210. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/pg_vector_indexes.py +742 -0
  211. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/read_only_vector_store.py +79 -0
  212. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/repair_opensearch_vector_store.py +210 -0
  213. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/s3_vector_index_factory.py +47 -0
  214. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/s3_vector_indexes.py +651 -0
  215. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/vector_index.py +187 -0
  216. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/vector_index_factory_method.py +26 -0
  217. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector/vector_store.py +79 -0
  218. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/storage/vector_store_factory.py +122 -0
  219. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/tenant_id.py +195 -0
  220. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/__init__.py +4 -0
  221. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/arg_utils.py +7 -0
  222. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/bedrock_utils.py +290 -0
  223. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/fm_observability.py +964 -0
  224. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/io_utils.py +120 -0
  225. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/llm_cache.py +164 -0
  226. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/utils/reranker_utils.py +62 -0
  227. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/versioning.py +125 -0
  228. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/visualisation/__init__.py +8 -0
  229. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/visualisation/graph_notebook/__init__.py +4 -0
  230. grt_lexical_graph-3.17.2.dev0/src/graphrag_toolkit/lexical_graph/visualisation/graph_notebook/graph_notebook_visualisation.py +590 -0
  231. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/conftest.py +43 -0
  232. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/__init__.py +2 -0
  233. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/build_tests/__init__.py +2 -0
  234. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/build_tests/test_node_builder.py +56 -0
  235. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_advanced_pdf_reader_provider.py +18 -0
  236. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_csv_reader_provider.py +13 -0
  237. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_database_reader_provider.py +13 -0
  238. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_docx_reader_provider.py +13 -0
  239. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_github_reader_provider.py +13 -0
  240. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_json_reader_provider.py +13 -0
  241. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_markdown_reader_provider.py +13 -0
  242. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_pdf_reader_provider.py +18 -0
  243. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_pptx_reader_provider.py +13 -0
  244. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_s3_directory_reader_provider.py +13 -0
  245. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_streaming_jsonl_reader_property.py +1340 -0
  246. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_streaming_jsonl_reader_provider.py +390 -0
  247. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_web_reader_provider.py +13 -0
  248. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_wikipedia_reader_provider.py +13 -0
  249. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/load/readers/providers/test_youtube_reader_provider.py +13 -0
  250. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/test_id_generator.py +493 -0
  251. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/__init__.py +2 -0
  252. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_batch_inference_utils.py +215 -0
  253. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_fact_utils.py +52 -0
  254. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_hash_utils.py +75 -0
  255. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_metadata_utils.py +100 -0
  256. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/indexing/utils/test_topic_utils.py +517 -0
  257. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/storage/vector/test_s3_vector_index_factory.py +52 -0
  258. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/test_metadata.py +12 -0
  259. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/test_tenant_id.py +170 -0
  260. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/__init__.py +2 -0
  261. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_arg_utils.py +11 -0
  262. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_bedrock_embedding_empty_text.py +193 -0
  263. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_io_utils.py +88 -0
  264. grt_lexical_graph-3.17.2.dev0/tests/graphrag_toolkit/lexical_graph/utils/test_reranker_utils.py +84 -0
@@ -0,0 +1,33 @@
1
+ # ignore virtual environment directories
2
+ .venv/
3
+
4
+ # ignore generated files and directories from version control
5
+ /examples/lexical-graph/notebooks/extracted/
6
+ /examples/lexical-graph/notebooks/output/
7
+ /examples/lexical-graph-local-dev/notebooks/output/
8
+ /examples/lexical-graph-hybrid-dev/notebooks/output/
9
+ /examples/lexical-graph-hybrid-dev/notebooks/extracted/
10
+
11
+ # ignore IDE files
12
+ .vscode/
13
+ .idea/
14
+
15
+ # Jupyter Notebook checkpoints
16
+ .ipynb_checkpoints/
17
+
18
+ # cache files
19
+ __pycache__/
20
+
21
+ # Byte-compiled / optimized / DLL files
22
+ *.pyc
23
+
24
+ # hypothesis artifacts
25
+ .hypothesis/
26
+
27
+ # coverage artifact
28
+ .coverage
29
+
30
+ # kiro config
31
+ .config.kiro
32
+ /temp/
33
+ /temp/
@@ -0,0 +1,190 @@
1
+ Metadata-Version: 2.4
2
+ Name: grt-lexical-graph
3
+ Version: 3.17.2.dev0
4
+ Summary: AWS GraphRAG Toolkit, lexical graph
5
+ License-Expression: Apache-2.0
6
+ Requires-Python: >=3.10
7
+ Requires-Dist: anthropic-bedrock==0.8.0
8
+ Requires-Dist: boto3>=1.40.61
9
+ Requires-Dist: botocore>=1.40.61
10
+ Requires-Dist: json2xml==5.2.0
11
+ Requires-Dist: llama-index-core==0.14.17
12
+ Requires-Dist: llama-index-embeddings-bedrock==0.8.0
13
+ Requires-Dist: llama-index-llms-anthropic==0.11.0
14
+ Requires-Dist: llama-index-llms-bedrock-converse==0.14.0
15
+ Requires-Dist: lru-dict==1.3.0
16
+ Requires-Dist: pipe==2.2
17
+ Requires-Dist: python-dotenv==1.1.1
18
+ Requires-Dist: smart-open==7.1.0
19
+ Requires-Dist: spacy==3.8.7
20
+ Requires-Dist: tfidf-matcher==0.3.0
21
+ Description-Content-Type: text/markdown
22
+
23
+ ## Lexical Graph
24
+
25
+ The lexical-graph package provides a framework for automating the construction of a [hierarchical lexical graph](../docs/lexical-graph/graph-model.md) from unstructured data, and composing question-answering strategies that query this graph when answering user questions.
26
+
27
+ ### Features
28
+
29
+ - Built-in graph store support for [Amazon Neptune Analytics](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html), [Amazon Neptune Database](https://docs.aws.amazon.com/neptune/latest/userguide/intro.html), and [Neo4j](https://neo4j.com/docs/).
30
+ - Built-in vector store support for Neptune Analytics, [Amazon OpenSearch Serverless](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless.html), [Amazon S3 Vectors](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html) and Postgres with the pgvector extension.
31
+ - Built-in support for foundation models (LLMs and embedding models) on [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/).
32
+ - Easily extended to support additional graph and vector stores and model backends.
33
+ - [Multi-tenancy](../docs/lexical-graph/multi-tenancy.md) – multiple separate lexical graphs in the same underlying graph and vector stores.
34
+ - Continuous ingest and [batch extraction](../docs/lexical-graph/batch-extraction.md) (using [Bedrock batch inference](https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference.html)) modes.
35
+ - [Versioned updates](../docs/lexical-graph/versioned-updates.md) for updating source documents and querying the state of the graph and vector stores at a point in time.
36
+ - Quickstart [AWS CloudFormation templates](../examples/lexical-graph/cloudformation-templates/) for Neptune Database, OpenSearch Serverless, and Amazon Aurora Postgres.
37
+
38
+ ## Installation
39
+
40
+ The lexical-graph package requires Python 3.10 or greater and [pip](https://pip.pypa.io/en/stable/).
41
+
42
+ Install from the latest release tag:
43
+
44
+ ```
45
+ $ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/tags/v3.16.2.zip#subdirectory=lexical-graph
46
+ ```
47
+
48
+ Or install from the `main` branch to get the latest changes:
49
+
50
+ ```
51
+ $ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/heads/main.zip#subdirectory=lexical-graph
52
+ ```
53
+
54
+ If you're running on AWS, you must run your application in an AWS region containing the Amazon Bedrock foundation models used by the lexical graph (see the [configuration](../docs/lexical-graph/configuration.md#graphragconfig) section in the documentation for details on the default models used), and must [enable access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) to these models before running any part of the solution.
55
+
56
+ ### Additional dependencies
57
+
58
+ You will need to install additional dependencies for specific graph and vector store backends:
59
+
60
+ #### Amazon OpenSearch Serverless
61
+
62
+ ```bash
63
+ $ pip install opensearch-py llama-index-vector-stores-opensearch
64
+ ```
65
+
66
+ #### Postgres with pgvector
67
+
68
+ ```bash
69
+ $ pip install psycopg2-binary pgvector
70
+ ```
71
+
72
+ #### Neo4j
73
+
74
+ ```bash
75
+ $ pip install neo4j
76
+ ```
77
+
78
+ ### Connection strings
79
+
80
+ Pass a connection string to `GraphStoreFactory.for_graph_store()` or `VectorStoreFactory.for_vector_store()` to select a backend:
81
+
82
+ | Store | Connection string |
83
+ | --- | --- |
84
+ | Neptune Analytics (graph) | `neptune-graph://<graph-id>` |
85
+ | Neptune Database (graph) | `neptune-db://<hostname>` or any hostname ending `.neptune.amazonaws.com` |
86
+ | Neo4j (graph) | `bolt://`, `bolt+ssc://`, `bolt+s://`, `neo4j://`, `neo4j+ssc://`, or `neo4j+s://` URLs |
87
+ | OpenSearch Serverless (vector) | `aoss://<url>` |
88
+ | Neptune Analytics (vector) | `neptune-graph://<graph-id>` |
89
+ | pgvector (vector) | constructed via `PGVectorIndexFactory` |
90
+ | S3 Vectors (vector) | constructed via `S3VectorIndexFactory` |
91
+ | Dummy / no-op | `None` or any unrecognised string — falls back to `DummyGraphStore` / `DummyVectorIndex` |
92
+
93
+ ## Example of use
94
+
95
+ ### Indexing
96
+
97
+ ```python
98
+ from graphrag_toolkit.lexical_graph import LexicalGraphIndex
99
+ from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
100
+ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
101
+
102
+ # requires pip install llama-index-readers-web
103
+ from llama_index.readers.web import SimpleWebPageReader
104
+
105
+ def run_extract_and_build():
106
+
107
+ with (
108
+ GraphStoreFactory.for_graph_store(
109
+ 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
110
+ ) as graph_store,
111
+ VectorStoreFactory.for_vector_store(
112
+ 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
113
+ ) as vector_store
114
+ ):
115
+
116
+ graph_index = LexicalGraphIndex(
117
+ graph_store,
118
+ vector_store
119
+ )
120
+
121
+ doc_urls = [
122
+ 'https://docs.aws.amazon.com/neptune/latest/userguide/intro.html',
123
+ 'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html',
124
+ 'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-features.html',
125
+ 'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-vs-neptune-database.html'
126
+ ]
127
+
128
+ docs = SimpleWebPageReader(
129
+ html_to_text=True,
130
+ metadata_fn=lambda url:{'url': url}
131
+ ).load_data(doc_urls)
132
+
133
+ graph_index.extract_and_build(docs, show_progress=True)
134
+
135
+ if __name__ == '__main__':
136
+ run_extract_and_build()
137
+ ```
138
+
139
+ ### Querying
140
+
141
+ ```python
142
+ from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
143
+ from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
144
+ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
145
+
146
+ def run_query():
147
+
148
+ with (
149
+ GraphStoreFactory.for_graph_store(
150
+ 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
151
+ ) as graph_store,
152
+ VectorStoreFactory.for_vector_store(
153
+ 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
154
+ ) as vector_store
155
+ ):
156
+
157
+ query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
158
+ graph_store,
159
+ vector_store
160
+ )
161
+
162
+ response = query_engine.query('''What are the differences between Neptune Database
163
+ and Neptune Analytics?''')
164
+
165
+ print(response.response)
166
+
167
+ if __name__ == '__main__':
168
+ run_query()
169
+ ```
170
+
171
+ ## Documentation
172
+
173
+ - [Overview](../docs/lexical-graph/overview.md)
174
+ - [Graph Model](../docs/lexical-graph/graph-model.md)
175
+ - [Storage Model](../docs/lexical-graph/storage-model.md)
176
+ - [Indexing](../docs/lexical-graph/indexing.md)
177
+ - [Batch Extraction](../docs/lexical-graph/batch-extraction.md)
178
+ - [Configuring Batch Extraction](../docs/lexical-graph/configuring-batch-extraction.md)
179
+ - [Versioned Updates](../docs/lexical-graph/versioned-updates.md)
180
+ - [Querying](../docs/lexical-graph/querying.md)
181
+ - [Traversal-Based Search](../docs/lexical-graph/traversal-based-search.md)
182
+ - [Traversal-Based Search Configuration](../docs/lexical-graph/traversal-based-search-configuration.md)
183
+ - [Configuration](../docs/lexical-graph/configuration.md)
184
+ - [Security](../docs/lexical-graph/security.md)
185
+ - [FAQ](../docs/lexical-graph/faq.md)
186
+
187
+
188
+ ## License
189
+
190
+ This project is licensed under the Apache-2.0 License.
@@ -0,0 +1,168 @@
1
+ ## Lexical Graph
2
+
3
+ The lexical-graph package provides a framework for automating the construction of a [hierarchical lexical graph](../docs/lexical-graph/graph-model.md) from unstructured data, and composing question-answering strategies that query this graph when answering user questions.
4
+
5
+ ### Features
6
+
7
+ - Built-in graph store support for [Amazon Neptune Analytics](https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html), [Amazon Neptune Database](https://docs.aws.amazon.com/neptune/latest/userguide/intro.html), and [Neo4j](https://neo4j.com/docs/).
8
+ - Built-in vector store support for Neptune Analytics, [Amazon OpenSearch Serverless](https://docs.aws.amazon.com/opensearch-service/latest/developerguide/serverless.html), [Amazon S3 Vectors](https://docs.aws.amazon.com/AmazonS3/latest/userguide/s3-vectors.html) and Postgres with the pgvector extension.
9
+ - Built-in support for foundation models (LLMs and embedding models) on [Amazon Bedrock](https://docs.aws.amazon.com/bedrock/).
10
+ - Easily extended to support additional graph and vector stores and model backends.
11
+ - [Multi-tenancy](../docs/lexical-graph/multi-tenancy.md) – multiple separate lexical graphs in the same underlying graph and vector stores.
12
+ - Continuous ingest and [batch extraction](../docs/lexical-graph/batch-extraction.md) (using [Bedrock batch inference](https://docs.aws.amazon.com/bedrock/latest/userguide/batch-inference.html)) modes.
13
+ - [Versioned updates](../docs/lexical-graph/versioned-updates.md) for updating source documents and querying the state of the graph and vector stores at a point in time.
14
+ - Quickstart [AWS CloudFormation templates](../examples/lexical-graph/cloudformation-templates/) for Neptune Database, OpenSearch Serverless, and Amazon Aurora Postgres.
15
+
16
+ ## Installation
17
+
18
+ The lexical-graph package requires Python 3.10 or greater and [pip](https://pip.pypa.io/en/stable/).
19
+
20
+ Install from the latest release tag:
21
+
22
+ ```
23
+ $ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/tags/v3.16.2.zip#subdirectory=lexical-graph
24
+ ```
25
+
26
+ Or install from the `main` branch to get the latest changes:
27
+
28
+ ```
29
+ $ pip install https://github.com/awslabs/graphrag-toolkit/archive/refs/heads/main.zip#subdirectory=lexical-graph
30
+ ```
31
+
32
+ If you're running on AWS, you must run your application in an AWS region containing the Amazon Bedrock foundation models used by the lexical graph (see the [configuration](../docs/lexical-graph/configuration.md#graphragconfig) section in the documentation for details on the default models used), and must [enable access](https://docs.aws.amazon.com/bedrock/latest/userguide/model-access.html) to these models before running any part of the solution.
33
+
34
+ ### Additional dependencies
35
+
36
+ You will need to install additional dependencies for specific graph and vector store backends:
37
+
38
+ #### Amazon OpenSearch Serverless
39
+
40
+ ```bash
41
+ $ pip install opensearch-py llama-index-vector-stores-opensearch
42
+ ```
43
+
44
+ #### Postgres with pgvector
45
+
46
+ ```bash
47
+ $ pip install psycopg2-binary pgvector
48
+ ```
49
+
50
+ #### Neo4j
51
+
52
+ ```bash
53
+ $ pip install neo4j
54
+ ```
55
+
56
+ ### Connection strings
57
+
58
+ Pass a connection string to `GraphStoreFactory.for_graph_store()` or `VectorStoreFactory.for_vector_store()` to select a backend:
59
+
60
+ | Store | Connection string |
61
+ | --- | --- |
62
+ | Neptune Analytics (graph) | `neptune-graph://<graph-id>` |
63
+ | Neptune Database (graph) | `neptune-db://<hostname>` or any hostname ending `.neptune.amazonaws.com` |
64
+ | Neo4j (graph) | `bolt://`, `bolt+ssc://`, `bolt+s://`, `neo4j://`, `neo4j+ssc://`, or `neo4j+s://` URLs |
65
+ | OpenSearch Serverless (vector) | `aoss://<url>` |
66
+ | Neptune Analytics (vector) | `neptune-graph://<graph-id>` |
67
+ | pgvector (vector) | constructed via `PGVectorIndexFactory` |
68
+ | S3 Vectors (vector) | constructed via `S3VectorIndexFactory` |
69
+ | Dummy / no-op | `None` or any unrecognised string — falls back to `DummyGraphStore` / `DummyVectorIndex` |
70
+
71
+ ## Example of use
72
+
73
+ ### Indexing
74
+
75
+ ```python
76
+ from graphrag_toolkit.lexical_graph import LexicalGraphIndex
77
+ from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
78
+ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
79
+
80
+ # requires pip install llama-index-readers-web
81
+ from llama_index.readers.web import SimpleWebPageReader
82
+
83
+ def run_extract_and_build():
84
+
85
+ with (
86
+ GraphStoreFactory.for_graph_store(
87
+ 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
88
+ ) as graph_store,
89
+ VectorStoreFactory.for_vector_store(
90
+ 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
91
+ ) as vector_store
92
+ ):
93
+
94
+ graph_index = LexicalGraphIndex(
95
+ graph_store,
96
+ vector_store
97
+ )
98
+
99
+ doc_urls = [
100
+ 'https://docs.aws.amazon.com/neptune/latest/userguide/intro.html',
101
+ 'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/what-is-neptune-analytics.html',
102
+ 'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-features.html',
103
+ 'https://docs.aws.amazon.com/neptune-analytics/latest/userguide/neptune-analytics-vs-neptune-database.html'
104
+ ]
105
+
106
+ docs = SimpleWebPageReader(
107
+ html_to_text=True,
108
+ metadata_fn=lambda url:{'url': url}
109
+ ).load_data(doc_urls)
110
+
111
+ graph_index.extract_and_build(docs, show_progress=True)
112
+
113
+ if __name__ == '__main__':
114
+ run_extract_and_build()
115
+ ```
116
+
117
+ ### Querying
118
+
119
+ ```python
120
+ from graphrag_toolkit.lexical_graph import LexicalGraphQueryEngine
121
+ from graphrag_toolkit.lexical_graph.storage import GraphStoreFactory
122
+ from graphrag_toolkit.lexical_graph.storage import VectorStoreFactory
123
+
124
+ def run_query():
125
+
126
+ with (
127
+ GraphStoreFactory.for_graph_store(
128
+ 'neptune-db://my-graph.cluster-abcdefghijkl.us-east-1.neptune.amazonaws.com'
129
+ ) as graph_store,
130
+ VectorStoreFactory.for_vector_store(
131
+ 'aoss://https://abcdefghijkl.us-east-1.aoss.amazonaws.com'
132
+ ) as vector_store
133
+ ):
134
+
135
+ query_engine = LexicalGraphQueryEngine.for_traversal_based_search(
136
+ graph_store,
137
+ vector_store
138
+ )
139
+
140
+ response = query_engine.query('''What are the differences between Neptune Database
141
+ and Neptune Analytics?''')
142
+
143
+ print(response.response)
144
+
145
+ if __name__ == '__main__':
146
+ run_query()
147
+ ```
148
+
149
+ ## Documentation
150
+
151
+ - [Overview](../docs/lexical-graph/overview.md)
152
+ - [Graph Model](../docs/lexical-graph/graph-model.md)
153
+ - [Storage Model](../docs/lexical-graph/storage-model.md)
154
+ - [Indexing](../docs/lexical-graph/indexing.md)
155
+ - [Batch Extraction](../docs/lexical-graph/batch-extraction.md)
156
+ - [Configuring Batch Extraction](../docs/lexical-graph/configuring-batch-extraction.md)
157
+ - [Versioned Updates](../docs/lexical-graph/versioned-updates.md)
158
+ - [Querying](../docs/lexical-graph/querying.md)
159
+ - [Traversal-Based Search](../docs/lexical-graph/traversal-based-search.md)
160
+ - [Traversal-Based Search Configuration](../docs/lexical-graph/traversal-based-search-configuration.md)
161
+ - [Configuration](../docs/lexical-graph/configuration.md)
162
+ - [Security](../docs/lexical-graph/security.md)
163
+ - [FAQ](../docs/lexical-graph/faq.md)
164
+
165
+
166
+ ## License
167
+
168
+ This project is licensed under the Apache-2.0 License.
@@ -0,0 +1,18 @@
1
+ [build-system]
2
+ requires = ["hatchling", "hatch-requirements-txt"]
3
+ build-backend = "hatchling.build"
4
+
5
+ [tool.hatch.build.targets.wheel]
6
+ packages = ["src/graphrag_toolkit"]
7
+
8
+ [project]
9
+ name = "grt-lexical-graph"
10
+ version = "3.17.2.dev0"
11
+ description = "AWS GraphRAG Toolkit, lexical graph"
12
+ readme = "README.md"
13
+ requires-python = ">=3.10"
14
+ dynamic = ["dependencies"]
15
+ license = "Apache-2.0"
16
+
17
+ [tool.hatch.metadata.hooks.requirements_txt]
18
+ files = ["src/graphrag_toolkit/lexical_graph/requirements.txt"]
@@ -0,0 +1,56 @@
1
+ # Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
+ # SPDX-License-Identifier: Apache-2.0
3
+
4
+ import warnings
5
+ from pydantic.warnings import UnsupportedFieldAttributeWarning
6
+
7
+ warnings.filterwarnings('ignore', category=UnsupportedFieldAttributeWarning)
8
+ warnings.filterwarnings('ignore', message="Can't initialize NVML")
9
+
10
+ import asyncio
11
+ import llama_index.core.async_utils
12
+ import logging as l
13
+
14
+ logger = l.getLogger(__name__)
15
+
16
def _asyncio_run(coro):
    """Run ``coro`` to completion and return its result.

    The coroutine is first driven on the event loop returned by
    ``asyncio.get_event_loop()``. If that attempt raises ``RuntimeError``
    (no usable loop, the loop is already running, or the coroutine itself
    raised ``RuntimeError``), a second attempt is made with ``asyncio.run()``.

    Args:
        coro: The coroutine object to execute.

    Returns:
        Whatever ``coro`` returns.

    Raises:
        RuntimeError: If both attempts fail with ``RuntimeError``. The message
            points the caller at ``nest_asyncio`` or the async entry points,
            and the underlying error is chained as ``__cause__``.
    """
    # Log through this module's named logger rather than the root logger so
    # the message respects the package's logging configuration.
    l.getLogger(__name__).debug(
        'Patching asyncio_run() to run coroutine on existing event loop to support notebooks.'
    )

    try:
        loop = asyncio.get_event_loop()
        return loop.run_until_complete(coro)
    except RuntimeError:
        try:
            return asyncio.run(coro)
        except RuntimeError as e:
            # The trailing space after the first sentence matters: adjacent
            # string literals are concatenated verbatim.
            raise RuntimeError(
                "Detected nested async. Please use nest_asyncio.apply() to allow nested event loops. "
                "Or, use async entry methods like `aquery()`, `aretriever`, `achat`, etc."
            ) from e
33
+
34
# Replace llama_index's asyncio_run with _asyncio_run only when an event loop
# is already running at import time (the notebook case this patch targets).
# NOTE: is_running is a method and must be called; testing the bare attribute
# is always truthy and would install the patch unconditionally.
try:
    loop = asyncio.get_event_loop()
    if loop.is_running():
        llama_index.core.async_utils.asyncio_run = _asyncio_run
except RuntimeError:
    # get_event_loop() could not supply a loop for this thread; keep
    # llama_index's own asyncio_run in place.
    pass
40
+
41
+ from .tenant_id import TenantId, DEFAULT_TENANT_ID, DEFAULT_TENANT_NAME, TenantIdType, to_tenant_id
42
+ from .config import GraphRAGConfig as GraphRAGConfig, LLMType, EmbeddingType
43
+ from .errors import ModelError, BatchJobError, IndexError, GraphQueryError, ConfigurationError
44
+ from .logging import set_logging_config, set_advanced_logging_config
45
+ from .lexical_graph_query_engine import LexicalGraphQueryEngine
46
+ from .lexical_graph_index import LexicalGraphIndex
47
+ from .lexical_graph_index import ExtractionConfig, BuildConfig, IndexingConfig
48
+ from .metadata import to_metadata_filter
49
+ from .versioning import add_versioning_info, VersioningConfig, VersioningMode
50
+ from . import utils
51
+ from . import indexing
52
+ from . import retrieval
53
+ from . import storage
54
+
55
+
56
+