graphrag-chunking 3.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,65 @@
1
+ # Python Artifacts
2
+ python/*/lib/
3
+ dist/
4
+ build/
5
+ *.egg-info/
6
+
7
+ # Test Output
8
+ .coverage
9
+ coverage/
10
+ licenses.txt
11
+ examples_notebooks/*/data
12
+ tests/fixtures/cache
13
+ tests/fixtures/*/cache
14
+ tests/fixtures/*/output
15
+ output/lancedb
16
+
17
+
18
+ # Random
19
+ .DS_Store
20
+ *.log*
21
+ .venv
22
+ venv/
23
+ .conda
24
+ .tmp
25
+ packages/graphrag-llm/notebooks/metrics
26
+ packages/graphrag-llm/notebooks/cache
27
+
28
+ .env
29
+ build.zip
30
+
31
+ .turbo
32
+
33
+ __pycache__
34
+
35
+ .pipeline
36
+
37
+ # Azurite
38
+ temp_azurite/
39
+ __azurite*.json
40
+ __blobstorage*.json
41
+ __blobstorage__/
42
+
43
+ # Getting started example
44
+ ragtest/
45
+ .ragtest/
46
+ .pipelines
47
+ .pipeline
48
+
49
+
50
+ # mkdocs
51
+ site/
52
+
53
+ # Docs migration
54
+ docsite/
55
+ .yarn/
56
+ .pnp*
57
+
58
+ # PyCharm
59
+ .idea/
60
+
61
+ # Jupyter notebook
62
+ .ipynb_checkpoints/
63
+
64
+ # Root build assets
65
+ packages/*/LICENSE
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) Microsoft Corporation.
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,59 @@
1
+ Metadata-Version: 2.4
2
+ Name: graphrag-chunking
3
+ Version: 3.0.0
4
+ Summary: Chunking utilities for GraphRAG
5
+ Project-URL: Source, https://github.com/microsoft/graphrag
6
+ Author: Mónica Carvajal
7
+ Author-email: Alonso Guevara Fernández <alonsog@microsoft.com>, Andrés Morales Esquivel <andresmor@microsoft.com>, Chris Trevino <chtrevin@microsoft.com>, David Tittsworth <datittsw@microsoft.com>, Dayenne de Souza <ddesouza@microsoft.com>, Derek Worthen <deworthe@microsoft.com>, Gaudy Blanco Meneses <gaudyb@microsoft.com>, Ha Trinh <trinhha@microsoft.com>, Jonathan Larson <jolarso@microsoft.com>, Josh Bradley <joshbradley@microsoft.com>, Kate Lytvynets <kalytv@microsoft.com>, Kenny Zhang <zhangken@microsoft.com>, Nathan Evans <naevans@microsoft.com>, Rodrigo Racanicci <rracanicci@microsoft.com>, Sarah Smith <smithsarah@microsoft.com>
8
+ License: MIT
9
+ License-File: LICENSE
10
+ Classifier: Programming Language :: Python :: 3
11
+ Classifier: Programming Language :: Python :: 3.11
12
+ Classifier: Programming Language :: Python :: 3.12
13
+ Classifier: Programming Language :: Python :: 3.13
14
+ Requires-Python: <3.14,>=3.11
15
+ Requires-Dist: graphrag-common==3.0.0
16
+ Requires-Dist: pydantic~=2.10
17
+ Description-Content-Type: text/markdown
18
+
19
+ # GraphRAG Chunking
20
+
21
+ This package contains a collection of text chunkers, a core config model, and a factory for acquiring instances.
22
+
23
+ ## Examples
24
+
25
+ ### Basic sentence chunking with nltk
26
+
27
+ The SentenceChunker class splits text into individual sentences by identifying sentence boundaries. It takes input text and returns a list where each element is a separate sentence, making it easy to process text at the sentence level.
28
+
29
+ ```python
30
+ chunker = SentenceChunker()
31
+ chunks = chunker.chunk("This is a test. Another sentence.")
32
+ print(chunks) # ["This is a test.", "Another sentence."]
33
+ ```
34
+
35
+ ### Token chunking
36
+
37
+ The TokenChunker splits text into fixed-size chunks based on token count rather than sentence boundaries. It uses a tokenizer to encode text into tokens, then creates chunks of a specified size with configurable overlap between chunks.
38
+
39
+ ```python
40
+ tokenizer = tiktoken.get_encoding("o200k_base")
41
+ chunker = TokenChunker(size=3, overlap=0, encode=tokenizer.encode, decode=tokenizer.decode)
42
+ chunks = chunker.chunk("This is a random test fragment of some text")
43
+ print(chunks) # ["This is a", " random test fragment", " of some text"]
44
+ ```
45
+
46
+ ### Using the factory via helper util
47
+
48
+ The create_chunker factory function provides a configuration-driven approach to instantiate chunkers by accepting a ChunkingConfig object that specifies the chunking strategy and parameters. This allows for more flexible and maintainable code by separating chunker configuration from direct instantiation.
49
+
50
+ ```python
51
+ tokenizer = tiktoken.get_encoding("o200k_base")
52
+ config = ChunkingConfig(
53
+     type="tokens",
54
+ size=3,
55
+ overlap=0
56
+ )
57
+ chunker = create_chunker(config, tokenizer.encode, tokenizer.decode)
58
+ ...
59
+ ```
@@ -0,0 +1,41 @@
1
+ # GraphRAG Chunking
2
+
3
+ This package contains a collection of text chunkers, a core config model, and a factory for acquiring instances.
4
+
5
+ ## Examples
6
+
7
+ ### Basic sentence chunking with nltk
8
+
9
+ The SentenceChunker class splits text into individual sentences by identifying sentence boundaries. It takes input text and returns a list where each element is a separate sentence, making it easy to process text at the sentence level.
10
+
11
+ ```python
12
+ chunker = SentenceChunker()
13
+ chunks = chunker.chunk("This is a test. Another sentence.")
14
+ print(chunks) # ["This is a test.", "Another sentence."]
15
+ ```
16
+
17
+ ### Token chunking
18
+
19
+ The TokenChunker splits text into fixed-size chunks based on token count rather than sentence boundaries. It uses a tokenizer to encode text into tokens, then creates chunks of a specified size with configurable overlap between chunks.
20
+
21
+ ```python
22
+ tokenizer = tiktoken.get_encoding("o200k_base")
23
+ chunker = TokenChunker(size=3, overlap=0, encode=tokenizer.encode, decode=tokenizer.decode)
24
+ chunks = chunker.chunk("This is a random test fragment of some text")
25
+ print(chunks) # ["This is a", " random test fragment", " of some text"]
26
+ ```
27
+
28
+ ### Using the factory via helper util
29
+
30
+ The create_chunker factory function provides a configuration-driven approach to instantiate chunkers by accepting a ChunkingConfig object that specifies the chunking strategy and parameters. This allows for more flexible and maintainable code by separating chunker configuration from direct instantiation.
31
+
32
+ ```python
33
+ tokenizer = tiktoken.get_encoding("o200k_base")
34
+ config = ChunkingConfig(
35
+     type="tokens",
36
+ size=3,
37
+ overlap=0
38
+ )
39
+ chunker = create_chunker(config, tokenizer.encode, tokenizer.decode)
40
+ ...
41
+ ```
@@ -0,0 +1,4 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """System-level chunking package."""
@@ -0,0 +1,31 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """Bootstrap definition."""
5
+
6
+ import warnings
7
+
8
+ # Ignore warnings from numba
9
+ warnings.filterwarnings("ignore", message=".*The 'nopython' keyword.*")
10
+ warnings.filterwarnings("ignore", message=".*Use no seed for parallelism.*")
11
+
12
+ initialized_nltk = False
13
+
14
+
15
def bootstrap():
    """Download and initialize the NLTK resources required for sentence chunking.

    Idempotent: the module-level `initialized_nltk` flag ensures the downloads
    run at most once per process.
    """
    global initialized_nltk
    if initialized_nltk:
        return
    import nltk
    from nltk.corpus import wordnet as wn

    # Same resources, same order, as individual download calls would make.
    required_packages = (
        "punkt",
        "punkt_tab",
        "averaged_perceptron_tagger",
        "averaged_perceptron_tagger_eng",
        "maxent_ne_chunker",
        "maxent_ne_chunker_tab",
        "words",
        "wordnet",
    )
    for package in required_packages:
        nltk.download(package)
    wn.ensure_loaded()
    initialized_nltk = True
@@ -0,0 +1,13 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """Chunk strategy type enumeration."""
5
+
6
+ from enum import StrEnum
7
+
8
+
9
class ChunkerType(StrEnum):
    """Built-in chunker strategy identifiers accepted by the chunker factory."""

    # Fixed-size token-window chunking (handled by TokenChunker).
    Tokens = "tokens"
    # Sentence-boundary chunking via nltk (handled by SentenceChunker).
    Sentence = "sentence"
@@ -0,0 +1,24 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """A module containing the 'Chunker' class."""
5
+
6
+ from abc import ABC, abstractmethod
7
+ from collections.abc import Callable
8
+ from typing import Any
9
+
10
+ from graphrag_chunking.text_chunk import TextChunk
11
+
12
+
13
class Chunker(ABC):
    """Abstract base class for document chunkers."""

    @abstractmethod
    def __init__(self, **kwargs: Any) -> None:
        """Create a chunker instance.

        Args
        ----
        - kwargs: Any
            Implementation-specific configuration options.
        """

    @abstractmethod
    def chunk(
        self, text: str, transform: Callable[[str], str] | None = None
    ) -> list[TextChunk]:
        """Split the given text into a list of TextChunk results.

        Args
        ----
        - text: str
            The source text to chunk.
        - transform: Callable[[str], str] | None
            Optional transform applied to each chunk's final text.

        Returns
        -------
        list[TextChunk]
            The chunks in document order.
        """
@@ -0,0 +1,77 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """A module containing 'ChunkerFactory', 'register_chunker', and 'create_chunker'."""
5
+
6
+ from collections.abc import Callable
7
+
8
+ from graphrag_common.factory.factory import Factory, ServiceScope
9
+
10
+ from graphrag_chunking.chunk_strategy_type import ChunkerType
11
+ from graphrag_chunking.chunker import Chunker
12
+ from graphrag_chunking.chunking_config import ChunkingConfig
13
+
14
+
15
class ChunkerFactory(Factory[Chunker]):
    """Factory for creating Chunker instances, keyed by strategy id string."""
17
+
18
+
19
+ chunker_factory = ChunkerFactory()
20
+
21
+
22
def register_chunker(
    chunker_type: str,
    chunker_initializer: Callable[..., Chunker],
    scope: ServiceScope = "transient",
) -> None:
    """Register a custom chunker implementation.

    Args
    ----
    - chunker_type: str
        The chunker id to register.
    - chunker_initializer: Callable[..., Chunker]
        The chunker initializer to register.
    - scope: ServiceScope
        The factory service scope for created instances (defaults to "transient").
    """
    chunker_factory.register(chunker_type, chunker_initializer, scope)
37
+
38
+
39
+ def create_chunker(
40
+ config: ChunkingConfig,
41
+ encode: Callable[[str], list[int]] | None = None,
42
+ decode: Callable[[list[int]], str] | None = None,
43
+ ) -> Chunker:
44
+ """Create a chunker implementation based on the given configuration.
45
+
46
+ Args
47
+ ----
48
+ - config: ChunkingConfig
49
+ The chunker configuration to use.
50
+
51
+ Returns
52
+ -------
53
+ Chunker
54
+ The created chunker implementation.
55
+ """
56
+ config_model = config.model_dump()
57
+ if encode is not None:
58
+ config_model["encode"] = encode
59
+ if decode is not None:
60
+ config_model["decode"] = decode
61
+ chunker_strategy = config.type
62
+
63
+ if chunker_strategy not in chunker_factory:
64
+ match chunker_strategy:
65
+ case ChunkerType.Tokens:
66
+ from graphrag_chunking.token_chunker import TokenChunker
67
+
68
+ register_chunker(ChunkerType.Tokens, TokenChunker)
69
+ case ChunkerType.Sentence:
70
+ from graphrag_chunking.sentence_chunker import SentenceChunker
71
+
72
+ register_chunker(ChunkerType.Sentence, SentenceChunker)
73
+ case _:
74
+ msg = f"ChunkingConfig.strategy '{chunker_strategy}' is not registered in the ChunkerFactory. Registered types: {', '.join(chunker_factory.keys())}."
75
+ raise ValueError(msg)
76
+
77
+ return chunker_factory.create(chunker_strategy, init_args=config_model)
@@ -0,0 +1,36 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """Parameterization settings for the default configuration."""
5
+
6
+ from pydantic import BaseModel, ConfigDict, Field
7
+
8
+ from graphrag_chunking.chunk_strategy_type import ChunkerType
9
+
10
+
11
class ChunkingConfig(BaseModel):
    """Configuration section for chunking."""

    model_config = ConfigDict(extra="allow")
    """Allow extra fields to support custom chunker implementations."""

    type: str = Field(
        description="The chunking type to use.",
        default=ChunkerType.Tokens,
    )
    encoding_model: str | None = Field(
        description="The encoding model to use.",
        default=None,
    )
    # For the built-in "tokens" strategy, size and overlap are measured in tokens.
    size: int = Field(
        description="The chunk size to use.",
        default=1200,
    )
    overlap: int = Field(
        description="The chunk overlap to use.",
        default=100,
    )
    prepend_metadata: list[str] | None = Field(
        description="Metadata fields from the source document to prepend on each chunk.",
        default=None,
    )
@@ -0,0 +1,32 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """A module containing 'create_chunk_results' function."""
5
+
6
+ from collections.abc import Callable
7
+
8
+ from graphrag_chunking.text_chunk import TextChunk
9
+
10
+
11
def create_chunk_results(
    chunks: list[str],
    transform: Callable[[str], str] | None = None,
    encode: Callable[[str], list[int]] | None = None,
) -> list[TextChunk]:
    """Create chunk results from a list of text chunks. The index assignments are 0-based and assume chunks were not stripped relative to the source text."""
    results: list[TextChunk] = []
    cursor = 0
    for position, raw in enumerate(chunks):
        # Chunks are assumed contiguous, so this chunk ends where the next begins.
        last_char = cursor + len(raw) - 1  # inclusive, 0-based
        chunk_result = TextChunk(
            original=raw,
            text=transform(raw) if transform else raw,
            index=position,
            start_char=cursor,
            end_char=last_char,
        )
        if encode:
            # Token count reflects the (possibly transformed) final text.
            chunk_result.token_count = len(encode(chunk_result.text))
        results.append(chunk_result)
        cursor = last_char + 1
    return results
@@ -0,0 +1,48 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """A module containing 'SentenceChunker' class."""
5
+
6
+ from collections.abc import Callable
7
+ from typing import Any
8
+
9
+ import nltk
10
+
11
+ from graphrag_chunking.bootstrap_nltk import bootstrap
12
+ from graphrag_chunking.chunker import Chunker
13
+ from graphrag_chunking.create_chunk_results import create_chunk_results
14
+ from graphrag_chunking.text_chunk import TextChunk
15
+
16
+
17
class SentenceChunker(Chunker):
    """A chunker that splits text into sentence-based chunks."""

    def __init__(
        self, encode: Callable[[str], list[int]] | None = None, **kwargs: Any
    ) -> None:
        """Create a sentence chunker instance.

        Args
        ----
        - encode: Callable[[str], list[int]] | None
            Optional tokenizer encode function used to compute per-chunk token counts.
        """
        self._encode = encode
        bootstrap()

    def chunk(
        self, text: str, transform: Callable[[str], str] | None = None
    ) -> list[TextChunk]:
        """Chunk the text into sentence-based chunks.

        Args
        ----
        - text: str
            The source text to chunk.
        - transform: Callable[[str], str] | None
            Optional transform applied to each chunk's final text.

        Returns
        -------
        list[TextChunk]
            One chunk per sentence, with start/end chars aligned to `text`.
        """
        sentences = nltk.sent_tokenize(text.strip())
        results = create_chunk_results(
            sentences, transform=transform, encode=self._encode
        )
        # nltk sentence tokenizer may trim whitespace, so we need to adjust start/end chars.
        # Search with the raw (untransformed) sentence text: `result.text` may have been
        # rewritten by `transform` and would never be found in the source document,
        # which would make `find` return -1 and silently skip the adjustment.
        for index, result in enumerate(results):
            raw = result.original
            start = result.start_char
            actual_start = text.find(raw, start)
            delta = actual_start - start
            if delta > 0:
                result.start_char += delta
                result.end_char += delta
                # bump the next to keep the start check from falling too far behind
                if index < len(results) - 1:
                    results[index + 1].start_char += delta
                    results[index + 1].end_char += delta
        return results
@@ -0,0 +1,29 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """The TextChunk dataclass."""
5
+
6
+ from dataclasses import dataclass
7
+
8
+
9
@dataclass
class TextChunk:
    """Result of chunking a document."""

    original: str
    """Raw original text chunk before any transformation."""

    text: str
    """The final text content of this chunk."""

    index: int
    """Zero-based index of this chunk within the source document."""

    start_char: int
    """Character index where the raw chunk text begins in the source document."""

    # Inclusive: set to start_char + len(original) - 1 by create_chunk_results.
    end_char: int
    """Character index where the raw chunk text ends in the source document."""

    token_count: int | None = None
    """Number of tokens in the final chunk text, if computed."""
@@ -0,0 +1,69 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """A module containing 'TokenChunker' class."""
5
+
6
+ from collections.abc import Callable
7
+ from typing import Any
8
+
9
+ from graphrag_chunking.chunker import Chunker
10
+ from graphrag_chunking.create_chunk_results import create_chunk_results
11
+ from graphrag_chunking.text_chunk import TextChunk
12
+
13
+
14
class TokenChunker(Chunker):
    """A chunker that splits text into token-based chunks."""

    def __init__(
        self,
        size: int,
        overlap: int,
        encode: Callable[[str], list[int]],
        decode: Callable[[list[int]], str],
        **kwargs: Any,
    ) -> None:
        """Create a token chunker instance.

        Args
        ----
        - size: int
            Maximum number of tokens per chunk.
        - overlap: int
            Number of tokens shared between consecutive chunks.
        - encode: Callable[[str], list[int]]
            Tokenizer encode function (text -> token ids).
        - decode: Callable[[list[int]], str]
            Tokenizer decode function (token ids -> text).
        """
        self._size = size
        self._overlap = overlap
        self._encode = encode
        self._decode = decode

    def chunk(
        self, text: str, transform: Callable[[str], str] | None = None
    ) -> list[TextChunk]:
        """Chunk the text into token-based chunks.

        Args
        ----
        - text: str
            The source text to chunk.
        - transform: Callable[[str], str] | None
            Optional transform applied to each chunk's final text.

        Returns
        -------
        list[TextChunk]
            The decoded token-window chunks in document order.
        """
        chunks = split_text_on_tokens(
            text,
            chunk_size=self._size,
            chunk_overlap=self._overlap,
            encode=self._encode,
            decode=self._decode,
        )
        return create_chunk_results(chunks, transform=transform, encode=self._encode)
43
+
44
+
45
def split_text_on_tokens(
    text: str,
    chunk_size: int,
    chunk_overlap: int,
    encode: Callable[[str], list[int]],
    decode: Callable[[list[int]], str],
) -> list[str]:
    """Split a single text and return chunks using the tokenizer.

    Args
    ----
    - text: str
        The text to split. Empty text yields an empty list.
    - chunk_size: int
        Maximum number of tokens per chunk; must be positive.
    - chunk_overlap: int
        Number of tokens shared between consecutive chunks; must be smaller
        than chunk_size.
    - encode: Callable[[str], list[int]]
        Tokenizer encode function (text -> token ids).
    - decode: Callable[[list[int]], str]
        Tokenizer decode function (token ids -> text).

    Returns
    -------
    list[str]
        The decoded chunk strings, in order.

    Raises
    ------
    ValueError
        If chunk_size is not positive, or chunk_overlap >= chunk_size (the
        window step would be non-positive and the loop would never terminate).
    """
    if chunk_size <= 0:
        msg = f"chunk_size must be positive, got {chunk_size}."
        raise ValueError(msg)
    if chunk_overlap >= chunk_size:
        # A step of (chunk_size - chunk_overlap) <= 0 would loop forever.
        msg = f"chunk_overlap ({chunk_overlap}) must be smaller than chunk_size ({chunk_size})."
        raise ValueError(msg)

    result = []
    input_tokens = encode(text)

    start_idx = 0
    cur_idx = min(start_idx + chunk_size, len(input_tokens))
    chunk_tokens = input_tokens[start_idx:cur_idx]

    while start_idx < len(input_tokens):
        chunk_text = decode(list(chunk_tokens))
        result.append(chunk_text)  # Append chunked text as string
        if cur_idx == len(input_tokens):
            break
        # Advance the window by (size - overlap) tokens.
        start_idx += chunk_size - chunk_overlap
        cur_idx = min(start_idx + chunk_size, len(input_tokens))
        chunk_tokens = input_tokens[start_idx:cur_idx]

    return result
@@ -0,0 +1,25 @@
1
+ # Copyright (c) 2024 Microsoft Corporation.
2
+ # Licensed under the MIT License
3
+
4
+ """A collection of useful built-in transformers you can use for chunking."""
5
+
6
+ from collections.abc import Callable
7
+ from typing import Any
8
+
9
+
10
def add_metadata(
    metadata: dict[str, Any],
    delimiter: str = ": ",
    line_delimiter: str = "\n",
    append: bool = False,
) -> Callable[[str], str]:
    """Add metadata to the given text, prepending by default. This utility writes the dict as rows of key/value pairs."""

    def transformer(text: str) -> str:
        # One "key<delimiter>value" row per dict entry, terminated by line_delimiter.
        rows = [f"{key}{delimiter}{value}" for key, value in metadata.items()]
        block = line_delimiter.join(rows) + line_delimiter
        if append:
            return text + block
        return block + text

    return transformer
@@ -0,0 +1,43 @@
1
+ [project]
2
+ name = "graphrag-chunking"
3
+ version = "3.0.0"
4
+ description = "Chunking utilities for GraphRAG"
5
+ authors = [
6
+ {name = "Alonso Guevara Fernández", email = "alonsog@microsoft.com"},
7
+ {name = "Andrés Morales Esquivel", email = "andresmor@microsoft.com"},
8
+ {name = "Chris Trevino", email = "chtrevin@microsoft.com"},
9
+ {name = "David Tittsworth", email = "datittsw@microsoft.com"},
10
+ {name = "Dayenne de Souza", email = "ddesouza@microsoft.com"},
11
+ {name = "Derek Worthen", email = "deworthe@microsoft.com"},
12
+ {name = "Gaudy Blanco Meneses", email = "gaudyb@microsoft.com"},
13
+ {name = "Ha Trinh", email = "trinhha@microsoft.com"},
14
+ {name = "Jonathan Larson", email = "jolarso@microsoft.com"},
15
+ {name = "Josh Bradley", email = "joshbradley@microsoft.com"},
16
+ {name = "Kate Lytvynets", email = "kalytv@microsoft.com"},
17
+ {name = "Kenny Zhang", email = "zhangken@microsoft.com"},
18
+ {name = "Mónica Carvajal"},
19
+ {name = "Nathan Evans", email = "naevans@microsoft.com"},
20
+ {name = "Rodrigo Racanicci", email = "rracanicci@microsoft.com"},
21
+ {name = "Sarah Smith", email = "smithsarah@microsoft.com"},
22
+ ]
23
+ license = {text = "MIT"}
24
+ readme = "README.md"
25
+ requires-python = ">=3.11,<3.14"
26
+ classifiers = [
27
+ "Programming Language :: Python :: 3",
28
+ "Programming Language :: Python :: 3.11",
29
+ "Programming Language :: Python :: 3.12",
30
+ "Programming Language :: Python :: 3.13",
31
+ ]
32
+ dependencies = [
33
+ "graphrag-common==3.0.0",
34
+ "pydantic~=2.10",
35
+ ]
36
+
37
+ [project.urls]
38
+ Source = "https://github.com/microsoft/graphrag"
39
+
40
+ [build-system]
41
+ requires = ["hatchling>=1.27.0,<2.0.0"]
42
+ build-backend = "hatchling.build"
43
+