cognee-0.3.8-py3-none-any.whl → cognee-0.3.9-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- cognee/infrastructure/files/utils/get_file_metadata.py +3 -3
- cognee/infrastructure/files/utils/guess_file_type.py +19 -5
- cognee/infrastructure/loaders/core/audio_loader.py +1 -0
- cognee/modules/ingestion/data_types/BinaryData.py +1 -1
- {cognee-0.3.8.dist-info → cognee-0.3.9.dist-info}/METADATA +1 -1
- {cognee-0.3.8.dist-info → cognee-0.3.9.dist-info}/RECORD +10 -10
- {cognee-0.3.8.dist-info → cognee-0.3.9.dist-info}/WHEEL +0 -0
- {cognee-0.3.8.dist-info → cognee-0.3.9.dist-info}/entry_points.txt +0 -0
- {cognee-0.3.8.dist-info → cognee-0.3.9.dist-info}/licenses/LICENSE +0 -0
- {cognee-0.3.8.dist-info → cognee-0.3.9.dist-info}/licenses/NOTICE.md +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import io
|
|
2
2
|
import os.path
|
|
3
|
-
from typing import BinaryIO, TypedDict
|
|
3
|
+
from typing import BinaryIO, TypedDict, Optional
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
|
|
6
6
|
from cognee.shared.logging_utils import get_logger
|
|
@@ -27,7 +27,7 @@ class FileMetadata(TypedDict):
|
|
|
27
27
|
file_size: int
|
|
28
28
|
|
|
29
29
|
|
|
30
|
-
async def get_file_metadata(file: BinaryIO) -> FileMetadata:
|
|
30
|
+
async def get_file_metadata(file: BinaryIO, name: Optional[str] = None) -> FileMetadata:
|
|
31
31
|
"""
|
|
32
32
|
Retrieve metadata from a file object.
|
|
33
33
|
|
|
@@ -53,7 +53,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
|
|
|
53
53
|
except io.UnsupportedOperation as error:
|
|
54
54
|
logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")
|
|
55
55
|
|
|
56
|
-
file_type = guess_file_type(file)
|
|
56
|
+
file_type = guess_file_type(file, name)
|
|
57
57
|
|
|
58
58
|
file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
|
|
59
59
|
|
|
@@ -1,6 +1,9 @@
|
|
|
1
|
-
|
|
1
|
+
import io
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
from typing import BinaryIO, Optional, Any
|
|
2
4
|
import filetype
|
|
3
|
-
from
|
|
5
|
+
from tempfile import SpooledTemporaryFile
|
|
6
|
+
from filetype.types.base import Type
|
|
4
7
|
|
|
5
8
|
|
|
6
9
|
class FileTypeException(Exception):
|
|
@@ -22,7 +25,7 @@ class FileTypeException(Exception):
|
|
|
22
25
|
self.message = message
|
|
23
26
|
|
|
24
27
|
|
|
25
|
-
def guess_file_type(file: BinaryIO) -> filetype.Type:
|
|
28
|
+
def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type:
|
|
26
29
|
"""
|
|
27
30
|
Guess the file type from the given binary file stream.
|
|
28
31
|
|
|
@@ -39,12 +42,23 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
|
|
|
39
42
|
|
|
40
43
|
- filetype.Type: The guessed file type, represented as filetype.Type.
|
|
41
44
|
"""
|
|
45
|
+
|
|
46
|
+
# Note: If file has .txt or .text extension, consider it a plain text file as filetype.guess may not detect it properly
|
|
47
|
+
# as it contains no magic number encoding
|
|
48
|
+
ext = None
|
|
49
|
+
if isinstance(file, str):
|
|
50
|
+
ext = Path(file).suffix
|
|
51
|
+
elif name is not None:
|
|
52
|
+
ext = Path(name).suffix
|
|
53
|
+
|
|
54
|
+
if ext in [".txt", ".text"]:
|
|
55
|
+
file_type = Type("text/plain", "txt")
|
|
56
|
+
return file_type
|
|
57
|
+
|
|
42
58
|
file_type = filetype.guess(file)
|
|
43
59
|
|
|
44
60
|
# If file type could not be determined consider it a plain text file as they don't have magic number encoding
|
|
45
61
|
if file_type is None:
|
|
46
|
-
from filetype.types.base import Type
|
|
47
|
-
|
|
48
62
|
file_type = Type("text/plain", "txt")
|
|
49
63
|
|
|
50
64
|
if file_type is None:
|
|
@@ -30,7 +30,7 @@ class BinaryData(IngestionData):
|
|
|
30
30
|
|
|
31
31
|
async def ensure_metadata(self):
|
|
32
32
|
if self.metadata is None:
|
|
33
|
-
self.metadata = await get_file_metadata(self.data)
|
|
33
|
+
self.metadata = await get_file_metadata(self.data, name=self.name)
|
|
34
34
|
|
|
35
35
|
if self.metadata["name"] is None:
|
|
36
36
|
self.metadata["name"] = self.name
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: cognee
|
|
3
|
-
Version: 0.3.8
|
|
3
|
+
Version: 0.3.9
|
|
4
4
|
Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
|
|
5
5
|
Project-URL: Homepage, https://www.cognee.ai
|
|
6
6
|
Project-URL: Repository, https://github.com/topoteretes/cognee
|
|
@@ -246,8 +246,8 @@ cognee/infrastructure/files/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
|
|
|
246
246
|
cognee/infrastructure/files/utils/extract_text_from_file.py,sha256=-v0uvK6nXP6Q2Ia0GjIi97WntPFX6sWZQXO_Fg9TrCc,1112
|
|
247
247
|
cognee/infrastructure/files/utils/get_data_file_path.py,sha256=Xz9anl6yYxK6wETKhVeK4f3ahjw58Aj8YkyJkJONOvc,1549
|
|
248
248
|
cognee/infrastructure/files/utils/get_file_content_hash.py,sha256=0L_wgsRF8zqmtisFWcp4agDs7WovvBjiVWNQ_NCPKwo,1338
|
|
249
|
-
cognee/infrastructure/files/utils/get_file_metadata.py,sha256=
|
|
250
|
-
cognee/infrastructure/files/utils/guess_file_type.py,sha256=
|
|
249
|
+
cognee/infrastructure/files/utils/get_file_metadata.py,sha256=3U0usuzEuGbVY0PBqQl9FyU1fWeHzlc8DRqNWZaSoc8,2336
|
|
250
|
+
cognee/infrastructure/files/utils/guess_file_type.py,sha256=s1CkS4DhhxyYIxlapGfVZKOQUbDwselljLRvlrP4BvE,1923
|
|
251
251
|
cognee/infrastructure/files/utils/is_text_content.py,sha256=iNZWCECNLMjlQfOQAujVQis7prA1cqsscRRSQsxccZo,1316
|
|
252
252
|
cognee/infrastructure/files/utils/open_data_file.py,sha256=3TPsTUDCH6SOuvbwNembE-YRiFDhb9yCqOC537b6iGY,2155
|
|
253
253
|
cognee/infrastructure/llm/LLMGateway.py,sha256=o_XXoj6qbTb2zO9MCxut81CkZODJUMtRbLAS854JSzY,2478
|
|
@@ -367,7 +367,7 @@ cognee/infrastructure/loaders/get_loader_engine.py,sha256=cPJefAHFAWU1HXQoWqCpwh
|
|
|
367
367
|
cognee/infrastructure/loaders/supported_loaders.py,sha256=LBCvqk6PGJsTtFB5vUpArPmxSegRr81y1oNAejzn1mE,961
|
|
368
368
|
cognee/infrastructure/loaders/use_loader.py,sha256=ncfUFVohPox296m8tMeIl6Hnk1xRvHcpRCmwZXKPZ1s,598
|
|
369
369
|
cognee/infrastructure/loaders/core/__init__.py,sha256=LTr8FWDXpG-Oxp8nwwn0KnHT97aIK6_FWiswmy7g40Q,230
|
|
370
|
-
cognee/infrastructure/loaders/core/audio_loader.py,sha256=
|
|
370
|
+
cognee/infrastructure/loaders/core/audio_loader.py,sha256=VBiJb3tgog51yP14CMLQP3RdF6cmj5dCsHeAHDMlCz8,3042
|
|
371
371
|
cognee/infrastructure/loaders/core/image_loader.py,sha256=b8etveiidIvCw7PXqM2ldyxXDhkqi4-Ak-4BbX664Is,3390
|
|
372
372
|
cognee/infrastructure/loaders/core/text_loader.py,sha256=zkFhjm_QeQu4fWv_Wkoe0O1Kpe9_uBgskkjeWn0sV-M,2991
|
|
373
373
|
cognee/infrastructure/loaders/external/__init__.py,sha256=UwLJK81I1Atuw3FN34EDy8NKe7sltxRLZiONYHfoW4o,884
|
|
@@ -483,7 +483,7 @@ cognee/modules/ingestion/discover_directory_datasets.py,sha256=wtqYoZ5MpGc_FuzyK
|
|
|
483
483
|
cognee/modules/ingestion/get_matched_datasets.py,sha256=BL2H_3t3wDWqcJxlo6uv-1u__g2E5OMwJYFsLCSDF34,475
|
|
484
484
|
cognee/modules/ingestion/identify.py,sha256=4-oD_VjdJC9oUmJjuLJ1a6BX1-GKbw-rNgWyB9GyhC8,346
|
|
485
485
|
cognee/modules/ingestion/save_data_to_file.py,sha256=SZFrWbkRCvENQ05JXAAKZgcVm4-s795ZPnhCgdGM5HY,1230
|
|
486
|
-
cognee/modules/ingestion/data_types/BinaryData.py,sha256=
|
|
486
|
+
cognee/modules/ingestion/data_types/BinaryData.py,sha256=UUo3MZdGaIePs0jbI_Nwwwr6HBnsdtqwt0FJU1d6rqw,1076
|
|
487
487
|
cognee/modules/ingestion/data_types/IngestionData.py,sha256=JLKzItByitgfQAeEo7-qaRRce_weij-t3YY_nJ4wFy0,309
|
|
488
488
|
cognee/modules/ingestion/data_types/S3BinaryData.py,sha256=Kdd4R2anhhIPQZ-5xihcWrMPY_MPHIfS4GJYP4ZeraU,1805
|
|
489
489
|
cognee/modules/ingestion/data_types/TextData.py,sha256=HpIgFqFHm66D-_bgEljUSsh4GSfsLaOj_ubFc_RalNQ,939
|
|
@@ -942,9 +942,9 @@ distributed/tasks/queued_add_edges.py,sha256=kz1DHE05y-kNHORQJjYWHUi6Q1QWUp_v3Dl
|
|
|
942
942
|
distributed/tasks/queued_add_nodes.py,sha256=aqK4Ij--ADwUWknxYpiwbYrpa6CcvFfqHWbUZW4Kh3A,452
|
|
943
943
|
distributed/workers/data_point_saving_worker.py,sha256=kmaQy2A2J7W3k9Gd5lyoiT0XYOaJmEM8MbkKVOFOQVU,4729
|
|
944
944
|
distributed/workers/graph_saving_worker.py,sha256=b5OPLLUq0OBALGekdp73JKxU0GrMlVbO4AfIhmACKkQ,4724
|
|
945
|
-
cognee-0.3.
|
|
946
|
-
cognee-0.3.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
947
|
-
cognee-0.3.8.dist-info/entry_points.txt,sha256=fAozOD9Vs4kgYwRhBiZoLCIXu-OSZqVxKGv45l19uok,88
|
|
948
|
-
cognee-0.3.8.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
|
|
949
|
-
cognee-0.3.8.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
|
|
950
|
-
cognee-0.3.8.dist-info/RECORD,,
|
|
945
|
+
cognee-0.3.9.dist-info/METADATA,sha256=CdDPzdK2sCof1lzKjzfD2Fbeqyl2ObVE8EuGwxxKCiQ,14938
|
|
946
|
+
cognee-0.3.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
947
|
+
cognee-0.3.9.dist-info/entry_points.txt,sha256=fAozOD9Vs4kgYwRhBiZoLCIXu-OSZqVxKGv45l19uok,88
|
|
948
|
+
cognee-0.3.9.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
|
|
949
|
+
cognee-0.3.9.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
|
|
950
|
+
cognee-0.3.9.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|