cognee 0.3.8__py3-none-any.whl → 0.3.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  import io
2
2
  import os.path
3
- from typing import BinaryIO, TypedDict
3
+ from typing import BinaryIO, TypedDict, Optional
4
4
  from pathlib import Path
5
5
 
6
6
  from cognee.shared.logging_utils import get_logger
@@ -27,7 +27,7 @@ class FileMetadata(TypedDict):
27
27
  file_size: int
28
28
 
29
29
 
30
- async def get_file_metadata(file: BinaryIO) -> FileMetadata:
30
+ async def get_file_metadata(file: BinaryIO, name: Optional[str] = None) -> FileMetadata:
31
31
  """
32
32
  Retrieve metadata from a file object.
33
33
 
@@ -53,7 +53,7 @@ async def get_file_metadata(file: BinaryIO) -> FileMetadata:
53
53
  except io.UnsupportedOperation as error:
54
54
  logger.error(f"Error retrieving content hash for file: {file.name} \n{str(error)}\n\n")
55
55
 
56
- file_type = guess_file_type(file)
56
+ file_type = guess_file_type(file, name)
57
57
 
58
58
  file_path = getattr(file, "name", None) or getattr(file, "full_name", None)
59
59
 
@@ -1,6 +1,9 @@
1
- from typing import BinaryIO
1
+ import io
2
+ from pathlib import Path
3
+ from typing import BinaryIO, Optional, Any
2
4
  import filetype
3
- from .is_text_content import is_text_content
5
+ from tempfile import SpooledTemporaryFile
6
+ from filetype.types.base import Type
4
7
 
5
8
 
6
9
  class FileTypeException(Exception):
@@ -22,7 +25,7 @@ class FileTypeException(Exception):
22
25
  self.message = message
23
26
 
24
27
 
25
- def guess_file_type(file: BinaryIO) -> filetype.Type:
28
+ def guess_file_type(file: BinaryIO, name: Optional[str] = None) -> filetype.Type:
26
29
  """
27
30
  Guess the file type from the given binary file stream.
28
31
 
@@ -39,12 +42,23 @@ def guess_file_type(file: BinaryIO) -> filetype.Type:
39
42
 
40
43
  - filetype.Type: The guessed file type, represented as filetype.Type.
41
44
  """
45
+
46
+ # Note: If file has .txt or .text extension, consider it a plain text file as filetype.guess may not detect it properly
47
+ # as it contains no magic number encoding
48
+ ext = None
49
+ if isinstance(file, str):
50
+ ext = Path(file).suffix
51
+ elif name is not None:
52
+ ext = Path(name).suffix
53
+
54
+ if ext in [".txt", ".text"]:
55
+ file_type = Type("text/plain", "txt")
56
+ return file_type
57
+
42
58
  file_type = filetype.guess(file)
43
59
 
44
60
  # If file type could not be determined consider it a plain text file as they don't have magic number encoding
45
61
  if file_type is None:
46
- from filetype.types.base import Type
47
-
48
62
  file_type = Type("text/plain", "txt")
49
63
 
50
64
  if file_type is None:
@@ -42,6 +42,7 @@ class AudioLoader(LoaderInterface):
42
42
  "audio/wav",
43
43
  "audio/amr",
44
44
  "audio/aiff",
45
+ "audio/x-wav",
45
46
  ]
46
47
 
47
48
  @property
@@ -30,7 +30,7 @@ class BinaryData(IngestionData):
30
30
 
31
31
  async def ensure_metadata(self):
32
32
  if self.metadata is None:
33
- self.metadata = await get_file_metadata(self.data)
33
+ self.metadata = await get_file_metadata(self.data, name=self.name)
34
34
 
35
35
  if self.metadata["name"] is None:
36
36
  self.metadata["name"] = self.name
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: cognee
3
- Version: 0.3.8
3
+ Version: 0.3.9
4
4
  Summary: Cognee - is a library for enriching LLM context with a semantic layer for better understanding and reasoning.
5
5
  Project-URL: Homepage, https://www.cognee.ai
6
6
  Project-URL: Repository, https://github.com/topoteretes/cognee
@@ -246,8 +246,8 @@ cognee/infrastructure/files/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeR
246
246
  cognee/infrastructure/files/utils/extract_text_from_file.py,sha256=-v0uvK6nXP6Q2Ia0GjIi97WntPFX6sWZQXO_Fg9TrCc,1112
247
247
  cognee/infrastructure/files/utils/get_data_file_path.py,sha256=Xz9anl6yYxK6wETKhVeK4f3ahjw58Aj8YkyJkJONOvc,1549
248
248
  cognee/infrastructure/files/utils/get_file_content_hash.py,sha256=0L_wgsRF8zqmtisFWcp4agDs7WovvBjiVWNQ_NCPKwo,1338
249
- cognee/infrastructure/files/utils/get_file_metadata.py,sha256=WpyOTUf2CPFT8ZlxOWuchg34xu8HVrsMP7cpahsFX7g,2292
250
- cognee/infrastructure/files/utils/guess_file_type.py,sha256=kPLubfG2XXr0FZTepWNSq5zn2W3-sPASK8F_1CCR9OE,1444
249
+ cognee/infrastructure/files/utils/get_file_metadata.py,sha256=3U0usuzEuGbVY0PBqQl9FyU1fWeHzlc8DRqNWZaSoc8,2336
250
+ cognee/infrastructure/files/utils/guess_file_type.py,sha256=s1CkS4DhhxyYIxlapGfVZKOQUbDwselljLRvlrP4BvE,1923
251
251
  cognee/infrastructure/files/utils/is_text_content.py,sha256=iNZWCECNLMjlQfOQAujVQis7prA1cqsscRRSQsxccZo,1316
252
252
  cognee/infrastructure/files/utils/open_data_file.py,sha256=3TPsTUDCH6SOuvbwNembE-YRiFDhb9yCqOC537b6iGY,2155
253
253
  cognee/infrastructure/llm/LLMGateway.py,sha256=o_XXoj6qbTb2zO9MCxut81CkZODJUMtRbLAS854JSzY,2478
@@ -367,7 +367,7 @@ cognee/infrastructure/loaders/get_loader_engine.py,sha256=cPJefAHFAWU1HXQoWqCpwh
367
367
  cognee/infrastructure/loaders/supported_loaders.py,sha256=LBCvqk6PGJsTtFB5vUpArPmxSegRr81y1oNAejzn1mE,961
368
368
  cognee/infrastructure/loaders/use_loader.py,sha256=ncfUFVohPox296m8tMeIl6Hnk1xRvHcpRCmwZXKPZ1s,598
369
369
  cognee/infrastructure/loaders/core/__init__.py,sha256=LTr8FWDXpG-Oxp8nwwn0KnHT97aIK6_FWiswmy7g40Q,230
370
- cognee/infrastructure/loaders/core/audio_loader.py,sha256=rNXsCuLCIinNOaZZXw778bp3ptUMSoLiBaq9cTZ4NFI,3015
370
+ cognee/infrastructure/loaders/core/audio_loader.py,sha256=VBiJb3tgog51yP14CMLQP3RdF6cmj5dCsHeAHDMlCz8,3042
371
371
  cognee/infrastructure/loaders/core/image_loader.py,sha256=b8etveiidIvCw7PXqM2ldyxXDhkqi4-Ak-4BbX664Is,3390
372
372
  cognee/infrastructure/loaders/core/text_loader.py,sha256=zkFhjm_QeQu4fWv_Wkoe0O1Kpe9_uBgskkjeWn0sV-M,2991
373
373
  cognee/infrastructure/loaders/external/__init__.py,sha256=UwLJK81I1Atuw3FN34EDy8NKe7sltxRLZiONYHfoW4o,884
@@ -483,7 +483,7 @@ cognee/modules/ingestion/discover_directory_datasets.py,sha256=wtqYoZ5MpGc_FuzyK
483
483
  cognee/modules/ingestion/get_matched_datasets.py,sha256=BL2H_3t3wDWqcJxlo6uv-1u__g2E5OMwJYFsLCSDF34,475
484
484
  cognee/modules/ingestion/identify.py,sha256=4-oD_VjdJC9oUmJjuLJ1a6BX1-GKbw-rNgWyB9GyhC8,346
485
485
  cognee/modules/ingestion/save_data_to_file.py,sha256=SZFrWbkRCvENQ05JXAAKZgcVm4-s795ZPnhCgdGM5HY,1230
486
- cognee/modules/ingestion/data_types/BinaryData.py,sha256=E9B6N9nJQd8uG2IUJctrWa2y1QkC4txNBAAl9a9aP2g,1060
486
+ cognee/modules/ingestion/data_types/BinaryData.py,sha256=UUo3MZdGaIePs0jbI_Nwwwr6HBnsdtqwt0FJU1d6rqw,1076
487
487
  cognee/modules/ingestion/data_types/IngestionData.py,sha256=JLKzItByitgfQAeEo7-qaRRce_weij-t3YY_nJ4wFy0,309
488
488
  cognee/modules/ingestion/data_types/S3BinaryData.py,sha256=Kdd4R2anhhIPQZ-5xihcWrMPY_MPHIfS4GJYP4ZeraU,1805
489
489
  cognee/modules/ingestion/data_types/TextData.py,sha256=HpIgFqFHm66D-_bgEljUSsh4GSfsLaOj_ubFc_RalNQ,939
@@ -942,9 +942,9 @@ distributed/tasks/queued_add_edges.py,sha256=kz1DHE05y-kNHORQJjYWHUi6Q1QWUp_v3Dl
942
942
  distributed/tasks/queued_add_nodes.py,sha256=aqK4Ij--ADwUWknxYpiwbYrpa6CcvFfqHWbUZW4Kh3A,452
943
943
  distributed/workers/data_point_saving_worker.py,sha256=kmaQy2A2J7W3k9Gd5lyoiT0XYOaJmEM8MbkKVOFOQVU,4729
944
944
  distributed/workers/graph_saving_worker.py,sha256=b5OPLLUq0OBALGekdp73JKxU0GrMlVbO4AfIhmACKkQ,4724
945
- cognee-0.3.8.dist-info/METADATA,sha256=GI-zL_URnvcCPoJI7XcApWndzMacGJ3ULKLzyxR27lE,14938
946
- cognee-0.3.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
947
- cognee-0.3.8.dist-info/entry_points.txt,sha256=fAozOD9Vs4kgYwRhBiZoLCIXu-OSZqVxKGv45l19uok,88
948
- cognee-0.3.8.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
949
- cognee-0.3.8.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
950
- cognee-0.3.8.dist-info/RECORD,,
945
+ cognee-0.3.9.dist-info/METADATA,sha256=CdDPzdK2sCof1lzKjzfD2Fbeqyl2ObVE8EuGwxxKCiQ,14938
946
+ cognee-0.3.9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
947
+ cognee-0.3.9.dist-info/entry_points.txt,sha256=fAozOD9Vs4kgYwRhBiZoLCIXu-OSZqVxKGv45l19uok,88
948
+ cognee-0.3.9.dist-info/licenses/LICENSE,sha256=pHHjSQj1DD8SDppW88MMs04TPk7eAanL1c5xj8NY7NQ,11344
949
+ cognee-0.3.9.dist-info/licenses/NOTICE.md,sha256=6L3saP3kSpcingOxDh-SGjMS8GY79Rlh2dBNLaO0o5c,339
950
+ cognee-0.3.9.dist-info/RECORD,,
File without changes