sinapsis-data-readers 0.1.17__tar.gz → 0.1.20__tar.gz

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
Files changed (46):
  1. {sinapsis_data_readers-0.1.17/src/sinapsis_data_readers.egg-info → sinapsis_data_readers-0.1.20}/PKG-INFO +3 -3
  2. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/pyproject.toml +3 -3
  3. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/datasets_readers/dataset_splitter.py +5 -1
  4. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/datasets_readers/sklearn_datasets.py +2 -3
  5. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/datasets_readers/sktime_datasets.py +6 -4
  6. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/video_readers/video_reader_dali.py +9 -9
  7. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20/src/sinapsis_data_readers.egg-info}/PKG-INFO +3 -3
  8. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers.egg-info/requires.txt +2 -2
  9. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/LICENSE +0 -0
  10. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/README.md +0 -0
  11. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/setup.cfg +0 -0
  12. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/__init__.py +0 -0
  13. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/__init__.py +0 -0
  14. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/coco_dataclasses.py +0 -0
  15. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/csv_reader.py +0 -0
  16. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/file_path_helpers.py +0 -0
  17. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/sklearn_dataset_subset.py +0 -0
  18. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/sktime_datasets_subset.py +0 -0
  19. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/tags.py +0 -0
  20. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/helpers/text_input_helpers.py +0 -0
  21. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/__init__.py +0 -0
  22. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/audio_readers/__init__.py +0 -0
  23. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/audio_readers/audio_reader_pydub.py +0 -0
  24. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/audio_readers/audio_reader_soundfile.py +0 -0
  25. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/audio_readers/audio_reader_to_bytes.py +0 -0
  26. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/audio_readers/base_audio_reader.py +0 -0
  27. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/base_file_data_loader.py +0 -0
  28. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/datasets_readers/__init__.py +0 -0
  29. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/datasets_readers/csv_datasets.py +0 -0
  30. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/image_readers/__init__.py +0 -0
  31. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/image_readers/base_image_folder_data_loader.py +0 -0
  32. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/image_readers/coco_dataset_reader.py +0 -0
  33. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/image_readers/csv_dataset_reader.py +0 -0
  34. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/image_readers/image_folder_reader_cv2.py +0 -0
  35. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/image_readers/image_folder_reader_kornia.py +0 -0
  36. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/text_readers/__init__.py +0 -0
  37. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/text_readers/text_input.py +0 -0
  38. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/video_readers/__init__.py +0 -0
  39. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/video_readers/base_video_reader.py +0 -0
  40. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/video_readers/video_reader_cv2.py +0 -0
  41. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/video_readers/video_reader_ffmpeg.py +0 -0
  42. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers/templates/video_readers/video_reader_torchcodec.py +0 -0
  43. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers.egg-info/SOURCES.txt +0 -0
  44. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers.egg-info/dependency_links.txt +0 -0
  45. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/sinapsis_data_readers.egg-info/top_level.txt +0 -0
  46. {sinapsis_data_readers-0.1.17 → sinapsis_data_readers-0.1.20}/src/test_gradio_client.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-data-readers
3
- Version: 0.1.17
3
+ Version: 0.1.20
4
4
  Summary: Templates to read data in different formats
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -14,8 +14,8 @@ Requires-Dist: sinapsis>=0.1.1
14
14
  Provides-Extra: nvidia-dali
15
15
  Requires-Dist: nvidia-dali-cuda120>=1.43.0; extra == "nvidia-dali"
16
16
  Provides-Extra: torch-codec
17
- Requires-Dist: torch<=2.5.1; extra == "torch-codec"
18
- Requires-Dist: torchcodec>=0.3.0; extra == "torch-codec"
17
+ Requires-Dist: torch>=2.4.1; extra == "torch-codec"
18
+ Requires-Dist: torchcodec>=0.0.3; extra == "torch-codec"
19
19
  Provides-Extra: sklearn-datasets
20
20
  Requires-Dist: pandas>=2.2.3; extra == "sklearn-datasets"
21
21
  Requires-Dist: scikit-learn>=1.5.2; extra == "sklearn-datasets"
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "sinapsis-data-readers"
3
- version = "0.1.17"
3
+ version = "0.1.20"
4
4
  description = "Templates to read data in different formats"
5
5
  authors = [{ name = "SinapsisAI", email = "dev@sinapsis.tech" }]
6
6
 
@@ -14,8 +14,8 @@ dependencies = [
14
14
  [project.optional-dependencies]
15
15
  nvidia-dali = ["nvidia-dali-cuda120>=1.43.0"]
16
16
  torch-codec = [
17
- "torch<=2.5.1",
18
- "torchcodec>=0.3.0",
17
+ "torch>=2.4.1",
18
+ "torchcodec>=0.0.3",
19
19
  ]
20
20
  sklearn-datasets = ["pandas>=2.2.3", "scikit-learn>=1.5.2"]
21
21
  sktime-datareaders = ["sktime>=0.34.0"]
@@ -38,7 +38,11 @@ class ImageDatasetSplit(BaseModel):
38
38
  """allow arbitrary types"""
39
39
 
40
40
  arbitrary_types_allowed = True
41
-
41
+ json_encoders : dict = {
42
+ pd.DataFrame: lambda df: df.to_dict(orient="records"),
43
+ pd.Series: lambda s: s.to_list(),
44
+ np.ndarray: lambda arr: arr.tolist(),
45
+ }
42
46
 
43
47
  class TabularDatasetSplit(BaseModel):
44
48
  """BaseModel to store the content of the data packets as a list
@@ -144,8 +144,7 @@ class SKLearnDatasets(BaseDynamicWrapperTemplate):
144
144
  X = results.iloc[:, :n_features]
145
145
  y = results.iloc[:, n_features:]
146
146
 
147
- # x_vals = results.drop(columns=[TARGET], axis=1)
148
- # y_vals = results[TARGET]
147
+
149
148
  x_train, x_test, y_train, y_test = train_test_split(X, y, train_size=split_size, random_state=0)
150
149
  split_data = TabularDatasetSplit(
151
150
  x_train=pd.DataFrame(x_train),
@@ -154,7 +153,7 @@ class SKLearnDatasets(BaseDynamicWrapperTemplate):
154
153
  y_test=pd.DataFrame(y_test),
155
154
  )
156
155
 
157
- return split_data.model_dump()
156
+ return split_data.model_dump_json(indent=2)
158
157
 
159
158
  def execute(self, container: DataContainer) -> DataContainer:
160
159
  sklearn_dataset = self.wrapped_callable.__func__(**self.dataset_attributes.model_dump())
@@ -104,13 +104,13 @@ class SKTimeDatasets(BaseDynamicWrapperTemplate):
104
104
  TabularDatasetSplit: Object containing the split time series data
105
105
  """
106
106
  y_train, y_test = temporal_train_test_split(dataset, train_size=self.attributes.train_size)
107
- split_data = TabularDatasetSplit(
107
+ split_dataset = TabularDatasetSplit(
108
108
  x_train=pd.DataFrame(index=y_train.index),
109
109
  x_test=pd.DataFrame(index=y_test.index),
110
110
  y_train=pd.DataFrame(y_train),
111
111
  y_test=pd.DataFrame(y_test),
112
112
  )
113
- return split_data.model_dump()
113
+ return split_dataset.model_dump_json(indent=2)
114
114
 
115
115
  def split_classification_dataset(self, X: Any, y: Any) -> TabularDatasetSplit:
116
116
  """Split a classification dataset into training and testing sets
@@ -126,15 +126,17 @@ class SKTimeDatasets(BaseDynamicWrapperTemplate):
126
126
  X_train, X_test, y_train, y_test = train_test_split(
127
127
  X, y, train_size=self.attributes.train_size, random_state=0
128
128
  )
129
- return TabularDatasetSplit(
129
+ split_dataset = TabularDatasetSplit(
130
130
  x_train=pd.DataFrame(X_train),
131
131
  x_test=pd.DataFrame(X_test),
132
132
  y_train=pd.DataFrame(y_train),
133
133
  y_test=pd.DataFrame(y_test),
134
134
  )
135
+ return split_dataset.model_dump_json(indent=2)
135
136
  except ValueError:
136
137
  self.logger.debug("Wrong format for split. original values")
137
- return TabularDatasetSplit(x_train=pd.DataFrame(X), y_train=pd.DataFrame(y))
138
+ split_dataset = TabularDatasetSplit(x_train=pd.DataFrame(X), y_train=pd.DataFrame(y))
139
+ return split_dataset.model_dump_json(indent=2)
138
140
 
139
141
  def create_dataset(self):
140
142
  return self.wrapped_callable.__func__(**self.dataset_attributes.model_dump())
@@ -134,19 +134,19 @@ class VideoReaderDali(BaseVideoReader):
134
134
  del self.video_reader
135
135
 
136
136
  def _read_video_frames(self) -> list[ImagePacket]:
137
- """Reads video frames from the dali pipeline.
138
-
139
- This method runs the video reader pipeline and adds the frames to a list
140
- of ImagePacket objects.
141
-
142
- Returns:
143
- list[ImagePacket]: A list of ImagePacket objects representing the video frames.
144
- """
137
+ """Reads video frames from the dali pipeline."""
145
138
  video_frames: list[ImagePacket] = []
146
139
  sequences_out = self.video_reader.run()
140
+ tensor_batch = sequences_out[0]
141
+
142
+ shape_result = tensor_batch.shape()
143
+ batch_size = shape_result[0][0]
147
144
 
148
- for idx, frame in enumerate(sequences_out[0]):
145
+ for idx in range(batch_size):
146
+ frame_tensor = tensor_batch.at(idx)
147
+ frame = torch.as_tensor(frame_tensor, device="cuda")
149
148
  video_frames.append(self._make_image_packet(frame, frame_index=self.frame_count + idx))
149
+
150
150
  return video_frames
151
151
 
152
152
  def reset_state(self, template_name: str | None = None) -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sinapsis-data-readers
3
- Version: 0.1.17
3
+ Version: 0.1.20
4
4
  Summary: Templates to read data in different formats
5
5
  Author-email: SinapsisAI <dev@sinapsis.tech>
6
6
  Project-URL: Homepage, https://sinapsis.tech
@@ -14,8 +14,8 @@ Requires-Dist: sinapsis>=0.1.1
14
14
  Provides-Extra: nvidia-dali
15
15
  Requires-Dist: nvidia-dali-cuda120>=1.43.0; extra == "nvidia-dali"
16
16
  Provides-Extra: torch-codec
17
- Requires-Dist: torch<=2.5.1; extra == "torch-codec"
18
- Requires-Dist: torchcodec>=0.3.0; extra == "torch-codec"
17
+ Requires-Dist: torch>=2.4.1; extra == "torch-codec"
18
+ Requires-Dist: torchcodec>=0.0.3; extra == "torch-codec"
19
19
  Provides-Extra: sklearn-datasets
20
20
  Requires-Dist: pandas>=2.2.3; extra == "sklearn-datasets"
21
21
  Requires-Dist: scikit-learn>=1.5.2; extra == "sklearn-datasets"
@@ -35,5 +35,5 @@ sktime>=0.34.0
35
35
  soundfile>=0.12.1
36
36
 
37
37
  [torch-codec]
38
- torch<=2.5.1
39
- torchcodec>=0.3.0
38
+ torch>=2.4.1
39
+ torchcodec>=0.0.3