PyPI - opensportslib - Versions diffs - 0.1.2.dev11__tar.gz → 0.1.2.dev12__tar.gz - Mend

opensportslib 0.1.2.dev11.tar.gz → 0.1.2.dev12.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (110) hide show

{opensportslib-0.1.2.dev11/opensportslib.egg-info → opensportslib-0.1.2.dev12}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opensportslib
-Version: 0.1.2.dev11
+Version: 0.1.2.dev12
 Summary: OpenSportsLib is the professional library, designed for advanced video understanding in sports. It provides state-of-the-art tools for action recognition, spotting, retrieval, and captioning, making it ideal for researchers, analysts, and developers working with sports video data.
 Author: Jeet Vora
 Requires-Python: >=3.12
@@ -27,6 +27,7 @@ Requires-Dist: pytorch-lightning
 Requires-Dist: pandas
 Requires-Dist: pyarrow
 Requires-Dist: huggingface_hub
+Requires-Dist: easydict
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/opensportslib/config/localization.yaml RENAMED Viewed

@@ -4,7 +4,7 @@ dali: True
 DATA:
   dataset_name: SoccerNet
-  data_dir: /home/vorajv/opensportslib/SoccerNet/annotations/
+  data_dir: /home/giancos/datasets/OpenSportsLab/OSL-SNBAS/224p-2024/
   classes:
     - PASS
     - DRIVE
@@ -37,7 +37,7 @@ DATA:
     classes: ${DATA.classes}
     output_map: [data, label]
     video_path: ${DATA.data_dir}/train/
-    path: ${DATA.train.video_path}/annotations-2024-224p-train.json
+    path: ${DATA.train.video_path}/train.json
     dataloader:
       batch_size: 8
       shuffle: true
@@ -49,10 +49,12 @@ DATA:
     classes: ${DATA.classes}
     output_map: [data, label]
     video_path: ${DATA.data_dir}/valid/
-    path: ${DATA.valid.video_path}/annotations-2024-224p-valid.json
+    path: ${DATA.valid.video_path}/valid.json
     dataloader:
       batch_size: 8
       shuffle: true
+      num_workers: 4
+      pin_memory: true
   valid_data_frames:
     type: VideoGameWithDaliVideo
@@ -64,13 +66,15 @@ DATA:
     dataloader:
       batch_size: 4
       shuffle: false
+      num_workers: 4
+      pin_memory: true
   test:
     type: VideoGameWithDaliVideo
     classes: ${DATA.classes}
     output_map: [data, label]
     video_path: ${DATA.data_dir}/test/
-    path: ${DATA.test.video_path}/annotations-2024-224p-test.json
+    path: ${DATA.test.video_path}/test.json
     results: results_spotting_test
     nms_window: 2
     metric: tight

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/opensportslib/config/sngar-frames.yaml RENAMED Viewed

@@ -8,13 +8,14 @@ TASK: classification
 DATA:
   dataset_name: sngar
-  data_dir: /home/spark_user1/opensportslib/sngar-frames
+  data_dir: /home/giancos/datasets/OpenSportsLab/soccernetpro-classification-GAR/frames-parquet
   data_modality: frames_npy
   # max_samples: 100 # only used for quick testing
   num_frames: 16
   frame_size: [224, 224]
   train:
-    path: ${DATA.data_dir}/annotations_train.json
+    video_path: ${DATA.data_dir}/train
+    path: ${DATA.data_dir}/train.json
     dataloader:
       batch_size: 8 # for frozen backbone, use 64
       # for unfrozen backbone, use 32-16-8 depending on the memory available
@@ -22,13 +23,15 @@ DATA:
       num_workers: 8
       pin_memory: true
   valid:
-    path: ${DATA.data_dir}/annotations_valid.json
+    video_path: ${DATA.data_dir}/valid
+    path: ${DATA.data_dir}/valid.json
     dataloader:
       batch_size: 8
       num_workers: 8
       shuffle: false
   test:
-    path: ${DATA.data_dir}/annotations_test.json
+    video_path: ${DATA.data_dir}/test
+    path: ${DATA.data_dir}/test.json
     dataloader:
       batch_size: 8
       num_workers: 8

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/opensportslib/config/sngar-tracking.yaml RENAMED Viewed

@@ -9,7 +9,7 @@ TASK: classification
 DATA:
   dataset_name: sngar
   data_modality: tracking_parquet
-  data_dir: /home/karkid/opensportslib/tracking-dataset
+  data_dir: /home/giancos/datasets/OpenSportsLab/soccernetpro-classification-GAR/tracking-parquet
   preload_data: false
   train:
     type: annotations_train.json
@@ -103,10 +103,10 @@ TRAIN:
     type: CrossEntropyLoss
 SYSTEM:
- log_dir: ./logs
- save_dir: ./checkpoints_tracking
- use_seed: true
- seed: 42
- GPU: 4
- device: cuda   # auto | cuda | cpu
- gpu_id: 0
+  log_dir: ./logs
+  save_dir: ./checkpoints_tracking
+  use_seed: true
+  seed: 42
+  GPU: 1
+  device: cuda   # auto | cuda | cpu
+  gpu_id: 0

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/opensportslib/datasets/classification_dataset.py RENAMED Viewed

@@ -233,7 +233,7 @@ class VideoDataset(ClassificationDataset):
             numpy.ndarray of shape (T, H, W, C).
         """
         if path.endswith(".npy"):
-            frames = np.load(path).astype(np.float32) / 255.0
+            frames = np.load(os.path.join(self.video_path, path)).astype(np.float32) / 255.0
             if self.transform is not None:
                 frames = self.transform(frames)
             mean = np.array([0.485, 0.456, 0.406], dtype=np.float32)

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/opensportslib/datasets/localization_dataset.py RENAMED Viewed

@@ -2,7 +2,6 @@ import os
 import torch
 import random
 from torch.utils.data import Dataset
-import tempfile
 import copy
 import math
 import numpy as np
@@ -43,6 +42,36 @@ if DALI_AVAILABLE:
 else:
     def dali_pipeline_def(func):
         return func  # dummy decorator
+def _build_dali_filenames_and_labels(labels):
+    filenames = [video["video"] for video in labels]
+    label_indices = list(range(len(labels)))
+    return filenames, label_indices
+def _dali_frame_num_to_local_frame(frame_num, stride):
+    return frame_num // stride + 1
+def _count_dali_video_samples(num_frames, clip_len, overlap_len):
+    step = clip_len - overlap_len
+    if step <= 0:
+        raise ValueError("`clip_len - overlap_len` must be strictly positive.")
+    return len(range(1, num_frames, step))
+def _pad_dali_iterator_size(size, batch_size):
+    remainder = size % batch_size
+    if remainder == 0:
+        return size
+    return size + (batch_size - remainder)
+def _resolve_dali_video_sample(labels, video_idx, frame_num, stride):
+    video_meta = labels[video_idx]
+    start = _dali_frame_num_to_local_frame(frame_num, stride)
+    return video_meta["path"], start
 class LocalizationDataset(Dataset):
     def __init__(self, config, annotations_path=None, processor=None, split="train"):
@@ -941,7 +970,6 @@ if DALI_AVAILABLE:
                     "NVIDIA DALI is required for VideoGameWithDali. "
                     "Install it or use another dataset type."
                 )
-            import random
             from opensportslib.core.utils.load_annotations import annotationstoe2eformat
             from opensportslib.core.utils.video_processing import distribute_elements, _get_deferred_rgb_transform, get_stride
@@ -975,38 +1003,20 @@ if DALI_AVAILABLE:
             self.TARGET_WIDTH = TARGET_WIDTH
             self._stride = get_stride(input_fps, extract_fps)
-            if is_eval:
-                nb_clips_per_video = math.ceil(dataset_len / len(self._labels)) * epochs
-            else:
-                nb_clips_per_video = math.ceil(dataset_len / len(self._labels)) * epochs
-            if mixup:
-                nb_clips_per_video = nb_clips_per_video * 2
-            file_list_txt = ""
-            for index, video in enumerate(self._labels):
-                video_path = video["video"]
-                #print("video_path :", video_path)
-                # video_path = os.path.join(video_dir, video["video"] + extension)
-                for _ in range(nb_clips_per_video):
-                    #print(video["num_frames"], (clip_len + 1))
-                    random_start = random.randint(1, video["num_frames"] - (clip_len + 1))
-                    file_list_txt += f"{video_path} {index} {random_start * self._stride} {(random_start+clip_len) * self._stride}\n"
-            tf = tempfile.NamedTemporaryFile()
-            tf.write(str.encode(file_list_txt))
-            tf.flush()
+            self._filenames, self._video_indices = _build_dali_filenames_and_labels(
+                self._labels
+            )
             self.pipes = [
                 self.video_pipe(
                     batch_size=self.batch_size_per_pipe[index],
+                    filenames=self._filenames,
+                    labels=self._video_indices,
                     sequence_length=self.clip_len,
                     stride_dali=self._stride,
-                    step=-1,
+                    step=self._stride,
                     num_threads=8,
                     device_id=i,
-                    file_list=tf.name,
                     shard_id=index,
                     num_shards=len(devices),
                 )
@@ -1184,7 +1194,14 @@ if DALI_AVAILABLE:
         @dali_pipeline_def
         def video_pipe(
-            self, file_list, sequence_length, stride_dali, step, shard_id, num_shards
+            self,
+            filenames,
+            labels,
+            sequence_length,
+            stride_dali,
+            step,
+            shard_id,
+            num_shards,
         ):
             """Construct the pipeline to process a video. This pipeline process a clip with specified arguments such as stride,step and sequence length.
             The first step returns clip of frames with associated labels (index of the clip in the list of clips) and the index of the first frame.
@@ -1192,7 +1209,8 @@ if DALI_AVAILABLE:
             The last step is to construct the list of labels (corresponding to events) corresponding with the extracted frames.
             Args:
-                file_list (string): Path to the file with a list of <file label [start_frame [end_frame]]> values.
+                filenames (List[string]): Video files passed directly to DALI.
+                labels (List[int]): Video indices associated with filenames.
                 sequence_length (int): Frames to load per sequence.
                 stride_dali (int): Distance between consecutive frames in the sequence.
                 step(int): Frame interval between each sequence.
@@ -1206,14 +1224,14 @@ if DALI_AVAILABLE:
             video, label, frame_num = fn.readers.video_resize(
                 device="gpu",
                 size=(self.TARGET_HEIGHT, self.TARGET_WIDTH),
-                file_list=file_list,
+                filenames=filenames,
+                labels=labels,
                 sequence_length=sequence_length,
                 random_shuffle=True,
                 shard_id=shard_id,
                 num_shards=num_shards,
                 image_type=types.RGB,
-                file_list_include_preceding_frame=True,
-                file_list_frame_num=True,
+                file_list_include_preceding_frame=False,
                 enable_frame_num=True,
                 stride=stride_dali,
                 step=step,
@@ -1257,7 +1275,9 @@ if DALI_AVAILABLE:
                 labels (np.ndarray): Label array of shape (clip_len,).
             """
             video_meta = self._labels[video_idx]
-            base_idx = frame_num // self._stride
+            # DALI frame numbers are 0-based, while localization annotations are
+            # normalized to a 1-based extracted-frame axis.
+            base_idx = _dali_frame_num_to_local_frame(frame_num, self._stride)
             labels = np.zeros(self.clip_len, np.int64)
             for event in video_meta["events"]:
@@ -1332,9 +1352,8 @@ if DALI_AVAILABLE:
                     "NVIDIA DALI is required for VideoGameWithDali. "
                     "Install it or use another dataset type."
                 )
-            import random
             from opensportslib.core.utils.load_annotations import annotationstoe2eformat, construct_labels
-            from opensportslib.core.utils.video_processing import distribute_elements, _get_deferred_rgb_transform, get_stride, get_remaining
+            from opensportslib.core.utils.video_processing import get_stride
             self._src_file = label_file
             # self.infer = False
             if label_file.endswith(".json"):
@@ -1353,70 +1372,38 @@ if DALI_AVAILABLE:
             self.crop_dim = crop_dim
             stride = 1
             self._stride = stride
+            self._stride_dali = stride_dali
             self._flip = flip
             self._multi_crop = multi_crop
             self.batch_size = batch_size // len(devices)
+            self.global_batch_size = batch_size
             self.devices = devices
-            self._clips = []
             self.IMAGENET_MEAN = IMAGENET_MEAN
             self.IMAGENET_STD = IMAGENET_STD
             self.TARGET_HEIGHT = TARGET_HEIGHT
             self.TARGET_WIDTH = TARGET_WIDTH
-            file_list_txt = ""
-            cmp = 0
-            for l in self._labels:
-                has_clip = False
-                for i in range(
-                    1,
-                    l[
-                        "num_frames"
-                    ],  # Need to ensure that all clips have at least one frame
-                    (clip_len - overlap_len) * self._stride,
-                ):
-                    if i + clip_len > l["num_frames"]:
-                        end = l["num_frames_base"]
-                    else:
-                        end = (i + clip_len) * stride_dali
-                    has_clip = True
-                    self._clips.append((l["path"], l["video"], i))
-                    # if self.infer:
-                    #     video_path = l["video"]
-                    # else:
-                    #     video_path = os.path.join(video_dir, l["video"] + extension)
-                    video_path = l["video"]
-                    file_list_txt += f"{video_path} {cmp} {i * stride_dali} {end}\n"
-                    # if cmp2 <5:
-                    #     print(file_list_txt)
-                    #     cmp2+=1
-                    cmp += 1
-                last_video = l["video"]
-                last_path = l["path"]
-                assert has_clip, l
-            x = get_remaining(len(self._clips), batch_size)
-            for _ in range(x):
-                self._clips.append((last_path, last_video, i))
-                # if self.infer:
-                #     video_path = l["video"]
-                # else:
-                #     video_path = os.path.join(video_dir, l["video"] + extension)
-                video_path = l["video"]
-                file_list_txt += f"{video_path} {cmp} {i * stride_dali} {end}\n"
-                cmp += 1
-            # print(file_list_txt)
-            tf = tempfile.NamedTemporaryFile()
-            tf.write(str.encode(file_list_txt))
-            tf.flush()
+            self._filenames, self._video_indices = _build_dali_filenames_and_labels(
+                self._labels
+            )
+            clip_count = 0
+            for video in self._labels:
+                num_clips = _count_dali_video_samples(
+                    video["num_frames"], self._clip_len, overlap_len
+                )
+                assert num_clips > 0, video
+                clip_count += num_clips
+            iterator_size = _pad_dali_iterator_size(clip_count, self.global_batch_size)
             self.pipes = [
                 self.video_pipe(
                     batch_size=self.batch_size,
+                    filenames=self._filenames,
+                    labels=self._video_indices,
                     sequence_length=self._clip_len,
                     stride_dali=stride_dali,
-                    step=-1,
+                    step=(self._clip_len - overlap_len) * stride_dali,
                     num_threads=8,
                     device_id=i,
-                    file_list=tf.name,
                     shard_id=index,
                     num_shards=len(devices),
                 )
@@ -1426,21 +1413,25 @@ if DALI_AVAILABLE:
             for pipe in self.pipes:
                 pipe.build()
-            size = len(self._clips)
-            super().__init__(self.pipes, output_map, size=size)
+            internal_output_map = ["data", "video_idx", "frame_num"]
+            super().__init__(self.pipes, internal_output_map, size=iterator_size)
         def __next__(self):
             import cupy
             out = super().__next__()
             video_names = []
-            starts = cupy.zeros(len(self.devices) * self.batch_size, np.int64)
+            total_samples = sum(batch["video_idx"].shape[0] for batch in out)
+            starts = cupy.zeros(total_samples, np.int64)
             cmp = 0
             for j in range(len(out)):
-                for i in range(out[j]["label"].shape[0]):
-                    video_path, video_name, start = self._clips[out[j]["label"][i]]
-                    video_names.append(video_path)
+                for i in range(out[j]["video_idx"].shape[0]):
+                    video_idx = int(out[j]["video_idx"][i].item())
+                    frame_num = int(out[j]["frame_num"][i].item())
+                    video_name, start = _resolve_dali_video_sample(
+                        self._labels, video_idx, frame_num, self._stride_dali
+                    )
+                    video_names.append(video_name)
                     starts[cmp] = start
                     cmp += 1
             return {
@@ -1460,14 +1451,22 @@ if DALI_AVAILABLE:
         @dali_pipeline_def
         def video_pipe(
-            self, file_list, sequence_length, stride_dali, step, shard_id, num_shards
+            self,
+            filenames,
+            labels,
+            sequence_length,
+            stride_dali,
+            step,
+            shard_id,
+            num_shards,
         ):
             """Construct the pipeline to process a video. This pipeline process a clip with specified arguments such as stride,step and sequence length.
             The first step returns clip of frames with associated labels (index of the clip in the list of clips) and the index of the first frame.
             The second step is the cropping, mirroring (only if non eval) and normalizing the frames.
             Args:
-                file_list (string): Path to the file with a list of <file label [start_frame [end_frame]]> values.
+                filenames (List[string]): Video files passed directly to DALI.
+                labels (List[int]): Video indices associated with filenames.
                 sequence_length (int): Frames to load per sequence.
                 stride_dali (int): Distance between consecutive frames in the sequence.
                 step(int): Frame interval between each sequence.
@@ -1478,17 +1477,18 @@ if DALI_AVAILABLE:
                 video (torch.tensor): The frames processed.
                 label : the index of the clip in the list of clips.
             """
-            video, label = fn.readers.video_resize(
+            video, video_idx, frame_num = fn.readers.video_resize(
                 device="gpu",
                 size=(self.TARGET_HEIGHT, self.TARGET_WIDTH),
-                file_list=file_list,
+                filenames=filenames,
+                labels=labels,
                 sequence_length=sequence_length,
                 random_shuffle=False,
                 shard_id=shard_id,
                 num_shards=num_shards,
                 image_type=types.RGB,
-                file_list_include_preceding_frame=True,
-                file_list_frame_num=True,
+                file_list_include_preceding_frame=False,
+                enable_frame_num=True,
                 stride=stride_dali,
                 step=step,
                 pad_sequences=True,
@@ -1505,7 +1505,7 @@ if DALI_AVAILABLE:
                 std=[self.IMAGENET_STD[i] * 255.0 for i in range(len(self.IMAGENET_STD))],
             )
-            return video, label
+            return video, video_idx, frame_num
         def get_dims(video):
             print(video.shape)

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12/opensportslib.egg-info}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: opensportslib
-Version: 0.1.2.dev11
+Version: 0.1.2.dev12
 Summary: OpenSportsLib is the professional library, designed for advanced video understanding in sports. It provides state-of-the-art tools for action recognition, spotting, retrieval, and captioning, making it ideal for researchers, analysts, and developers working with sports video data.
 Author: Jeet Vora
 Requires-Python: >=3.12
@@ -27,6 +27,7 @@ Requires-Dist: pytorch-lightning
 Requires-Dist: pandas
 Requires-Dist: pyarrow
 Requires-Dist: huggingface_hub
+Requires-Dist: easydict
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Requires-Dist: pytest-cov; extra == "test"

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/opensportslib.egg-info/SOURCES.txt RENAMED Viewed

@@ -92,6 +92,7 @@ tests/test_classification_trainer_dataloader.py
 tests/test_config_utils_smoke.py
 tests/test_conversion_tools.py
 tests/test_hf_transfer_tools.py
+tests/test_localization_dali_filenames.py
 tests/test_package_smoke.py
 tests/test_public_apis_smoke.py
 tests/test_subset_train_infer_integration.py

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/opensportslib.egg-info/requires.txt RENAMED Viewed

@@ -17,6 +17,7 @@ pytorch-lightning
 pandas
 pyarrow
 huggingface_hub
+easydict
 [:platform_system != "Darwin" and platform_machine != "arm64" and platform_machine != "aarch64"]
 decord

{opensportslib-0.1.2.dev11 → opensportslib-0.1.2.dev12}/pyproject.toml RENAMED Viewed

@@ -4,11 +4,11 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "opensportslib"
-version = "0.1.2.dev11"
+version = "0.1.2.dev12"
 description = "OpenSportsLib is the professional library, designed for advanced video understanding in sports. It provides state-of-the-art tools for action recognition, spotting, retrieval, and captioning, making it ideal for researchers, analysts, and developers working with sports video data."
 readme = "README.md"
 requires-python = ">=3.12"
-dependencies = [ "SoccerNet", "av", "decord; platform_system != 'Darwin' and platform_machine != 'arm64' and platform_machine != 'aarch64'", "evaluate", "scikit-learn", "torch", "torchvision", "transformers==4.57.3", "tokenizers==0.22.1", "accelerate", "wandb", "opencv-python", "omegaconf", "timm", "seaborn", "tabulate", "pytorch-lightning", "pandas", "pyarrow", "huggingface_hub",]
+dependencies = [ "SoccerNet", "av", "decord; platform_system != 'Darwin' and platform_machine != 'arm64' and platform_machine != 'aarch64'", "evaluate", "scikit-learn", "torch", "torchvision", "transformers==4.57.3", "tokenizers==0.22.1", "accelerate", "wandb", "opencv-python", "omegaconf", "timm", "seaborn", "tabulate", "pytorch-lightning", "pandas", "pyarrow", "huggingface_hub", "easydict",]
 [[project.authors]]
 name = "Jeet Vora"

opensportslib-0.1.2.dev12/tests/test_localization_dali_filenames.py ADDED Viewed

@@ -0,0 +1,59 @@
+import pytest
+from opensportslib.datasets.localization_dataset import (
+    _build_dali_filenames_and_labels,
+    _count_dali_video_samples,
+    _dali_frame_num_to_local_frame,
+    _pad_dali_iterator_size,
+    _resolve_dali_video_sample,
+)
+def test_build_dali_filenames_preserves_spaced_paths():
+    labels = [
+        {"video": "/tmp/with spaces/game one.mp4"},
+        {"video": "/tmp/plain/game-two.mp4"},
+    ]
+    filenames, label_indices = _build_dali_filenames_and_labels(labels)
+    assert filenames == [
+        "/tmp/with spaces/game one.mp4",
+        "/tmp/plain/game-two.mp4",
+    ]
+    assert label_indices == [0, 1]
+def test_dali_frame_numbers_translate_to_one_based_local_frames():
+    assert _dali_frame_num_to_local_frame(0, 12) == 1
+    assert _dali_frame_num_to_local_frame(12, 12) == 2
+    assert _dali_frame_num_to_local_frame(24, 12) == 3
+def test_count_dali_video_samples_matches_previous_manifest_schedule():
+    assert _count_dali_video_samples(10, 4, 1) == len(list(range(1, 10, 3)))
+    assert _count_dali_video_samples(101, 100, 50) == len(list(range(1, 101, 50)))
+def test_count_dali_video_samples_rejects_non_positive_step():
+    with pytest.raises(ValueError):
+        _count_dali_video_samples(10, 4, 4)
+def test_pad_dali_iterator_size_rounds_up_to_full_batch():
+    assert _pad_dali_iterator_size(10, 4) == 12
+    assert _pad_dali_iterator_size(12, 4) == 12
+def test_resolve_dali_video_sample_uses_relative_path_and_one_based_start():
+    labels = [
+        {
+            "path": "train/match one.mp4",
+            "video": "/abs/with spaces/train/match one.mp4",
+        }
+    ]
+    video_name, start = _resolve_dali_video_sample(labels, 0, 24, 12)
+    assert video_name == "train/match one.mp4"
+    assert start == 3