PyPI - pixeltable - Versions diffs - 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl - Mend

pixeltable 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of pixeltable might be problematic. Click here for more details.

Files changed (7) hide show

pixeltable/__version__.py CHANGED Viewed

@@ -1,3 +1,3 @@
 # These version placeholders will be replaced during build.
-__version__ = "0.2.23"
-__version_tuple__ = (0, 2, 23)
+__version__ = "0.2.24"
+__version_tuple__ = (0, 2, 24)

pixeltable/functions/huggingface.py CHANGED Viewed

@@ -426,12 +426,10 @@ def speech2text_for_conditional_generation(
     env.Env.get().require_package('torchaudio')
     env.Env.get().require_package('sentencepiece')
     device = resolve_torch_device('auto', allow_mps=False)  # Doesn't seem to work on 'mps'; use 'cpu' instead
-    import librosa
     import torch
+    import torchaudio  # type: ignore[import-untyped]
     from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
-    # facebook/s2t-small-librispeech-asr
-    # facebook/s2t-small-mustc-en-fr-st
     model = _lookup_model(model_id, Speech2TextForConditionalGeneration.from_pretrained, device=device)
     processor = _lookup_processor(model_id, Speech2TextProcessor.from_pretrained)
     assert isinstance(processor, Speech2TextProcessor)
@@ -445,12 +443,23 @@ def speech2text_for_conditional_generation(
     # Get the model's sampling rate. Default to 16 kHz (the standard) if not in config
     model_sampling_rate = getattr(model.config, 'sampling_rate', 16_000)
-    waveform, sampling_rate = librosa.load(audio, sr=model_sampling_rate, mono=True)
+    waveform, sampling_rate = torchaudio.load(audio)
+    # Resample to the model's sampling rate, if necessary
+    if sampling_rate != model_sampling_rate:
+        waveform = torchaudio.transforms.Resample(sampling_rate, model_sampling_rate)(waveform)
+    # Average the channels to get a single-channel waveform as a 1D tensor (if the original waveform is already
+    # mono, this will simply squeeze the tensor)
+    assert waveform.dim() == 2
+    waveform = torch.mean(waveform, dim=0)
+    assert waveform.dim() == 1
     with torch.no_grad():
         inputs = processor(
             waveform,
-            sampling_rate=sampling_rate,
+            sampling_rate=model_sampling_rate,
             return_tensors='pt'
         )
         generated_ids = model.generate(**inputs.to(device), forced_bos_token_id=forced_bos_token_id).to('cpu')

{pixeltable-0.2.23.dist-info → pixeltable-0.2.24.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: pixeltable
-Version: 0.2.23
+Version: 0.2.24
 Summary: Pixeltable: The Multimodal AI Data Plane
 Author: Pixeltable, Inc.
 Author-email: contact@pixeltable.com

{pixeltable-0.2.23.dist-info → pixeltable-0.2.24.dist-info}/RECORD RENAMED Viewed

@@ -1,5 +1,5 @@
 pixeltable/__init__.py,sha256=gv2jvZ7H5tEjLear10E7hSH9jF5Mw6iSeryvRp88bOE,1391
-pixeltable/__version__.py,sha256=7KY-iWvvGLR_f-tQMjxiOTfMCj-z3KM_23EsiPZSTl8,114
+pixeltable/__version__.py,sha256=w8n5Ad700zSFGVbR5bQpPtdgGwCXcl22svUmysoPXE0,114
 pixeltable/catalog/__init__.py,sha256=Ar6_F_6C7tkznIlCPBHVHDop5YssBDjKQr2NPQ21QCI,484
 pixeltable/catalog/catalog.py,sha256=tyDyI5wQw7vV6_FChrp9qgGCRClcjiSdW3eygYT0p9s,7849
 pixeltable/catalog/column.py,sha256=ezeKoGl6aBTzSZBihDA6vdETcvyCguAD4OsmrxWs73A,9595
@@ -74,7 +74,7 @@ pixeltable/functions/anthropic.py,sha256=P1E5o4-8QP1LTIUsWVgo_wMJ4WOnxtXUUXuFWUa
 pixeltable/functions/audio.py,sha256=7213nTnqKJ6vM9kalaoJ283OwX5SGEJN10vDhaRNZ6E,644
 pixeltable/functions/fireworks.py,sha256=qwFC_eIaDs-glxyJ_IVXaNGkpgPzeRsQ_SdpzueBxq0,2605
 pixeltable/functions/globals.py,sha256=pCFX2a_N87SwG9GxyPjSOC3TVMowMB6XIHSWKfFOuGE,3917
-pixeltable/functions/huggingface.py,sha256=hcScloxprIuyf89_bSnQi-N3VZRQosmZfBhmDJF_eTc,20814
+pixeltable/functions/huggingface.py,sha256=s5KmOfi9-TOYyrL1Wv-voKP7ykkUN7LlLAA_uo01UQc,21210
 pixeltable/functions/image.py,sha256=3Qm4ybAT_o4YUl3bzhEXy8dKOwgZ7RCUV-ky-dbL_jc,13836
 pixeltable/functions/json.py,sha256=ehCnBA5WiIl-crV9PFVgmxrsWsiO8FpRs9LDwcSpLa4,879
 pixeltable/functions/llama_cpp.py,sha256=1awALuAXVpQH64l7vQlM8gvxLDix4c1-6DV3nG5RHu4,3881
@@ -146,8 +146,8 @@ pixeltable/utils/pytorch.py,sha256=6RvOCjy_QV4gc-aht-3d0zoASkuv-warfpl87vgmuKw,3
 pixeltable/utils/s3.py,sha256=huA5hxDGkPIu18zWet76o0FsO7Vbtp-iPmnOzCe-MvA,586
 pixeltable/utils/sql.py,sha256=j_tj0h4ffm-DhUIJbvGphxrVyBKlNTwDKqWGhRQ5_PY,795
 pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
-pixeltable-0.2.23.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
-pixeltable-0.2.23.dist-info/METADATA,sha256=WYtMLp2PmlhHkQl6WUyvJFuJ2X-_lSB1VUO3i56dKwc,18112
-pixeltable-0.2.23.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
-pixeltable-0.2.23.dist-info/entry_points.txt,sha256=TNI1Gb5vPwFrTdw6TimSYjO8FeK8c_HuPr28vcf7o_I,108
-pixeltable-0.2.23.dist-info/RECORD,,
+pixeltable-0.2.24.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+pixeltable-0.2.24.dist-info/METADATA,sha256=pOqK9GV-mL4WjzzxeM9w4sJGZsJNSN05DhiTslxJLtk,18112
+pixeltable-0.2.24.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
+pixeltable-0.2.24.dist-info/entry_points.txt,sha256=TNI1Gb5vPwFrTdw6TimSYjO8FeK8c_HuPr28vcf7o_I,108
+pixeltable-0.2.24.dist-info/RECORD,,

{pixeltable-0.2.23.dist-info → pixeltable-0.2.24.dist-info}/LICENSE RENAMED Viewed

File without changes

{pixeltable-0.2.23.dist-info → pixeltable-0.2.24.dist-info}/WHEEL RENAMED Viewed

File without changes

{pixeltable-0.2.23.dist-info → pixeltable-0.2.24.dist-info}/entry_points.txt RENAMED Viewed

File without changes

pixeltable 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

Potentially problematic release.

pixeltable 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl