pixeltable 0.2.23__py3-none-any.whl → 0.2.24__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pixeltable might be problematic. Click here for more details.

pixeltable/__version__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  # These version placeholders will be replaced during build.
2
- __version__ = "0.2.23"
3
- __version_tuple__ = (0, 2, 23)
2
+ __version__ = "0.2.24"
3
+ __version_tuple__ = (0, 2, 24)
@@ -426,12 +426,10 @@ def speech2text_for_conditional_generation(
426
426
  env.Env.get().require_package('torchaudio')
427
427
  env.Env.get().require_package('sentencepiece')
428
428
  device = resolve_torch_device('auto', allow_mps=False) # Doesn't seem to work on 'mps'; use 'cpu' instead
429
- import librosa
430
429
  import torch
430
+ import torchaudio # type: ignore[import-untyped]
431
431
  from transformers import Speech2TextForConditionalGeneration, Speech2TextProcessor
432
432
 
433
- # facebook/s2t-small-librispeech-asr
434
- # facebook/s2t-small-mustc-en-fr-st
435
433
  model = _lookup_model(model_id, Speech2TextForConditionalGeneration.from_pretrained, device=device)
436
434
  processor = _lookup_processor(model_id, Speech2TextProcessor.from_pretrained)
437
435
  assert isinstance(processor, Speech2TextProcessor)
@@ -445,12 +443,23 @@ def speech2text_for_conditional_generation(
445
443
 
446
444
  # Get the model's sampling rate. Default to 16 kHz (the standard) if not in config
447
445
  model_sampling_rate = getattr(model.config, 'sampling_rate', 16_000)
448
- waveform, sampling_rate = librosa.load(audio, sr=model_sampling_rate, mono=True)
446
+
447
+ waveform, sampling_rate = torchaudio.load(audio)
448
+
449
+ # Resample to the model's sampling rate, if necessary
450
+ if sampling_rate != model_sampling_rate:
451
+ waveform = torchaudio.transforms.Resample(sampling_rate, model_sampling_rate)(waveform)
452
+
453
+ # Average the channels to get a single-channel waveform as a 1D tensor (if the original waveform is already
454
+ # mono, this will simply squeeze the tensor)
455
+ assert waveform.dim() == 2
456
+ waveform = torch.mean(waveform, dim=0)
457
+ assert waveform.dim() == 1
449
458
 
450
459
  with torch.no_grad():
451
460
  inputs = processor(
452
461
  waveform,
453
- sampling_rate=sampling_rate,
462
+ sampling_rate=model_sampling_rate,
454
463
  return_tensors='pt'
455
464
  )
456
465
  generated_ids = model.generate(**inputs.to(device), forced_bos_token_id=forced_bos_token_id).to('cpu')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pixeltable
3
- Version: 0.2.23
3
+ Version: 0.2.24
4
4
  Summary: Pixeltable: The Multimodal AI Data Plane
5
5
  Author: Pixeltable, Inc.
6
6
  Author-email: contact@pixeltable.com
@@ -1,5 +1,5 @@
1
1
  pixeltable/__init__.py,sha256=gv2jvZ7H5tEjLear10E7hSH9jF5Mw6iSeryvRp88bOE,1391
2
- pixeltable/__version__.py,sha256=7KY-iWvvGLR_f-tQMjxiOTfMCj-z3KM_23EsiPZSTl8,114
2
+ pixeltable/__version__.py,sha256=w8n5Ad700zSFGVbR5bQpPtdgGwCXcl22svUmysoPXE0,114
3
3
  pixeltable/catalog/__init__.py,sha256=Ar6_F_6C7tkznIlCPBHVHDop5YssBDjKQr2NPQ21QCI,484
4
4
  pixeltable/catalog/catalog.py,sha256=tyDyI5wQw7vV6_FChrp9qgGCRClcjiSdW3eygYT0p9s,7849
5
5
  pixeltable/catalog/column.py,sha256=ezeKoGl6aBTzSZBihDA6vdETcvyCguAD4OsmrxWs73A,9595
@@ -74,7 +74,7 @@ pixeltable/functions/anthropic.py,sha256=P1E5o4-8QP1LTIUsWVgo_wMJ4WOnxtXUUXuFWUa
74
74
  pixeltable/functions/audio.py,sha256=7213nTnqKJ6vM9kalaoJ283OwX5SGEJN10vDhaRNZ6E,644
75
75
  pixeltable/functions/fireworks.py,sha256=qwFC_eIaDs-glxyJ_IVXaNGkpgPzeRsQ_SdpzueBxq0,2605
76
76
  pixeltable/functions/globals.py,sha256=pCFX2a_N87SwG9GxyPjSOC3TVMowMB6XIHSWKfFOuGE,3917
77
- pixeltable/functions/huggingface.py,sha256=hcScloxprIuyf89_bSnQi-N3VZRQosmZfBhmDJF_eTc,20814
77
+ pixeltable/functions/huggingface.py,sha256=s5KmOfi9-TOYyrL1Wv-voKP7ykkUN7LlLAA_uo01UQc,21210
78
78
  pixeltable/functions/image.py,sha256=3Qm4ybAT_o4YUl3bzhEXy8dKOwgZ7RCUV-ky-dbL_jc,13836
79
79
  pixeltable/functions/json.py,sha256=ehCnBA5WiIl-crV9PFVgmxrsWsiO8FpRs9LDwcSpLa4,879
80
80
  pixeltable/functions/llama_cpp.py,sha256=1awALuAXVpQH64l7vQlM8gvxLDix4c1-6DV3nG5RHu4,3881
@@ -146,8 +146,8 @@ pixeltable/utils/pytorch.py,sha256=6RvOCjy_QV4gc-aht-3d0zoASkuv-warfpl87vgmuKw,3
146
146
  pixeltable/utils/s3.py,sha256=huA5hxDGkPIu18zWet76o0FsO7Vbtp-iPmnOzCe-MvA,586
147
147
  pixeltable/utils/sql.py,sha256=j_tj0h4ffm-DhUIJbvGphxrVyBKlNTwDKqWGhRQ5_PY,795
148
148
  pixeltable/utils/transactional_directory.py,sha256=UGzCrGtLR3hEEf8sYGuWBzLVFAEQml3vdIavigWeTBM,1349
149
- pixeltable-0.2.23.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
150
- pixeltable-0.2.23.dist-info/METADATA,sha256=WYtMLp2PmlhHkQl6WUyvJFuJ2X-_lSB1VUO3i56dKwc,18112
151
- pixeltable-0.2.23.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
152
- pixeltable-0.2.23.dist-info/entry_points.txt,sha256=TNI1Gb5vPwFrTdw6TimSYjO8FeK8c_HuPr28vcf7o_I,108
153
- pixeltable-0.2.23.dist-info/RECORD,,
149
+ pixeltable-0.2.24.dist-info/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
150
+ pixeltable-0.2.24.dist-info/METADATA,sha256=pOqK9GV-mL4WjzzxeM9w4sJGZsJNSN05DhiTslxJLtk,18112
151
+ pixeltable-0.2.24.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
152
+ pixeltable-0.2.24.dist-info/entry_points.txt,sha256=TNI1Gb5vPwFrTdw6TimSYjO8FeK8c_HuPr28vcf7o_I,108
153
+ pixeltable-0.2.24.dist-info/RECORD,,