batchalign 0.7.20.post13__tar.gz → 0.7.20.post14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (125)
  1. {batchalign-0.7.20.post13/batchalign.egg-info → batchalign-0.7.20.post14}/PKG-INFO +2 -3
  2. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/analysis/eval.py +1 -1
  3. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/utils/abbrev.py +0 -18
  4. batchalign-0.7.20.post14/batchalign/version +3 -0
  5. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14/batchalign.egg-info}/PKG-INFO +2 -3
  6. batchalign-0.7.20.post13/batchalign/version +0 -3
  7. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/LICENSE +0 -0
  8. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/MANIFEST.in +0 -0
  9. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/README.md +0 -0
  10. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/__init__.py +0 -0
  11. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/__main__.py +0 -0
  12. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/cli/__init__.py +0 -0
  13. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/cli/cli.py +0 -0
  14. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/cli/dispatch.py +0 -0
  15. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/constants.py +0 -0
  16. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/document.py +0 -0
  17. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/errors.py +0 -0
  18. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/__init__.py +0 -0
  19. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/base.py +0 -0
  20. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/chat/__init__.py +0 -0
  21. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/chat/file.py +0 -0
  22. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/chat/generator.py +0 -0
  23. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/chat/lexer.py +0 -0
  24. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/chat/parser.py +0 -0
  25. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/chat/utils.py +0 -0
  26. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/textgrid/__init__.py +0 -0
  27. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/textgrid/file.py +0 -0
  28. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/textgrid/generator.py +0 -0
  29. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/formats/textgrid/parser.py +0 -0
  30. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/__init__.py +0 -0
  31. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/resolve.py +0 -0
  32. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/speaker/__init__.py +0 -0
  33. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/speaker/config.yaml +0 -0
  34. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/speaker/infer.py +0 -0
  35. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/speaker/utils.py +0 -0
  36. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/training/__init__.py +0 -0
  37. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/training/run.py +0 -0
  38. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/training/utils.py +0 -0
  39. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utils.py +0 -0
  40. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utterance/__init__.py +0 -0
  41. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utterance/cantonese_infer.py +0 -0
  42. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/wave2vec/__init__.py +0 -0
  48. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/wave2vec/infer_fa.py +0 -0
  49. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/whisper/__init__.py +0 -0
  50. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/whisper/infer_asr.py +0 -0
  51. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/models/whisper/infer_fa.py +0 -0
  52. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/__init__.py +0 -0
  53. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/analysis/__init__.py +0 -0
  54. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/asr/__init__.py +0 -0
  55. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/asr/num2chinese.py +0 -0
  56. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  57. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/asr/rev.py +0 -0
  58. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/asr/utils.py +0 -0
  59. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/asr/whisper.py +0 -0
  60. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/asr/whisperx.py +0 -0
  61. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/base.py +0 -0
  62. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/__init__.py +0 -0
  63. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  64. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  65. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  66. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/retrace.py +0 -0
  67. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  68. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  69. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/cleanup/support/test.test +0 -0
  70. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/dispatch.py +0 -0
  71. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/fa/__init__.py +0 -0
  72. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  73. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  74. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  75. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  76. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  77. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  78. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  79. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  80. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  81. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  82. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/pipeline.py +0 -0
  83. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/speaker/__init__.py +0 -0
  84. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  85. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/translate/__init__.py +0 -0
  86. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/translate/gtrans.py +0 -0
  87. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/translate/seamless.py +0 -0
  88. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/translate/utils.py +0 -0
  89. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/utr/__init__.py +0 -0
  90. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/utr/rev_utr.py +0 -0
  91. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/utr/utils.py +0 -0
  92. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  93. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/utterance/__init__.py +0 -0
  94. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  95. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/__init__.py +0 -0
  96. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/conftest.py +0 -0
  97. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  98. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  99. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  100. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  101. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  102. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  103. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  104. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  105. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  106. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  107. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  108. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  109. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/fixures.py +0 -0
  110. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  111. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  112. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/tests/test_document.py +0 -0
  113. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/utils/__init__.py +0 -0
  114. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/utils/compounds.py +0 -0
  115. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/utils/config.py +0 -0
  116. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/utils/dp.py +0 -0
  117. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/utils/names.py +0 -0
  118. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign/utils/utils.py +0 -0
  119. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign.egg-info/SOURCES.txt +0 -0
  120. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign.egg-info/dependency_links.txt +0 -0
  121. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign.egg-info/entry_points.txt +0 -0
  122. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign.egg-info/requires.txt +0 -0
  123. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/batchalign.egg-info/top_level.txt +0 -0
  124. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/setup.cfg +0 -0
  125. {batchalign-0.7.20.post13 → batchalign-0.7.20.post14}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.20.post13
3
+ Version: 0.7.20.post14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -48,7 +48,6 @@ Dynamic: author-email
48
48
  Dynamic: classifier
49
49
  Dynamic: description
50
50
  Dynamic: description-content-type
51
- Dynamic: license-file
52
51
  Dynamic: provides-extra
53
52
  Dynamic: requires-dist
54
53
  Dynamic: summary
@@ -20,7 +20,7 @@ L = logging.getLogger("batchalign")
20
20
  joined_compounds = ["".join(k) for k in compounds]
21
21
  lowered_abbrev = [k.lower() for k in abbrev]
22
22
 
23
- fillers = ["um", "uhm", "em", "mhm", "uhhm", "eh", "uh"]
23
+ fillers = ["um", "uhm", "em", "mhm", "uhhm", "eh", "uh", "hm"]
24
24
  def conform(x):
25
25
  result = []
26
26
  for i in x:
@@ -3,8 +3,6 @@ abbrev = [
3
3
  "CIA",
4
4
  "NSA",
5
5
  "NATO",
6
- "UN",
7
- "WHO",
8
6
  "NASA",
9
7
  "CDC",
10
8
  "IRS",
@@ -29,11 +27,9 @@ abbrev = [
29
27
  "GDP",
30
28
  "LLC",
31
29
  "HR",
32
- "M&",
33
30
  "KPI",
34
31
  "GPA",
35
32
  "SAT",
36
- "ACT",
37
33
  "MBA",
38
34
  "PhD",
39
35
  "BA",
@@ -106,7 +102,6 @@ abbrev = [
106
102
  "ICU",
107
103
  "NICU",
108
104
  "ER",
109
- "OR",
110
105
  "PT",
111
106
  "OT",
112
107
  "EM",
@@ -274,7 +269,6 @@ abbrev = [
274
269
  "STD",
275
270
  "TBA",
276
271
  "WBC",
277
- "WHO",
278
272
  "BP",
279
273
  "CDC",
280
274
  "EMT",
@@ -295,7 +289,6 @@ abbrev = [
295
289
  "SOP",
296
290
  "BEd",
297
291
  "CPA",
298
- "DO",
299
292
  "HS",
300
293
  "CO2",
301
294
  "H2O",
@@ -313,8 +306,6 @@ abbrev = [
313
306
  "NMR",
314
307
  "PCR",
315
308
  "FOSS",
316
- "STEM",
317
- "Toxic",
318
309
  "VOCs",
319
310
  "BOD",
320
311
  "ASAP",
@@ -333,9 +324,7 @@ abbrev = [
333
324
  "TMI",
334
325
  "VIP",
335
326
  "WTF",
336
- "YOLO",
337
327
  "TLDR",
338
- "SNAFU",
339
328
  "BTW",
340
329
  "AFK",
341
330
  "BTO",
@@ -343,9 +332,7 @@ abbrev = [
343
332
  "DS",
344
333
  "ER",
345
334
  "FAQ",
346
- "GOAT",
347
335
  "HR",
348
- "IT",
349
336
  "KPI",
350
337
  "LTD",
351
338
  "MO",
@@ -362,7 +349,6 @@ abbrev = [
362
349
  "XO",
363
350
  "YT",
364
351
  "ZA",
365
- "AAA",
366
352
  "BBC",
367
353
  "CBO",
368
354
  "DBA",
@@ -372,7 +358,6 @@ abbrev = [
372
358
  "HALO",
373
359
  "ICU",
374
360
  "JD",
375
- "KISS",
376
361
  "LOA",
377
362
  "MOU",
378
363
  "NHS",
@@ -398,7 +383,6 @@ abbrev = [
398
383
  "HTML",
399
384
  "IPA",
400
385
  "JS",
401
- "KNOW",
402
386
  "LED",
403
387
  "MAC",
404
388
  "NTP",
@@ -410,7 +394,6 @@ abbrev = [
410
394
  "TRA",
411
395
  "USP",
412
396
  "VCR",
413
- "WASP",
414
397
  "XYZ",
415
398
  "YOY",
416
399
  "ZIP",
@@ -433,7 +416,6 @@ abbrev = [
433
416
  "QT",
434
417
  "RAD",
435
418
  "SAD",
436
- "TOEFL",
437
419
  "UNC",
438
420
  "VAN",
439
421
  "WC",
@@ -0,0 +1,3 @@
1
+ 0.7.20-post.14
2
+ August 05, 2025
3
+ benchmarking fixes
@@ -1,6 +1,6 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.2
2
2
  Name: batchalign
3
- Version: 0.7.20.post13
3
+ Version: 0.7.20.post14
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -48,7 +48,6 @@ Dynamic: author-email
48
48
  Dynamic: classifier
49
49
  Dynamic: description
50
50
  Dynamic: description-content-type
51
- Dynamic: license-file
52
51
  Dynamic: provides-extra
53
52
  Dynamic: requires-dist
54
53
  Dynamic: summary
@@ -1,3 +0,0 @@
1
- 0.7.20-post.13
2
- July 30th, 2025
3
- even more parsing patches?