batchalign 0.7.6a18__tar.gz → 0.7.6a19__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (110) hide show
  1. {batchalign-0.7.6a18/batchalign.egg-info → batchalign-0.7.6a19}/PKG-INFO +1 -1
  2. batchalign-0.7.6a19/batchalign/pipelines/morphosyntax/en/irr.py +215 -0
  3. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/morphosyntax/ud.py +8 -1
  4. batchalign-0.7.6a19/batchalign/version +3 -0
  5. {batchalign-0.7.6a18 → batchalign-0.7.6a19/batchalign.egg-info}/PKG-INFO +1 -1
  6. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign.egg-info/SOURCES.txt +1 -0
  7. batchalign-0.7.6a18/batchalign/version +0 -3
  8. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/LICENSE +0 -0
  9. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/MANIFEST.in +0 -0
  10. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/README.md +0 -0
  11. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/__init__.py +0 -0
  12. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/__main__.py +0 -0
  13. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/cli/__init__.py +0 -0
  14. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/cli/cli.py +0 -0
  15. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/cli/dispatch.py +0 -0
  16. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/constants.py +0 -0
  17. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/document.py +0 -0
  18. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/errors.py +0 -0
  19. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/__init__.py +0 -0
  20. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/base.py +0 -0
  21. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/chat/__init__.py +0 -0
  22. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/chat/file.py +0 -0
  23. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/chat/generator.py +0 -0
  24. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/chat/lexer.py +0 -0
  25. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/chat/parser.py +0 -0
  26. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/chat/utils.py +0 -0
  27. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/textgrid/__init__.py +0 -0
  28. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/textgrid/file.py +0 -0
  29. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/textgrid/generator.py +0 -0
  30. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/formats/textgrid/parser.py +0 -0
  31. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/__init__.py +0 -0
  32. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/resolve.py +0 -0
  33. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/speaker/__init__.py +0 -0
  34. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/speaker/config.yaml +0 -0
  35. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/speaker/infer.py +0 -0
  36. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/speaker/utils.py +0 -0
  37. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/training/__init__.py +0 -0
  38. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/training/run.py +0 -0
  39. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/training/utils.py +0 -0
  40. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/utils.py +0 -0
  41. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/utterance/__init__.py +0 -0
  42. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/utterance/dataset.py +0 -0
  43. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/utterance/execute.py +0 -0
  44. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/utterance/infer.py +0 -0
  45. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/utterance/prep.py +0 -0
  46. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/utterance/train.py +0 -0
  47. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/whisper/__init__.py +0 -0
  48. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/whisper/infer_asr.py +0 -0
  49. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/models/whisper/infer_fa.py +0 -0
  50. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/__init__.py +0 -0
  51. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/analysis/__init__.py +0 -0
  52. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/analysis/eval.py +0 -0
  53. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/asr/__init__.py +0 -0
  54. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/asr/rev.py +0 -0
  55. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/asr/utils.py +0 -0
  56. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/asr/whisper.py +0 -0
  57. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/asr/whisperx.py +0 -0
  58. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/base.py +0 -0
  59. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/__init__.py +0 -0
  60. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  61. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  62. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  63. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/retrace.py +0 -0
  64. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  65. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  66. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/cleanup/support/test.test +0 -0
  67. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/dispatch.py +0 -0
  68. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/fa/__init__.py +0 -0
  69. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  70. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  71. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  72. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  73. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  74. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/pipeline.py +0 -0
  75. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/speaker/__init__.py +0 -0
  76. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  77. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/utr/__init__.py +0 -0
  78. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/utr/rev_utr.py +0 -0
  79. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/utr/utils.py +0 -0
  80. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  81. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/utterance/__init__.py +0 -0
  82. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  83. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/__init__.py +0 -0
  84. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/conftest.py +0 -0
  85. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  86. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  87. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  88. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  89. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  90. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  91. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  92. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  93. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  94. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  95. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  96. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  97. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/fixures.py +0 -0
  98. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  99. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  100. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/tests/test_document.py +0 -0
  101. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/utils/__init__.py +0 -0
  102. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/utils/config.py +0 -0
  103. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/utils/dp.py +0 -0
  104. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign/utils/utils.py +0 -0
  105. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign.egg-info/dependency_links.txt +0 -0
  106. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign.egg-info/entry_points.txt +0 -0
  107. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign.egg-info/requires.txt +0 -0
  108. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/batchalign.egg-info/top_level.txt +0 -0
  109. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/setup.cfg +0 -0
  110. {batchalign-0.7.6a18 → batchalign-0.7.6a19}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a18
3
+ Version: 0.7.6a19
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -0,0 +1,215 @@
1
# Table of English irregular verbs, one entry per line in the form
#     lemma: form[, form...]
# where the forms are the irregular past-tense / past-participle spellings
# of that lemma.  The text is parsed at runtime by splitting each line on
# ":" and then on ",", so every line must contain exactly one colon and
# nothing inside the string may be a comment or a blank line.
IRR = """
abide: abode, abode
arise: arose, arisen
awake: awoke, awoken
be: was, were, been
bear: bore, borne
beat: beat, beaten
become: became, become
befall: befell, befallen
begin: began, begun
beget: begot, begotten
behold: beheld, beholden
bend: bent, bent
bereave: bereft, bereft
beseech: besought, besought
bet: bet, bet
betake: betook, betaken
bid: bid, bade, bidden
bind: bound, bound
bite: bit, bitten
bleed: bled, bled
blow: blew, blown
break: broke, broken
breed: bred, bred
bring: brought, brought
build: built, built
burn: burnt
burst: burst, burst
buy: bought, bought
cast: cast, cast
catch: caught, caught
choose: chose, chosen
clad: clad, clad
cleave: cleft, cloven
cling: clung, clung
come: came, come
cost: cost, cost
creep: crept, crept
cut: cut, cut
deal: dealt, dealt
dig: dug, dug
dive: dove, dived
do: did, done
draw: drew, drawn
dream: dreamt, dreamt
drink: drank, drunk
drive: drove, driven
dwell: dwelt, dwelt
eat: ate, eaten
fall: fell, fallen
feed: fed, fed
feel: felt, felt
fight: fought, fought
find: found, found
fit: fit, fit
flee: fled, fled
fling: flung, flung
fly: flew, flown
forbid: forbade, forbidden
forecast: forecast, forecast
forget: forgot, forgotten
forgo: forewent, foregone
foresee: foresaw, foreseen
foretell: foretold, foretold
forgive: forgave, forgiven
forsake: forsook, forsaken
forswear: forswore, forsworn
freeze: froze, frozen
get: got, gotten
gild: gilt
give: gave, given
go: went, gone
grind: ground, ground
grow: grew, grown
hang: hung, hung
have: had, had
hear: heard, heard
hew: hewn
hide: hid, hidden
hit: hit, hit
hold: held, held
hurt: hurt, hurt
inlay: inlaid, inlaid
inset: inset, inset
input: input, input
interlay: interlaid, interlaid
interweave: interwoven
keep: kept, kept
kneel: knelt, knelt
knit: knit, knit
know: knew, known
lay: laid, laid
lead: led, led
leap: leapt, leapt
led: led, led
leave: left, left
lend: lent, lent
let: let, let
lie: lay, lain
lose: lost, lost
make: made, made
mean: meant, meant
meet: met, met
misspeak: misspoke, misspoken
mistake: mistook, mistaken
offset: offset, offset
overdo: overdid, overdone
outbid: outbid, outbid
pay: paid, paid
partake: partook, partaken
plead: pled, pled
prepay: prepaid, prepaid
prove: proven
put: put, put
quit: quit, quit
recast: recast, recast
redo: redid, redone
remake: remade, remade
reset: reset, reset
read: read, read
rend: rent, rent
rid: rid, ridden
ride: rode, ridden
ring: rang, rung
rise: rose, risen
run: ran, run
say: said, said
seek: sought, sought
see: saw, seen
sell: sold, sold
send: sent, sent
set: set, set
sew: sewn
shake: shook, shaken
shave: shaven
shed: shed, shed
shine: shone, shone
shoot: shot, shot
show: shown
shrink: shrank, shrunk
shut: shut, shut
sing: sang, sung
sink: sank, sunk
sit: sat, sat
slay: slew, slain
sleep: slept, slept
slide: slid, slid
slink: slunk, slunk
slit: slit, slit
smite: smote, smitten
sneak: snuck, snuck
speak: spoke, spoken
speed: sped, sped
spend: spent, spent
spin: spun, spun
spit: spit, spit
split: split, split
spread: spread, spread
spring: sprang, sprung
stand: stood, stood
steal: stole, stolen
stick: stuck, stuck
sting: stung, stung
stink: stank, stunk
strew: strewn
strike: struck, struck
string: strung, strung
strive: strove, striven
swear: swore, sworn
sweep: swept, swept
swell: swollen
swim: swam, swum
swing: swung, swung
take: took, taken
teach: taught, taught
tear: tore, torn
tell: told, told
think: thought, thought
throw: threw, thrown
thrust: thrust, thrust
tread: trod, trod
unbend: unbended, unbent
underlie: underlay, underlain
undergo: underwent, undergone
understand: understood, understood
upset: upset, upset
wake: woke, woken
waylay: waylaid, waylaid
wear: wore, worn
weave: wove, woven
wed: wed, wed
weep: wept, wept
wet: wet, wet
win: won, won
wind: wound, wound
withdraw: withdrew, withdrawn
withhold: withheld, withheld
withstand: withstood, withstood
wring: wrung, wrung
write: wrote, written
wreak: wrought, wrought
"""
204
+
205
# Lazily-built lookup {lemma: set of irregular forms}; None until first use.
_IRR_TABLE = None


def _irregular_table():
    """Parse the IRR text once and return the cached lemma -> forms mapping."""
    global _IRR_TABLE
    if _IRR_TABLE is None:
        table = {}
        for line in IRR.strip().split("\n"):
            lemma, sep, forms = line.partition(":")
            if not sep:
                # Blank or malformed line: skip it instead of crashing
                # on tuple unpacking.
                continue
            table[lemma.strip()] = {
                f.strip() for f in forms.split(",") if f.strip()
            }
        _IRR_TABLE = table
    return _IRR_TABLE


def is_irregular(lemma, form):
    """Report whether `form` is a listed irregular form of the verb `lemma`.

    Both arguments are compared case-insensitively against the IRR table.

    Args:
        lemma: The verb's dictionary form, e.g. "go".
        form: The surface form to check, e.g. "went".

    Returns:
        True if `lemma` has an entry in IRR and `form` is one of its listed
        forms; False otherwise, including when either argument is None or
        empty.
    """
    # A missing lemma or surface form cannot match anything; bail out early
    # rather than fail on `.lower()`.
    if not lemma or not form:
        return False

    forms = _irregular_table().get(lemma.lower())
    if not forms:
        return False
    return form.lower() in forms
@@ -248,6 +248,13 @@ def handler__VERB(word, lang=None):
248
248
  polarity = feats.get("Polarity", "")
249
249
  polite = feats.get("Polite", "")
250
250
 
251
+ irr = False
252
+ if lang == "en" and tense == "Past":
253
+ from batchalign.pipelines.morphosyntax.en.irr import is_irregular
254
+ irr = is_irregular(word.lemma, word.text)
255
+ irr = "irr" if irr else ""
256
+
257
+
251
258
  res = handler(word, lang)
252
259
  if "sconj" in res:
253
260
  return res
@@ -256,7 +263,7 @@ def handler__VERB(word, lang=None):
256
263
  else:
257
264
  return res+flag+stringify_feats(aspect, mood,
258
265
  tense, polarity, polite,
259
- number[:1]+person)
266
+ number[:1]+person, irr)
260
267
 
261
268
  def handler__actual_PUNCT(word, lang=None):
262
269
  # actual punctuation handler
@@ -0,0 +1,3 @@
1
+ 0.7.6-alpha.19
2
+ October 16, 2024
3
+ past irregular
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: batchalign
3
- Version: 0.7.6a18
3
+ Version: 0.7.6a19
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -72,6 +72,7 @@ batchalign/pipelines/fa/whisper_fa.py
72
72
  batchalign/pipelines/morphosyntax/__init__.py
73
73
  batchalign/pipelines/morphosyntax/coref.py
74
74
  batchalign/pipelines/morphosyntax/ud.py
75
+ batchalign/pipelines/morphosyntax/en/irr.py
75
76
  batchalign/pipelines/morphosyntax/fr/case.py
76
77
  batchalign/pipelines/morphosyntax/ja/verbforms.py
77
78
  batchalign/pipelines/speaker/__init__.py
@@ -1,3 +0,0 @@
1
- 0.7.6-alpha.18
2
- October 16, 2024
3
- fixing asr for file names
File without changes
File without changes
File without changes
File without changes
File without changes