batchalign 0.7.19.post11__tar.gz → 0.7.19.post15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of batchalign might be problematic. Click here for more details.

Files changed (125) hide show
  1. {batchalign-0.7.19.post11/batchalign.egg-info → batchalign-0.7.19.post15}/PKG-INFO +1 -1
  2. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/analysis/eval.py +61 -23
  3. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/asr/utils.py +31 -3
  4. batchalign-0.7.19.post15/batchalign/utils/compounds.py +1 -0
  5. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/utils/dp.py +6 -5
  6. batchalign-0.7.19.post15/batchalign/version +3 -0
  7. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15/batchalign.egg-info}/PKG-INFO +1 -1
  8. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign.egg-info/SOURCES.txt +1 -0
  9. batchalign-0.7.19.post11/batchalign/version +0 -3
  10. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/LICENSE +0 -0
  11. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/MANIFEST.in +0 -0
  12. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/README.md +0 -0
  13. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/__init__.py +0 -0
  14. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/__main__.py +0 -0
  15. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/cli/__init__.py +0 -0
  16. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/cli/cli.py +0 -0
  17. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/cli/dispatch.py +0 -0
  18. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/constants.py +0 -0
  19. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/document.py +0 -0
  20. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/errors.py +0 -0
  21. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/__init__.py +0 -0
  22. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/base.py +0 -0
  23. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/chat/__init__.py +0 -0
  24. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/chat/file.py +0 -0
  25. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/chat/generator.py +0 -0
  26. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/chat/lexer.py +0 -0
  27. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/chat/parser.py +0 -0
  28. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/chat/utils.py +0 -0
  29. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/textgrid/__init__.py +0 -0
  30. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/textgrid/file.py +0 -0
  31. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/textgrid/generator.py +0 -0
  32. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/formats/textgrid/parser.py +0 -0
  33. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/__init__.py +0 -0
  34. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/resolve.py +0 -0
  35. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/speaker/__init__.py +0 -0
  36. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/speaker/config.yaml +0 -0
  37. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/speaker/infer.py +0 -0
  38. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/speaker/utils.py +0 -0
  39. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/training/__init__.py +0 -0
  40. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/training/run.py +0 -0
  41. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/training/utils.py +0 -0
  42. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utils.py +0 -0
  43. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utterance/__init__.py +0 -0
  44. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utterance/cantonese_infer.py +0 -0
  45. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utterance/dataset.py +0 -0
  46. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utterance/execute.py +0 -0
  47. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utterance/infer.py +0 -0
  48. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utterance/prep.py +0 -0
  49. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/utterance/train.py +0 -0
  50. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/wave2vec/__init__.py +0 -0
  51. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/wave2vec/infer_fa.py +0 -0
  52. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/whisper/__init__.py +0 -0
  53. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/whisper/infer_asr.py +0 -0
  54. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/models/whisper/infer_fa.py +0 -0
  55. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/__init__.py +0 -0
  56. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/analysis/__init__.py +0 -0
  57. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/asr/__init__.py +0 -0
  58. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/asr/num2chinese.py +0 -0
  59. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/asr/oai_whisper.py +0 -0
  60. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/asr/rev.py +0 -0
  61. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/asr/whisper.py +0 -0
  62. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/asr/whisperx.py +0 -0
  63. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/base.py +0 -0
  64. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/__init__.py +0 -0
  65. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/cleanup.py +0 -0
  66. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/disfluencies.py +0 -0
  67. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/parse_support.py +0 -0
  68. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/retrace.py +0 -0
  69. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/support/filled_pauses.eng +0 -0
  70. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/support/replacements.eng +0 -0
  71. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/cleanup/support/test.test +0 -0
  72. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/dispatch.py +0 -0
  73. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/fa/__init__.py +0 -0
  74. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/fa/wave2vec_fa.py +0 -0
  75. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/fa/whisper_fa.py +0 -0
  76. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/__init__.py +0 -0
  77. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/coref.py +0 -0
  78. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/en/irr.py +0 -0
  79. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/fr/apm.py +0 -0
  80. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/fr/apmn.py +0 -0
  81. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/fr/case.py +0 -0
  82. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/ja/verbforms.py +0 -0
  83. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/morphosyntax/ud.py +0 -0
  84. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/pipeline.py +0 -0
  85. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/speaker/__init__.py +0 -0
  86. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/speaker/nemo_speaker.py +0 -0
  87. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/translate/__init__.py +0 -0
  88. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/translate/gtrans.py +0 -0
  89. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/translate/seamless.py +0 -0
  90. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/translate/utils.py +0 -0
  91. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/utr/__init__.py +0 -0
  92. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/utr/rev_utr.py +0 -0
  93. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/utr/utils.py +0 -0
  94. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/utr/whisper_utr.py +0 -0
  95. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/utterance/__init__.py +0 -0
  96. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/pipelines/utterance/ud_utterance.py +0 -0
  97. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/__init__.py +0 -0
  98. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/conftest.py +0 -0
  99. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/formats/chat/test_chat_file.py +0 -0
  100. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/formats/chat/test_chat_generator.py +0 -0
  101. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/formats/chat/test_chat_lexer.py +0 -0
  102. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/formats/chat/test_chat_parser.py +0 -0
  103. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/formats/chat/test_chat_utils.py +0 -0
  104. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/formats/textgrid/test_textgrid.py +0 -0
  105. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/analysis/test_eval.py +0 -0
  106. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/asr/test_asr_pipeline.py +0 -0
  107. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/asr/test_asr_utils.py +0 -0
  108. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/cleanup/test_disfluency.py +0 -0
  109. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/cleanup/test_parse_support.py +0 -0
  110. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/fa/test_fa_pipeline.py +0 -0
  111. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/fixures.py +0 -0
  112. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/test_pipeline.py +0 -0
  113. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/pipelines/test_pipeline_models.py +0 -0
  114. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/tests/test_document.py +0 -0
  115. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/utils/__init__.py +0 -0
  116. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/utils/abbrev.py +0 -0
  117. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/utils/config.py +0 -0
  118. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/utils/names.py +0 -0
  119. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign/utils/utils.py +0 -0
  120. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign.egg-info/dependency_links.txt +0 -0
  121. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign.egg-info/entry_points.txt +0 -0
  122. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign.egg-info/requires.txt +0 -0
  123. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/batchalign.egg-info/top_level.txt +0 -0
  124. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/setup.cfg +0 -0
  125. {batchalign-0.7.19.post11 → batchalign-0.7.19.post15}/setup.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.19.post11
3
+ Version: 0.7.19.post15
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -11,25 +11,47 @@ from batchalign.utils.config import config_read
11
11
 
12
12
  from batchalign.utils.dp import align, ExtraType, Extra, Match
13
13
  from batchalign.utils.names import names
14
+ from batchalign.utils.compounds import compounds
14
15
 
15
16
  import logging
16
17
  L = logging.getLogger("batchalign")
17
18
 
19
+ joined_compounds = ["".join(k) for k in compounds]
20
+
21
+ fillers = ["um", "uhm", "em", "mhm", "uhhm", "eh", "uh"]
18
22
  def conform(x):
19
23
  result = []
20
24
  for i in x:
21
- if "'s" in i.strip():
25
+ if i.strip() in joined_compounds:
26
+ for k in compounds[joined_compounds.index(i.strip())]:
27
+ result.append(k)
28
+ elif "'s" in i.strip():
22
29
  result.append(i.split("'")[0])
23
30
  result.append("is")
24
- elif "americanstyle" == i.strip():
25
- result.append("american")
26
- result.append("style")
27
- elif "postwar" == i.strip():
28
- result.append("post")
29
- result.append("war")
30
- elif "postwar" == i.strip():
31
- result.append("post")
32
- result.append("war")
31
+ elif "'ve" in i.strip():
32
+ result.append(i.split("'")[0])
33
+ result.append("have")
34
+ elif "'s" in i.strip():
35
+ result.append(i.split("'")[0])
36
+ result.append("is")
37
+ elif "'d" in i.strip():
38
+ result.append(i.split("'")[0])
39
+ result.append("had")
40
+ elif "'m" in i.strip():
41
+ result.append(i.split("'")[0])
42
+ result.append("am")
43
+ elif i.strip() in fillers:
44
+ result.append("um")
45
+ elif "-" in i.strip():
46
+ result += [k.strip() for k in i.split("-")]
47
+ elif "ok" == i.strip():
48
+ result.append("okay")
49
+ elif "gimme" == i.strip():
50
+ result.append("give")
51
+ result.append("me")
52
+ elif "hafta" == i.strip() or "havta" == i.strip():
53
+ result.append("have")
54
+ result.append("to")
33
55
  elif i.strip() in names:
34
56
  result.append("name")
35
57
  elif "dunno" == i.strip():
@@ -38,21 +60,16 @@ def conform(x):
38
60
  elif "wanna" == i.strip():
39
61
  result.append("want")
40
62
  result.append("to")
63
+ elif "bbc" == i.strip():
64
+ result.append("b")
65
+ result.append("b")
66
+ result.append("c")
41
67
  elif "ii" == i.strip():
42
68
  result.append("i")
43
69
  result.append("i")
44
70
  elif "i'd" == i.strip():
45
71
  result.append("i")
46
72
  result.append("had")
47
- elif "tshirts" == i.strip():
48
- result.append("t")
49
- result.append("shirts")
50
- elif "tshirts" == i.strip():
51
- result.append("t")
52
- result.append("shirts")
53
- elif "anytime" == i.strip():
54
- result.append("any")
55
- result.append("time")
56
73
  elif "alright" == i.strip():
57
74
  result.append("all")
58
75
  result.append("right")
@@ -73,18 +90,34 @@ def conform(x):
73
90
  elif "gotta" == i.strip():
74
91
  result.append("got")
75
92
  result.append("to")
93
+ elif "hadta" == i.strip():
94
+ result.append("had")
95
+ result.append("to")
76
96
  elif "eh" == i.strip():
77
97
  result.append("uh")
78
98
  elif "kinda" == i.strip():
79
- result.append("a")
80
99
  result.append("kind")
81
100
  result.append("of")
101
+ elif "gonna" == i.strip():
102
+ result.append("going")
103
+ result.append("to")
104
+ elif "shoulda" == i.strip():
105
+ result.append("should")
106
+ result.append("have")
107
+ elif "sposta" == i.strip():
108
+ result.append("supposed")
109
+ result.append("to")
82
110
  elif "farmhouse" == i.strip():
83
111
  result.append("farm")
84
112
  result.append("house")
85
113
  elif "aa" == i.strip():
86
114
  result.append("a")
87
115
  result.append("a")
116
+ elif "aa" == i.strip():
117
+ result.append("a")
118
+ result.append("a")
119
+ elif "em" == i.strip():
120
+ result.append("them")
88
121
  elif "hmm" == i.strip():
89
122
  result.append("hm")
90
123
  elif "_" in i.strip():
@@ -95,6 +128,11 @@ def conform(x):
95
128
 
96
129
  return result
97
130
 
131
+ def match_fn(x,y):
132
+ return (y == x or
133
+ y.replace("(", "").replace(")", "") == x.replace("(", "").replace(")", "") or
134
+ re.sub(r"\((.*)\)",r"", y) == x or re.sub(r"\((.*)\)",r"", x) == y)
135
+
98
136
  class EvaluationEngine(BatchalignEngine):
99
137
  tasks = [ Task.WER ]
100
138
 
@@ -107,8 +145,8 @@ class EvaluationEngine(BatchalignEngine):
107
145
  forms = [i.replace("-", "") for i in forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
108
146
  gold_forms = [i.replace("-", "") for i in gold_forms if i.strip() not in MOR_PUNCT+ENDING_PUNCT]
109
147
 
110
- forms = [re.sub(r"\((.*)\)",r"", i) for i in forms]
111
- gold_forms = [re.sub(r"\((.*)\)",r"", i) for i in gold_forms]
148
+ # forms = [re.sub(r"\((.*)\)",r"", i) for i in forms]
149
+ # gold_forms = [re.sub(r"\((.*)\)",r"", i) for i in gold_forms]
112
150
 
113
151
  # if there are single letter frames, we combine them tofgether
114
152
  # until the utterance is done or there isn't any left
@@ -149,7 +187,7 @@ class EvaluationEngine(BatchalignEngine):
149
187
  forms_final = conform(forms_final)
150
188
 
151
189
  # dp!
152
- alignment = align(forms_final, gold_final, False)
190
+ alignment = align(forms_final, gold_final, False, match_fn)
153
191
 
154
192
  # calculate each type of error
155
193
  sub = 0
@@ -8,6 +8,34 @@ from batchalign.pipelines.asr.num2chinese import num2chinese
8
8
  from num2words import num2words
9
9
  import pycountry
10
10
 
11
+ from batchalign.utils.compounds import compounds
12
+
13
+ from copy import deepcopy
14
+
15
+ def merge_on_wordlist(x):
16
+ """merges generation list x on compounds"""
17
+ x = deepcopy(x)
18
+ if len(x) < 2:
19
+ return x
20
+
21
+ emit = []
22
+ buf = []
23
+ while len(x) > 0:
24
+ while len(x) > 0 and len(buf) < 2:
25
+ buf.append(x.pop(0))
26
+ if [i["value"] for i in buf] in compounds:
27
+ emit.append({
28
+ "value": "".join([i["value"] for i in buf]),
29
+ "ts": buf[0]["ts"],
30
+ "end_ts": buf[-1]["ts"],
31
+ "type": "text",
32
+ })
33
+ buf = []
34
+ else:
35
+ emit.append(buf.pop(0))
36
+ emit += buf
37
+
38
+ return emit
11
39
 
12
40
  def retokenize(intermediate_output):
13
41
  """Retokenize the output of the ASR system from one giant blob to utterances
@@ -134,15 +162,15 @@ def process_generation(output, lang="eng", utterance_engine=None):
134
162
 
135
163
  for utterance in output["monologues"]:
136
164
  # get a list of words
137
- words = utterance["elements"]
165
+ words = merge_on_wordlist(utterance["elements"])
138
166
  # coallate words (not punct) into the shape we expect
139
167
  # which is ['word', [start_ms, end_ms]]. Yes, this would
140
168
  # involve multiplying by 1000 to s => ms
141
169
  words = [[i["value"], [round(i["ts"]*1000) if i.get("ts") != None else None,
142
- round(i["end_ts"]*1000) if i.get("end_ts") != None else None]] # the shape
170
+ round(i["end_ts"]*1000) if i.get("end_ts") != None else None]] # the shape
143
171
  for i in words # for each word
144
172
  if i["value"].strip() != "" and
145
- not re.match(r'<.*>', i["value"])] # if its text (i.e. not "pause")
173
+ not re.match(r'<.*>', i["value"])] # if its text (i.e. not "pause")
146
174
 
147
175
  # sometimes, the system outputs two forms with a space as one single
148
176
  # word. we need to interpolate the space between them
@@ -0,0 +1 @@
1
+ compounds = [['air', 'bag'], ['air', 'boat'], ['air', 'con'], ['air', 'conditioner'], ['air', 'craft'], ['air', 'fare'], ['air', 'field'], ['air', 'flow'], ['air', 'force'], ['air', 'frame'], ['air', 'head'], ['air', 'lift'], ['air', 'line'], ['air', 'liner'], ['air', 'lock'], ['air', 'mail'], ['air', 'man'], ['air', 'mask'], ['air', 'plane'], ['air', 'play'], ['air', 'port'], ['air', 'ship'], ['air', 'speed'], ['air', 'strip'], ['air', 'wave'], ['air', 'way'], ['alley', 'way'], ['anchor', 'man'], ['angel', 'fish'], ['angel', 'food'], ['angle', 'worm'], ['angler', 'fish'], ['ant', 'catcher'], ['ant', 'eater'], ['ant', 'hill'], ['ape', 'man'], ['apple', 'jack'], ['apple', 'sauce'], ['apron', 'string'], ['arch', 'way'], ['arm', 'band'], ['arm', 'chair'], ['arm', 'hole'], ['arm', 'load'], ['arm', 'pit'], ['arm', 'rest'], ['arm', 'sling'], ['aroma', 'therapy'], ['arrow', 'head'], ['art', 'work'], ['ash', 'bin'], ['ash', 'fall'], ['ash', 'tray'], ['ass', 'hole'], ['auto', 'bank'], ['baby', 'bath'], ['baby', 'bottle'], ['baby', 'cake'], ['baby', 'doll'], ['baby', 'food'], ['baby', 'girl'], ['baby', 'seat'], ['baby', 'talk'], ['back', 'ache'], ['back', 'bend'], ['back', 'board'], ['back', 'bone'], ['back', 'burner'], ['back', 'drop'], ['back', 'ground'], ['back', 'hand'], ['back', 'lane'], ['back', 'log'], ['back', 'man'], ['back', 'pack'], ['back', 'rest'], ['back', 'scrubber'], ['back', 'seat'], ['back', 'side'], ['back', 'space'], ['back', 'stitch'], ['back', 'story'], ['back', 'stroke'], ['back', 'swamp'], ['back', 'yard'], ['bag', 'lady'], ['bag', 'pipe'], ['bake', 'shop'], ['baker', 'man'], ['american', 'style'], ['any', 'time'], ['t', 'shirts'], ['non', 'verbals'], ['non', 'verbal'], ['post', 'war'], ['ball', 'game'], ['ball', 'gown'], ['ball', 'park'], ['ball', 'pen'], ['ball', 'player'], ['ball', 'point'], ['ball', 'room'], ['balloons', 'man'], ['band', 'hat'], ['band', 'stand'], ['band', 'wagon'], ['band', 'width'], ['bar', 'bell'], ['bar', 'code'], ['bar', 
'man'], ['bar', 'room'], ['bar', 'stool'], ['barber', 'shop'], ['barn', 'stormer'], ['barn', 'yard'], ['base', 'ball'], ['base', 'board'], ['base', 'line'], ['base', 'man'], ['basket', 'ball'], ['bat', 'mobile'], ['bat', 'room'], ['bath', 'house'], ['bath', 'robe'], ['bath', 'room'], ['bath', 'time'], ['bath', 'tub'], ['bath', 'water'], ['bathing', 'suit'], ['battle', 'axe'], ['battle', 'cry'], ['battle', 'field'], ['battle', 'front'], ['battle', 'ground'], ['battle', 'ship'], ['battle', 'star'], ['bay', 'berry'], ['bay', 'leaf'], ['beach', 'ball'], ['beach', 'head'], ['bead', 'work'], ['beads', 'man'], ['bean', 'bag'], ['bean', 'stalk'], ['bear', 'hug'], ['beat', 'box'], ['beaver', 'tail'], ['bed', 'bug'], ['bed', 'cover'], ['bed', 'fellow'], ['bed', 'ground'], ['bed', 'head'], ['bed', 'hog'], ['bed', 'lamp'], ['bed', 'mattress'], ['bed', 'pan'], ['bed', 'post'], ['bed', 'rail'], ['bed', 'rock'], ['bed', 'room'], ['bed', 'sheet'], ['bed', 'side'], ['bed', 'spread'], ['bed', 'spring'], ['bed', 'straw'], ['bed', 'time'], ['bee', 'house'], ['bee', 'keeper'], ['bee', 'line'], ['beef', 'burger'], ['bees', 'wax'], ['beet', 'root'], ['beetle', 'car'], ['bell', 'bottom'], ['bell', 'boy'], ['belly', 'ache'], ['belly', 'button'], ['belt', 'way'], ['bench', 'mark'], ['bill', 'board'], ['bin', 'bag'], ['bin', 'boy'], ['bin', 'lorry'], ['bird', 'bath'], ['bird', 'brain'], ['bird', 'cage'], ['bird', 'feeder'], ['bird', 'house'], ['bird', 'seed'], ['bird', 'song'], ['birth', 'control'], ['birth', 'date'], ['birth', 'day'], ['birth', 'mark'], ['birth', 'place'], ['birth', 'right'], ['birth', 'stone'], ['bit', 'coin'], ['block', 'buster'], ['block', 'head'], ['block', 'house'], ['blood', 'hound'], ['blood', 'lust'], ['blood', 'root'], ['blood', 'stream'], ['blood', 'sucker'], ['blood', 'work'], ['board', 'room'], ['board', 'walk'], ['boat', 'builder'], ['boat', 'house'], ['boat', 'load'], ['boat', 'man'], ['boat', 'swain'], ['boat', 'well'], ['boat', 'yard'], ['boats', 'man'], 
['bob', 'sled'], ['bobby', 'pin'], ['body', 'builder'], ['body', 'guard'], ['body', 'suit'], ['body', 'wash'], ['body', 'wind'], ['bogey', 'man'], ['boiler', 'plate'], ['bolt', 'hole'], ['bomb', 'shell'], ['bonds', 'man'], ['bone', 'head'], ['boogey', 'man'], ['boogy', 'man'], ['book', 'bag'], ['book', 'case'], ['book', 'end'], ['book', 'keeper'], ['book', 'mark'], ['book', 'mobile'], ['book', 'seller'], ['book', 'shelf'], ['book', 'shelves'], ['book', 'shop'], ['book', 'stand'], ['book', 'store'], ['book', 'work'], ['book', 'worm'], ['boom', 'box'], ['boom', 'town'], ['boot', 'camp'], ['boot', 'strap'], ['border', 'land'], ['border', 'line'], ['boss', 'man'], ['bossy', 'boots'], ['bottle', 'cap'], ['bottle', 'neck'], ['bottom', 'land'], ['bottom', 'side'], ['bow', 'tie'], ['bows', 'man'], ['box', 'car'], ['box', 'fish'], ['boy', 'friend'], ['boy', 'scout'], ['brain', 'child'], ['brain', 'storm'], ['brain', 'wave'], ['brake', 'man'], ['bramble', 'berry'], ['branch', 'line'], ['bread', 'box'], ['bread', 'crumb'], ['bread', 'stick'], ['bread', 'winner'], ['break', 'dancer'], ['break', 'time'], ['breakfast', 'time'], ['breast', 'bone'], ['breast', 'plate'], ['breast', 'stroke'], ['breeze', 'way'], ['briar', 'wood'], ['brick', 'layer'], ['brick', 'mason'], ['brick', 'yard'], ['bride', 'groom'], ['brides', 'maid'], ['bridge', 'head'], ['bridge', 'work'], ['brief', 'case'], ['broom', 'stick'], ['brush', 'cut'], ['brush', 'fire'], ['brush', 'stroke'], ['brush', 'work'], ['bubble', 'bath'], ['bubble', 'gum'], ['bubble', 'wand'], ['buck', 'board'], ['buck', 'eye'], ['buck', 'shot'], ['buck', 'skin'], ['buck', 'wheat'], ['bucket', 'load'], ['buffalo', 'burger'], ['bug', 'bed'], ['bug', 'house'], ['buggy', 'edger'], ['bulk', 'head'], ["bull's", 'eye'], ['bull', 'crap'], ['bull', 'dog'], ['bull', 'frog'], ['bull', 'hide'], ['bull', 'nose'], ['bull', 'rush'], ['bull', 'shit'], ['bull', 'walker'], ['bulls', 'eye'], ['bum', 'head'], ['bumble', 'bee'], ['bun', 'tin'], ['bunk', 
'bed'], ['bunk', 'house'], ['bunk', 'mate'], ['bunny', 'girl'], ['bus', 'driver'], ['bus', 'load'], ['bus', 'stop'], ['bush', 'land'], ['bush', 'master'], ['business', 'woman'], ['butch', 'wax'], ['butt', 'face'], ['butt', 'head'], ['butter', 'ball'], ['butter', 'brickle'], ['butter', 'cream'], ['butter', 'cup'], ['butter', 'fat'], ['butter', 'finger'], ['butter', 'fly'], ['butter', 'milk'], ['butter', 'nut'], ['butter', 'scotch'], ['button', 'bush'], ['button', 'hole'], ['buzz', 'saw'], ['cake', 'pan'], ['cake', 'tin'], ['cake', 'walk'], ['calf', 'skin'], ['camera', 'man'], ['camp', 'fire'], ['camp', 'ground'], ['camp', 'master'], ['camp', 'site'], ['camp', 'town'], ['camper', 'van'], ['candle', 'holder'], ['candle', 'light'], ['candle', 'stick'], ['candy', 'cane'], ['candy', 'floss'], ['candy', 'man'], ['candy', 'stripe'], ['cannon', 'ball'], ['canyon', 'side'], ['cap', 'stone'], ['car', 'bed'], ['car', 'boot'], ['car', 'fare'], ['car', 'load'], ['car', 'park'], ['car', 'pool'], ['car', 'port'], ['car', 'seat'], ['car', 'wash'], ['care', 'giver'], ['care', 'taker'], ['carriage', 'man'], ['carriage', 'way'], ['carrot', 'head'], ['cart', 'horse'], ['cart', 'wheel'], ['case', 'book'], ['case', 'load'], ['case', 'work'], ['case', 'worker'], ['cat', 'bird'], ['cat', 'call'], ['cat', 'door'], ['cat', 'fish'], ['cat', 'nap'], ['cat', 'nip'], ['cat', 'tail'], ['cat', 'woman'], ['cattle', 'man'], ['catty', 'corner'], ['cause', 'way'], ['cavalry', 'man'], ['cave', 'man'], ['cell', 'phone'], ['cement', 'works'], ['center', 'line'], ['center', 'piece'], ['chain', 'saw'], ['chair', 'man'], ['chair', 'person'], ['chair', 'woman'], ['chalk', 'board'], ['chalk', 'box'], ['chamber', 'maid'], ['chap', 'stick'], ['chart', 'room'], ['chat', 'room'], ['chatter', 'box'], ['check', 'book'], ['check', 'list'], ['check', 'mark'], ['check', 'mate'], ['checker', 'board'], ['cheek', 'bone'], ['cheese', 'ball'], ['cheese', 'bit'], ['cheese', 'burger'], ['cheese', 'cake'], ['cheese', 
'cloth'], ['cheese', 'pop'], ['cheese', 'steak'], ['cheese', 'stick'], ['cherry', 'picker'], ['cherry', 'pie'], ['chess', 'board'], ['chess', 'master'], ['chess', 'mate'], ['chess', 'way'], ['chest', 'nut'], ['chick', 'pea'], ['chicken', 'hawk'], ['chicken', 'pox'], ['child', 'birth'], ['child', 'care'], ['child', 'minder'], ['chimney', 'pot'], ['chimney', 'stack'], ['chop', 'stick'], ['christmas', 'time'], ['church', 'goer'], ['church', 'man'], ['city', 'scape'], ['class', 'mate'], ['class', 'room'], ['class', 'work'], ['claw', 'foot'], ['clearing', 'house'], ['clergy', 'man'], ['clip', 'board'], ['cloak', 'room'], ['clock', 'work'], ['clod', 'hopper'], ['clothes', 'horse'], ['clothes', 'line'], ['clothes', 'pin'], ['clothes', 'washer'], ['cloud', 'mobile'], ['clown', 'fish'], ['club', 'house'], ['coach', 'man'], ['coach', 'work'], ['coal', 'man'], ['coat', 'hanger'], ['coat', 'tail'], ['cob', 'web'], ['cobble', 'stone'], ['cock', 'chafer'], ['cock', 'horse'], ['cock', 'pit'], ['cock', 'roach'], ['cock', 'sucker'], ['cock', 'tail'], ['cockle', 'bur'], ['cockle', 'shell'], ['cocks', 'comb'], ['code', 'book'], ['coffee', 'cake'], ['coffee', 'house'], ['coffee', 'pot'], ['coffer', 'dam'], ['collar', 'bone'], ['combine', 'harvester'], ['comic', 'book'], ['comic', 'strip'], ['committee', 'man'], ['committee', 'woman'], ['con', 'man'], ['concert', 'master'], ['congress', 'man'], ['congress', 'woman'], ['coon', 'skin'], ['copy', 'book'], ['copy', 'cat'], ['copy', 'right'], ['copy', 'writer'], ['cork', 'screw'], ['corn', 'bread'], ['corn', 'cob'], ['corn', 'dog'], ['corn', 'field'], ['corn', 'flake'], ['corn', 'meal'], ['corn', 'roll'], ['corn', 'row'], ['corn', 'starch'], ['corn', 'top'], ['corner', 'stone'], ['corona', 'virus'], ['corps', 'man'], ['cot', 'bed'], ['cotton', 'mouth'], ['cotton', 'seed'], ['cotton', 'wood'], ['cotton', 'wool'], ['council', 'man'], ['council', 'woman'], ['counter', 'man'], ['counter', 'top'], ['countri', 'man'], ['country', 'man'], 
['country', 'side'], ['course', 'load'], ['course', 'work'], ['court', 'house'], ['court', 'room'], ['court', 'yard'], ['cow', 'bell'], ['cow', 'bird'], ['cow', 'boy'], ['cow', 'catcher'], ['cow', 'girl'], ['cow', 'hand'], ['cow', 'hide'], ['cow', 'horse'], ['cow', 'house'], ['cow', 'lick'], ['cow', 'man'], ['cow', 'poke'], ['cow', 'pony'], ['cow', 'puncher'], ['cow', 'stall'], ['crab', 'apple'], ['crab', 'meat'], ['crab', 'stick'], ['crack', 'head'], ['crack', 'house'], ['cracker', 'jack'], ['crafts', 'man'], ['crafts', 'people'], ['crafts', 'person'], ['crank', 'shaft'], ['cray', 'fish'], ['cream', 'puff'], ['cream', 'ware'], ['crew', 'man'], ['crew', 'neck'], ['crock', 'pot'], ['cross', 'bar'], ['cross', 'bone'], ['cross', 'breed'], ['cross', 'bun'], ['crow', 'bait'], ['crow', 'bar'], ['cubby', 'hole'], ['cue', 'ball'], ['cuff', 'link'], ['cup', 'board'], ['cup', 'cake'], ['cup', 'holder'], ['curb', 'side'], ['curve', 'ball'], ['custom', 'house'], ['cutter', 'box'], ['cuttle', 'fish'], ['dagger', 'man'], ['dart', 'board'], ['data', 'base'], ['day', 'bed'], ['day', 'care'], ['day', 'dream'], ['day', 'dreamer'], ['day', 'light'], ['day', 'time'], ['death', 'bed'], ['desk', 'top'], ['dick', 'head'], ['dinner', 'time'], ['dinner', 'ware'], ['dirt', 'bike'], ['dish', 'cloth'], ['dish', 'pan'], ['dish', 'rack'], ['dish', 'rag'], ['dish', 'towel'], ['dish', 'ware'], ['dish', 'washer'], ['dish', 'water'], ['ditch', 'water'], ['dixie', 'land'], ['dock', 'side'], ['dog', 'catcher'], ['dog', 'fight'], ['dog', 'fish'], ['dog', 'house'], ['dog', 'leg'], ['dog', 'sled'], ['dog', 'trot'], ['dog', 'wood'], ['doll', 'house'], ['door', 'bell'], ['door', 'handle'], ['door', 'jamb'], ['door', 'keeper'], ['door', 'knob'], ['door', 'man'], ['door', 'mat'], ['door', 'nail'], ['door', 'post'], ['door', 'step'], ['door', 'stop'], ['door', 'stopper'], ['door', 'way'], ['douche', 'bag'], ['dove', 'cote'], ['drafts', 'man'], ['drafts', 'person'], ['drain', 'board'], ['drain', 'pipe'], 
['dread', 'lock'], ['dream', 'scape'], ['dress', 'maker'], ['drift', 'wood'], ['drug', 'store'], ['drum', 'stick'], ['duck', 'bill'], ['duck', 'weed'], ['duct', 'work'], ['dummy', 'head'], ['dump', 'truck'], ['dunder', 'head'], ['dust', 'bin'], ['dust', 'cart'], ['dust', 'cover'], ['dust', 'pan'], ['ear', 'ache'], ['ear', 'bud'], ['ear', 'drop'], ['ear', 'drum'], ['ear', 'lobe'], ['ear', 'muff'], ['ear', 'phone'], ['ear', 'piece'], ['ear', 'plug'], ['ear', 'ring'], ['ear', 'shop'], ['ear', 'shot'], ['ear', 'wax'], ['ear', 'worm'], ['earth', 'quake'], ['earth', 'worm'], ['egg', 'beater'], ['egg', 'box'], ['egg', 'cup'], ['egg', 'head'], ['egg', 'nog'], ['egg', 'plant'], ['egg', 'roll'], ['egg', 'shell'], ['eider', 'down'], ['elder', 'flower'], ['end', 'game'], ['end', 'gate'], ['end', 'point'], ['entrance', 'way'], ['entry', 'way'], ['express', 'way'], ['eye', 'ball'], ['eye', 'book'], ['eye', 'brow'], ['eye', 'drop'], ['eye', 'glass'], ['eye', 'lash'], ['eye', 'lid'], ['eye', 'liner'], ['eye', 'patch'], ['eye', 'piece'], ['eye', 'shadow'], ['eye', 'sight'], ['eye', 'witness'], ['face', 'cloth'], ['face', 'time'], ['fair', 'ground'], ['fair', 'way'], ['fairy', 'godmother'], ['faker', 'face'], ['farm', 'boy'], ['farm', 'hand'], ['farm', 'house'], ['farm', 'land'], ['farm', 'stead'], ['farm', 'work'], ['farm', 'yard'], ['farmer', 'man'], ['fat', 'head'], ['feather', 'bed'], ['feather', 'weight'], ['fence', 'post'], ['ferry', 'boat'], ['fiber', 'board'], ['fiber', 'glass'], ['field', 'day'], ['field', 'mouse'], ['field', 'stone'], ['field', 'trip'], ['field', 'work'], ['fig', 'newton'], ['figure', 'head'], ['film', 'strip'], ['finger', 'nail'], ['finger', 'paint'], ['finger', 'print'], ['finger', 'tip'], ['fire', 'arm'], ['fire', 'ball'], ['fire', 'bell'], ['fire', 'bird'], ['fire', 'boat'], ['fire', 'box'], ['fire', 'brigade'], ['fire', 'bug'], ['fire', 'car'], ['fire', 'cracker'], ['fire', 'extinguisher'], ['fire', 'fighter'], ['fire', 'fly'], ['fire', 'guard'], 
['fire', 'hat'], ['fire', 'hose'], ['fire', 'house'], ['fire', 'light'], ['fire', 'man'], ['fire', 'place'], ['fire', 'pole'], ['fire', 'power'], ['fire', 'side'], ['fire', 'stick'], ['fire', 'thrower'], ['fire', 'truck'], ['fire', 'trucker'], ['fire', 'wall'], ['fire', 'woman'], ['fire', 'wood'], ['fire', 'work'], ['fish', 'bone'], ['fish', 'bowl'], ['fish', 'cake'], ['fish', 'head'], ['fish', 'hook'], ['fish', 'monger'], ['fish', 'net'], ['fish', 'pond'], ['fish', 'stick'], ['fish', 'tank'], ['fisher', 'man'], ['fishing', 'pole'], ['fist', 'fight'], ['fixer', 'man'], ['flag', 'pole'], ['flag', 'ship'], ['flag', 'stone'], ['flash', 'gun'], ['flash', 'light'], ['flat', 'ware'], ['flax', 'seed'], ['flea', 'wort'], ['flint', 'stone'], ['flood', 'gate'], ['flood', 'light'], ['flood', 'plain'], ['floor', 'board'], ['floor', 'mat'], ['flow', 'chart'], ['flower', 'bed'], ['flower', 'clip'], ['flower', 'girl'], ['flower', 'head'], ['flower', 'pot'], ['fly', 'catcher'], ['fly', 'paper'], ['fly', 'swatter'], ['fly', 'trap'], ['fog', 'horn'], ['fog', 'light'], ['food', 'stuff'], ['foot', 'ball'], ['foot', 'baller'], ['foot', 'board'], ['foot', 'bridge'], ['foot', 'fall'], ['foot', 'hill'], ['foot', 'man'], ['foot', 'muff'], ['foot', 'note'], ['foot', 'path'], ['foot', 'plate'], ['foot', 'print'], ['foot', 'race'], ['foot', 'rest'], ['foot', 'soldier'], ['foot', 'step'], ['foot', 'stool'], ['foot', 'wear'], ['foot', 'work'], ['foots', 'man'], ['force', 'field'], ['fore', 'brain'], ['fore', 'closure'], ['fore', 'court'], ['fore', 'father'], ['fore', 'finger'], ['fore', 'front'], ['fore', 'ground'], ['fore', 'head'], ['fore', 'leg'], ['fore', 'man'], ['fore', 'part'], ['fore', 'play'], ['fore', 'runner'], ['fork', 'lift'], ['fountain', 'head'], ['fox', 'glove'], ['fox', 'hole'], ['fox', 'hound'], ['frame', 'work'], ['french', 'toast'], ['frog', 'man'], ['fromage', 'frais'], ['front', 'line'], ['frontiers', 'man'], ['fruit', 'cake'], ['fun', 'fair'], ['fun', 'house'], ['fund', 
'raiser'], ['fur', 'ball'], ['fuse', 'box'], ['fuss', 'bucket'], ['fuss', 'pot'], ['gall', 'bladder'], ['gall', 'stone'], ['game', 'bird'], ['game', 'board'], ['game', 'boy'], ['game', 'room'], ['gang', 'buster'], ['gang', 'land'], ['gang', 'plank'], ['gang', 'way'], ['garbage', 'man'], ['gas', 'light'], ['gate', 'post'], ['gate', 'way'], ['gear', 'box'], ['gear', 'stick'], ['ghost', 'buster'], ['gift', 'ware'], ['gin', 'mill'], ['ginger', 'bread'], ['ginger', 'man'], ['gingerbread', 'man'], ['girl', 'friend'], ['glass', 'ware'], ['globe', 'trotter'], ['glow', 'stick'], ['goal', 'keeper'], ['goal', 'post'], ['god', 'brother'], ['god', 'child'], ['god', 'children'], ['god', 'daughter'], ['god', 'father'], ['god', 'head'], ['god', 'mother'], ['god', 'parent'], ['god', 'sister'], ['god', 'son'], ['gold', 'digger'], ['gold', 'mine'], ['gold', 'smith'], ['goof', 'ball'], ['goose', 'berry'], ['goose', 'bump'], ['goose', 'neck'], ['goose', 'pimple'], ['goose', 'step'], ['grape', 'fruit'], ['grape', 'vine'], ['grass', 'cutter'], ['grass', 'hopper'], ['grass', 'land'], ['grass', 'roots'], ['grave', 'side'], ['grave', 'stone'], ['grave', 'yard'], ['grid', 'iron'], ['grill', 'work'], ['grille', 'work'], ['grist', 'mill'], ['grocery', 'man'], ['grooms', 'man'], ['ground', 'hog'], ['ground', 'water'], ['ground', 'wave'], ['ground', 'work'], ['grounds', 'keeper'], ['grumble', 'head'], ['guard', 'house'], ['guard', 'man'], ['guards', 'man'], ['guest', 'room'], ['guide', 'book'], ['guide', 'line'], ['gum', 'ball'], ['gum', 'drop'], ['gun', 'barrel'], ['gun', 'belt'], ['gun', 'fighter'], ['gun', 'fire'], ['gun', 'flint'], ['gun', 'man'], ['gun', 'play'], ['gun', 'point'], ['gun', 'powder'], ['gun', 'shot'], ['gun', 'slinger'], ['gun', 'smoke'], ['gunny', 'sack'], ['gutter', 'ball'], ['hag', 'fish'], ['hail', 'stone'], ['hail', 'storm'], ['hair', 'ball'], ['hair', 'band'], ['hair', 'bow'], ['hair', 'brush'], ['hair', 'clip'], ['hair', 'dresser'], ['hair', 'dryer'], ['hair', 'line'], 
['hair', 'net'], ['hair', 'piece'], ['hair', 'pin'], ['hair', 'shirt'], ['hair', 'slide'], ['hair', 'spray'], ['hair', 'style'], ['hair', 'wash'], ['hall', 'way'], ['hammer', 'head'], ['hand', 'bag'], ['hand', 'ball'], ['hand', 'book'], ['hand', 'brake'], ['hand', 'clasp'], ['hand', 'cloth'], ['hand', 'craft'], ['hand', 'cuff'], ['hand', 'grip'], ['hand', 'gun'], ['hand', 'kerchief'], ['hand', 'maid'], ['hand', 'maiden'], ['hand', 'phone'], ['hand', 'print'], ['hand', 'puppet'], ['hand', 'rail'], ['hand', 'saw'], ['hand', 'set'], ['handle', 'bar'], ['hard', 'cover'], ['hard', 'hat'], ['hat', 'band'], ['hat', 'box'], ['hatch', 'back'], ['hatch', 'way'], ['hatchet', 'man'], ['hay', 'bale'], ['hay', 'fever'], ['hay', 'loft'], ['hay', 'ride'], ['hay', 'shed'], ['hay', 'stack'], ['hazel', 'nut'], ['head', 'ache'], ['head', 'band'], ['head', 'board'], ['head', 'count'], ['head', 'dress'], ['head', 'gear'], ['head', 'lamp'], ['head', 'land'], ['head', 'light'], ['head', 'line'], ['head', 'master'], ['head', 'mistress'], ['head', 'phone'], ['head', 'piece'], ['head', 'room'], ['head', 'scarf'], ['head', 'set'], ['head', 'start'], ['head', 'stone'], ['head', 'way'], ['heads', 'man'], ['health', 'care'], ['heart', 'ache'], ['heart', 'beat'], ['hedge', 'hog'], ['hedge', 'row'], ['heir', 'loom'], ['hell', 'fire'], ['hell', 'hole'], ['helms', 'man'], ['help', 'line'], ['hen', 'house'], ['hench', 'man'], ['herring', 'bone'], ['hill', 'billy'], ['hill', 'side'], ['hill', 'top'], ['hind', 'brain'], ['hind', 'quarters'], ['hind', 'sight'], ['hobby', 'horse'], ['hog', 'wash'], ['home', 'body'], ['home', 'box'], ['home', 'boy'], ['home', 'front'], ['home', 'fry'], ['home', 'land'], ['home', 'maker'], ['home', 'owner'], ['home', 'page'], ['home', 'room'], ['home', 'run'], ['home', 'stead'], ['home', 'town'], ['home', 'work'], ['home', 'world'], ['honey', 'bag'], ['honey', 'bee'], ['honey', 'bun'], ['honey', 'cake'], ['honey', 'comb'], ['honey', 'dew'], ['honey', 'moon'], ['hook', 
'worm'], ['horn', 'bill'], ['horse', 'back'], ['horse', 'flesh'], ['horse', 'fly'], ['horse', 'hair'], ['horse', 'man'], ['horse', 'meat'], ['horse', 'play'], ['horse', 'power'], ['horse', 'radish'], ['horse', 'shoe'], ['horse', 'woman'], ['horsie', 'man'], ['hose', 'pipe'], ['hour', 'glass'], ['house', 'boat'], ['house', 'boy'], ['house', 'cat'], ['house', 'coat'], ['house', 'dress'], ['house', 'fly'], ['house', 'hold'], ['house', 'holder'], ['house', 'keeper'], ['house', 'maid'], ['house', 'man'], ['house', 'mate'], ['house', 'paint'], ['house', 'plant'], ['house', 'ware'], ['house', 'work'], ['hub', 'cap'], ['huckle', 'berry'], ['human', 'kind'], ['hump', 'back'], ['hunch', 'back'], ['hunts', 'man'], ['hydro', 'pump'], ['hymn', 'book'], ['ice', 'berg'], ['ice', 'box'], ['ice', 'breaker'], ['ice', 'cap'], ['ice', 'cracker'], ['ice', 'cream'], ['ice', 'cube'], ['ice', 'pick'], ['ice', 'pop'], ['ice', 'skating'], ['inch', 'worm'], ['infantry', 'man'], ['ink', 'blot'], ['inn', 'keeper'], ['inner', 'tube'], ['iron', 'wood'], ['ironing', 'board'], ['jack', 'ass'], ['jack', 'fruit'], ['jack', 'hammer'], ['jack', 'knife'], ['jack', 'pot'], ['jack', 'rabbit'], ['jail', 'house'], ['jammy', 'dodger'], ['jaw', 'bone'], ['jaw', 'breaker'], ['jaw', 'line'], ['jay', 'bird'], ['jazz', 'man'], ['jelly', 'bean'], ['jelly', 'belly'], ['jelly', 'fish'], ['jet', 'pack'], ['jet', 'ski'], ['jig', 'saw'], ['jingle', 'bell'], ['jitter', 'bug'], ['joy', 'ride'], ['joy', 'stick'], ['juice', 'box'], ['juke', 'box'], ['jump', 'rope'], ['jungle', 'gym'], ['junk', 'man'], ['junk', 'yard'], ['key', 'board'], ['key', 'chain'], ['key', 'hole'], ['key', 'note'], ['key', 'pad'], ['key', 'ring'], ['key', 'stone'], ['key', 'stroke'], ['key', 'word'], ['kin', 'folk'], ['king', 'bird'], ['king', 'fisher'], ['king', 'pin'], ['knee', 'board'], ['knee', 'cap'], ['knee', 'hole'], ['knee', 'pad'], ['knick', 'knack'], ['knuckle', 'ball'], ['knuckle', 'head'], ['lady', 'bird'], ['lady', 'bug'], ['lake', 
'front'], ['lake', 'side'], ['lamb', 'chop'], ['lamp', 'light'], ['lamp', 'post'], ['lamp', 'shade'], ['land', 'fill'], ['land', 'form'], ['land', 'lady'], ['land', 'line'], ['land', 'lord'], ['land', 'lubber'], ['land', 'mark'], ['land', 'mass'], ['land', 'mine'], ['land', 'owner'], ['land', 'rover'], ['land', 'scape'], ['lap', 'top'], ['lark', 'spur'], ['latch', 'key'], ['lattice', 'work'], ['launch', 'pad'], ['law', 'breaker'], ['law', 'maker'], ['law', 'man'], ['law', 'suit'], ['lawn', 'mower'], ['lay', 'man'], ['leads', 'man'], ['leaf', 'hopper'], ['lee', 'way'], ['leg', 'work'], ['letter', 'box'], ['letter', 'head'], ['life', 'belt'], ['life', 'blood'], ['life', 'boat'], ['life', 'form'], ['life', 'guard'], ['life', 'jacket'], ['life', 'line'], ['life', 'preserver'], ['life', 'raft'], ['life', 'saver'], ['life', 'span'], ['life', 'style'], ['life', 'time'], ['life', 'vest'], ['light', 'bulb'], ['light', 'house'], ['light', 'saber'], ['light', 'ship'], ['light', 'switch'], ['light', 'year'], ['lily', 'pad'], ['lime', 'light'], ['lime', 'stone'], ['line', 'back'], ['line', 'backer'], ['line', 'man'], ['lip', 'stick'], ['litter', 'bin'], ['litter', 'bug'], ['lock', 'smith'], ['log', 'jam'], ['logger', 'head'], ['loop', 'hole'], ['lorry', 'bus'], ['lounge', 'room'], ['love', 'bug'], ['love', 'seat'], ['lumber', 'jack'], ['lumber', 'man'], ['lumber', 'yard'], ['lunch', 'box'], ['lunch', 'meat'], ['lunch', 'room'], ['lunch', 'time'], ['mail', 'bag'], ['mail', 'box'], ['mail', 'lady'], ['mail', 'man'], ['mail', 'person'], ['mail', 'room'], ['mail', 'truck'], ['mail', 'woman'], ['man', 'child'], ['man', 'hole'], ['man', 'hunt'], ['man', 'kind'], ['man', 'power'], ['man', 'servant'], ['man', 'slaughter'], ['man', 'trap'], ['mantel', 'piece'], ['mantle', 'piece'], ['market', 'place'], ['marks', 'man'], ['marsh', 'mallow'], ['master', 'piece'], ['match', 'box'], ['match', 'maker'], ['match', 'stick'], ['may', 'pole'], ['meadow', 'lark'], ['meal', 'time'], ['meal', 
'worm'], ['meat', 'ball'], ['meat', 'eater'], ['meat', 'head'], ['meat', 'loaf'], ['mer', 'man'], ['mer', 'people'], ['mer', 'person'], ['mess', 'pot'], ['metal', 'smith'], ['metal', 'work'], ['middle', 'man'], ['midship', 'man'], ['mile', 'stone'], ['milk', 'box'], ['milk', 'man'], ['milk', 'shake'], ['milk', 'weed'], ['mill', 'stone'], ['milque', 'toast'], ['mind', 'reader'], ['mind', 'set'], ['mine', 'sweeper'], ['mold', 'board'], ['mole', 'hill'], ['mole', 'skin'], ['monkey', 'bar'], ['moon', 'beam'], ['moon', 'cake'], ['moon', 'light'], ['moon', 'scape'], ['morning', 'time'], ['mother', 'board'], ['mother', 'fucker'], ['mother', 'land'], ['motor', 'bike'], ['motor', 'boat'], ['motor', 'boatist'], ['motor', 'car'], ['motor', 'cycle'], ['motor', 'home'], ['motor', 'scooter'], ['motor', 'way'], ['mountain', 'side'], ['mountain', 'top'], ['mouse', 'pad'], ['mouse', 'trap'], ['mouth', 'piece'], ['mud', 'flap'], ['mud', 'guard'], ['muscle', 'man'], ['mush', 'ball'], ['music', 'box'], ['music', 'maker'], ['musk', 'rat'], ['name', 'sake'], ['name', 'tag'], ['nap', 'time'], ['nappie', 'time'], ['neck', 'lace'], ['neck', 'line'], ['neck', 'piece'], ['neck', 'tie'], ['needle', 'nose'], ['needle', 'point'], ['needle', 'work'], ['net', 'ball'], ['net', 'work'], ['news', 'agent'], ['news', 'boy'], ['news', 'cast'], ['news', 'caster'], ['news', 'letter'], ['news', 'man'], ['news', 'paper'], ['news', 'print'], ['news', 'reel'], ['news', 'stand'], ['news', 'time'], ['newspaper', 'man'], ['nick', 'name'], ['night', 'cap'], ['night', 'clothes'], ['night', 'club'], ['night', 'crawler'], ['night', 'dress'], ['night', 'gown'], ['night', 'life'], ['night', 'light'], ['night', 'mare'], ['night', 'shirt'], ['night', 'stand'], ['night', 'stick'], ['night', 'time'], ['noise', 'maker'], ['noon', 'time'], ['north', 'east'], ['north', 'west'], ['nose', 'bag'], ['note', 'book'], ['note', 'card'], ['note', 'pad'], ['nut', 'cake'], ['nut', 'case'], ['nut', 'cracker'], ['nut', 'hatch'], 
['nut', 'house'], ['nut', 'job'], ['nut', 'shell'], ['oat', 'meal'], ['ocean', 'front'], ['ocean', 'side'], ['oil', 'can'], ['oil', 'cloth'], ['oil', 'seed'], ['olive', 'wood'], ['outdoors', 'man'], ['ox', 'bow'], ['ox', 'cart'], ['ox', 'tail'], ['pace', 'maker'], ['pack', 'rat'], ['pad', 'lock'], ['paddle', 'ball'], ['paddle', 'board'], ['page', 'boy'], ['pain', 'killer'], ['paint', 'ball'], ['paint', 'brush'], ['paint', 'work'], ['pall', 'bearer'], ['pan', 'cake'], ['pan', 'handle'], ['pan', 'pipe'], ['pant', 'leg'], ['pant', 'suit'], ['pantry', 'man'], ['panty', 'hose'], ['paper', 'back'], ['paper', 'boy'], ['paper', 'clip'], ['paper', 'cut'], ['paper', 'man'], ['paper', 'weight'], ['paper', 'work'], ['park', 'way'], ['parrot', 'fish'], ['pass', 'word'], ['passage', 'way'], ['passion', 'fruit'], ['patch', 'work'], ['path', 'way'], ['patrol', 'man'], ['paw', 'print'], ['pay', 'check'], ['pay', 'day'], ['pay', 'loader'], ['pay', 'master'], ['pay', 'roll'], ['pea', 'cock'], ['pea', 'fowl'], ['pea', 'hen'], ['peace', 'maker'], ['peace', 'time'], ['pearl', 'ware'], ['pee', 'pot'], ['peep', 'hole'], ['peg', 'board'], ['pen', 'friend'], ['pen', 'knife'], ['pen', 'man'], ['pen', 'pal'], ['pen', 'stock'], ['pepper', 'corn'], ['pepper', 'mill'], ['pepper', 'mint'], ['pet', 'shop'], ['phone', 'book'], ['phone', 'box'], ['phone', 'call'], ['phone', 'man'], ['pick', 'ax'], ['pick', 'axe'], ['pickle', 'ball'], ['picture', 'book'], ['pie', 'dish'], ['pie', 'man'], ['piece', 'work'], ['pig', 'pen'], ['pig', 'skin'], ['pig', 'stall'], ['pig', 'sty'], ['pig', 'swill'], ['pig', 'tail'], ['pigeon', 'hole'], ['piggie', 'back'], ['piggy', 'back'], ['piggy', 'bank'], ['pigs', 'eye'], ['pillar', 'box'], ['pillow', 'case'], ['pin', 'ball'], ['pin', 'cushion'], ['pin', 'head'], ['pin', 'point'], ['pin', 'stripe'], ['pin', 'wheel'], ['pine', 'apple'], ['pine', 'cone'], ['pipe', 'line'], ['pit', 'bull'], ['pit', 'viper'], ['pitch', 'fork'], ['pixie', 'pie'], ['pizza', 'maker'], ['place', 
'holder'], ['place', 'mat'], ['plane', 'load'], ['plaster', 'board'], ['plaster', 'man'], ['plastic', 'ware'], ['play', 'house'], ['play', 'wright'], ['plot', 'line'], ['plough', 'man'], ['plug', 'hole'], ['pocket', 'book'], ['pocket', 'knife'], ['pod', 'cast'], ['pole', 'cat'], ['police', 'man'], ['police', 'woman'], ['pony', 'tail'], ['poop', 'head'], ['pop', 'tart'], ['pop', 'star'], ['poppy', 'seed'], ['pork', 'belly'], ['pork', 'chop'], ['port', 'hole'], ['posse', 'man'], ['post', 'bag'], ['post', 'box'], ['post', 'card'], ['post', 'code'], ['post', 'man'], ['post', 'master'], ['post', 'office'], ['post', 'woman'], ['poster', 'board'], ['pot', 'boiler'], ['pot', 'holder'], ['pot', 'hole'], ['pot', 'luck'], ['pot', 'pie'], ['powder', 'puff'], ['power', 'bike'], ['power', 'house'], ['power', 'point'], ['power', 'saw'], ['prong', 'horn'], ['puff', 'ball'], ['puffer', 'fish'], ['punch', 'ball'], ['puppet', 'master'], ['pussy', 'cat'], ['pussy', 'willow'], ['quarry', 'man'], ['quarter', 'back'], ['quarter', 'master'], ['quay', 'side'], ['rabbit', 'man'], ['race', 'car'], ['race', 'horse'], ['race', 'track'], ['race', 'way'], ['racquet', 'ball'], ['radio', 'man'], ['rag', 'doll'], ['rag', 'time'], ['rail', 'head'], ['rail', 'road'], ['rail', 'roader'], ['rail', 'way'], ['rain', 'bow'], ['rain', 'cloud'], ['rain', 'coat'], ['rain', 'dance'], ['rain', 'drop'], ['rain', 'forest'], ['rain', 'maker'], ['rain', 'stick'], ['rain', 'storm'], ['rat', 'tail'], ['razor', 'back'], ['razor', 'blade'], ['rhine', 'stone'], ['rib', 'cage'], ['rifle', 'man'], ['ring', 'bill'], ['ring', 'leader'], ['ring', 'maker'], ['ring', 'master'], ['ring', 'neck'], ['ring', 'side'], ['river', 'bank'], ['river', 'bed'], ['river', 'bend'], ['river', 'boat'], ['river', 'side'], ['road', 'grader'], ['road', 'bed'], ['road', 'hog'], ['road', 'house'], ['road', 'rage'], ['road', 'runner'], ['road', 'side'], ['road', 'way'], ['road', 'work'], ['rock', 'star'], ['rocket', 'ship'], ['roller', 'blade'], 
['roller', 'blader'], ['roller', 'coaster'], ['roller', 'skate'], ['roof', 'line'], ['roof', 'rack'], ['roof', 'top'], ['roof', 'tree'], ['room', 'mate'], ['root', 'beer'], ['rose', 'bud'], ['rose', 'bush'], ['row', 'house'], ['rubber', 'band'], ['saber', 'tooth'], ['sack', 'cloth'], ['sage', 'brush'], ['sail', 'boat'], ['sail', 'fish'], ['sales', 'man'], ['sales', 'people'], ['sales', 'person'], ['sales', 'woman'], ['salt', 'bush'], ['salt', 'fish'], ['salt', 'shaker'], ['salt', 'water'], ['sand', 'bag'], ['sand', 'bank'], ['sand', 'bar'], ['sand', 'box'], ['sand', 'bucket'], ['sand', 'castle'], ['sand', 'lot'], ['sand', 'lotter'], ['sand', 'man'], ['sand', 'paper'], ['sand', 'pile'], ['sand', 'piper'], ['sand', 'pit'], ['sand', 'stone'], ['sandal', 'wood'], ['sauce', 'pan'], ['sauce', 'pot'], ['saw', 'dust'], ['saw', 'horse'], ['saw', 'mill'], ['saw', 'timber'], ['saw', 'tooth'], ['scape', 'goat'], ['school', 'bag'], ['school', 'book'], ['school', 'boy'], ['school', 'bus'], ['school', 'day'], ['school', 'girl'], ['school', 'house'], ['school', 'keeper'], ['school', 'marm'], ['school', 'master'], ['school', 'mate'], ['school', 'night'], ['school', 'teacher'], ['school', 'time'], ['school', 'work'], ['school', 'yard'], ['score', 'board'], ['score', 'card'], ['scouffle', 'bug'], ['scrap', 'book'], ['scrap', 'yard'], ['scratch', 'card'], ['screen', 'land'], ['screen', 'play'], ['screen', 'saver'], ['screen', 'writer'], ['screw', 'driver'], ['scum', 'bag'], ['scum', 'gutter'], ['scuttle', 'bug'], ['sea', 'board'], ['sea', 'food'], ['sea', 'front'], ['sea', 'gull'], ['sea', 'horse'], ['sea', 'lion'], ['sea', 'man'], ['sea', 'plane'], ['sea', 'port'], ['sea', 'shell'], ['sea', 'shore'], ['sea', 'side'], ['sea', 'way'], ['sea', 'weed'], ['seat', 'belt'], ['seed', 'pod'], ['service', 'man'], ['share', 'holder'], ['shark', 'skin'], ['sheep', 'dog'], ['sheep', 'herder'], ['sheep', 'skin'], ['sheet', 'rock'], ['shell', 'fish'], ['shin', 'bone'], ['ship', 'board'], ['ship', 
'mate'], ['ship', 'wreck'], ['ship', 'yard'], ['shirt', 'dress'], ['shirt', 'front'], ['shirt', 'sleeve'], ['shirt', 'tail'], ['shit', 'fit'], ['shit', 'load'], ['shoe', 'box'], ['shoe', 'horn'], ['shoe', 'lace'], ['shoe', 'maker'], ['shoe', 'string'], ['shop', 'keeper'], ['shore', 'bird'], ['shore', 'line'], ['short', 'bread'], ['show', 'biz'], ['show', 'man'], ['show', 'time'], ['shower', 'head'], ['shuttle', 'cock'], ['side', 'board'], ['side', 'door'], ['side', 'light'], ['side', 'line'], ['side', 'man'], ['side', 'show'], ['side', 'table'], ['side', 'wall'], ['side', 'winder'], ['sight', 'seer'], ['sign', 'board'], ['sign', 'language'], ['sign', 'post'], ['signal', 'man'], ['silver', 'ware'], ['sketch', 'book'], ['skill', 'set'], ['skin', 'head'], ['skull', 'cap'], ['sky', 'lark'], ['sky', 'light'], ['sky', 'line'], ['sky', 'rocket'], ['sky', 'scraper'], ['sky', 'wave'], ['sky', 'way'], ['sledge', 'hammer'], ['sleep', 'suit'], ['sleep', 'time'], ['sleepy', 'head'], ['sleepy', 'time'], ['slide', 'show'], ['slime', 'ball'], ['sling', 'shot'], ['smart', 'alec'], ['smoke', 'house'], ['smoke', 'screen'], ['smoke', 'stack'], ['snack', 'time'], ['snaggle', 'tooth'], ['snap', 'shot'], ['snot', 'face'], ['snow', 'ball'], ['snow', 'bank'], ['snow', 'bird'], ['snow', 'blower'], ['snow', 'board'], ['snow', 'boot'], ['snow', 'boy'], ['snow', 'cone'], ['snow', 'drift'], ['snow', 'drop'], ['snow', 'flake'], ['snow', 'globe'], ['snow', 'lady'], ['snow', 'man'], ['snow', 'mobile'], ['snow', 'pack'], ['snow', 'pile'], ['snow', 'plow'], ['snow', 'shoe'], ['snow', 'storm'], ['snow', 'suit'], ['soap', 'box'], ['soap', 'sud'], ['sock', 'eye'], ['sofa', 'bed'], ['song', 'bag'], ['song', 'bird'], ['song', 'book'], ['song', 'writer'], ['sooth', 'sayer'], ['soul', 'mate'], ['sound', 'board'], ['sound', 'scape'], ['sound', 'track'], ['south', 'east'], ['south', 'paw'], ['south', 'west'], ['sow', 'belly'], ['space', 'craft'], ['space', 'man'], ['space', 'ship'], ['space', 'suit'], 
['speaker', 'phone'], ['spear', 'man'], ['spear', 'mint'], ['speed', 'boat'], ['speed', 'way'], ['spell', 'book'], ['spider', 'web'], ['spit', 'ball'], ['spokes', 'people'], ['spokes', 'person'], ['sports', 'car'], ['sports', 'cast'], ['sports', 'caster'], ['sports', 'man'], ['sports', 'writer'], ['spot', 'light'], ['spring', 'time'], ['stable', 'man'], ['stage', 'coach'], ['stage', 'craft'], ['stair', 'case'], ['stair', 'way'], ['stair', 'well'], ['stake', 'holder'], ['star', 'dust'], ['star', 'fruit'], ['star', 'light'], ['star', 'ship'], ['state', 'room'], ['states', 'man'], ['station', 'master'], ['steak', 'house'], ['steam', 'boat'], ['steam', 'roller'], ['steam', 'ship'], ['steel', 'maker'], ['steel', 'worker'], ['stick', 'ball'], ['stick', 'man'], ['stick', 'pin'], ['stickle', 'back'], ['sting', 'ray'], ['stock', 'broker'], ['stock', 'holder'], ['stock', 'pile'], ['stock', 'room'], ['stomach', 'ache'], ['stone', 'work'], ['store', 'front'], ['store', 'house'], ['store', 'keeper'], ['store', 'man'], ['store', 'room'], ['storm', 'cloud'], ['storm', 'trooper'], ['story', 'board'], ['story', 'book'], ['story', 'line'], ['story', 'time'], ['stove', 'top'], ['strait', 'jacket'], ['straw', 'berry'], ['stream', 'side'], ['street', 'car'], ['street', 'light'], ['style', 'mark'], ['sucker', 'fish'], ['sugar', 'baby'], ['sugar', 'bowl'], ['sugar', 'pie'], ['sugar', 'plum'], ['suit', 'case'], ['suit', 'coat'], ['summer', 'time'], ['sun', 'bath'], ['sun', 'bed'], ['sun', 'bonnet'], ['sun', 'burn'], ['sun', 'burst'], ['sun', 'dial'], ['sun', 'dress'], ['sun', 'fish'], ['sun', 'flower'], ['sun', 'frock'], ['sun', 'glass'], ['sun', 'hat'], ['sun', 'light'], ['sun', 'lounger'], ['sun', 'ray'], ['sun', 'roof'], ['sun', 'room'], ['sun', 'screen'], ['sun', 'shade'], ['sun', 'shield'], ['sun', 'spot'], ['sun', 'suit'], ['sun', 'tan'], ['supper', 'time'], ['sweat', 'band'], ['sweat', 'pants'], ['sweat', 'shirt'], ['sweat', 'shop'], ['sweat', 'suit'], ['sweep', 'stakes'], 
['sweet', 'shop'], ['swing', 'set'], ['switch', 'board'], ['swords', 'man'], ['table', 'cloth'], ['table', 'land'], ['table', 'spoon'], ['table', 'top'], ['tail', 'back'], ['tail', 'fin'], ['tail', 'gunner'], ['tail', 'light'], ['tail', 'pipe'], ['tail', 'race'], ['tape', 'worm'], ['tar', 'paper'], ['tar', 'pit'], ['task', 'master'], ['tax', 'payer'], ['taxi', 'cab'], ['taxi', 'way'], ['tea', 'bag'], ['tea', 'cake'], ['tea', 'cart'], ['tea', 'cup'], ['tea', 'house'], ['tea', 'kettle'], ['tea', 'pot'], ['tea', 'set'], ['tea', 'spoon'], ['tea', 'time'], ['tea', 'towel'], ['teak', 'wood'], ['team', 'mate'], ['team', 'work'], ['tear', 'drop'], ['tee', 'ball'], ['tee', 'totaler'], ['teeny', 'bopper'], ['teeth', 'mark'], ['teethy', 'peg'], ['terry', 'cloth'], ['tether', 'ball'], ['text', 'book'], ['theater', 'goer'], ['thorn', 'bush'], ['thumb', 'nail'], ['thumb', 'print'], ['thumb', 'tack'], ['thunder', 'bird'], ['thunder', 'bolt'], ['thunder', 'stick'], ['thunder', 'storm'], ['tide', 'water'], ['time', 'frame'], ['time', 'line'], ['time', 'piece'], ['time', 'sheet'], ['time', 'table'], ['tin', 'man'], ['tin', 'plate'], ['tinker', 'toy'], ['tippy', 'toe'], ['tit', 'mouse'], ['toad', 'stool'], ['toe', 'nail'], ['toll', 'booth'], ['toll', 'gate'], ['toll', 'house'], ['toll', 'way'], ['tom', 'boy'], ['tom', 'cat'], ['tom', 'foolery'], ['tomb', 'stone'], ['tool', 'bag'], ['tool', 'belt'], ['tool', 'box'], ['tool', 'kit'], ['tool', 'maker'], ['tool', 'shed'], ['tooth', 'ache'], ['tooth', 'brush'], ['tooth', 'fairy'], ['tooth', 'paste'], ['tooth', 'pick'], ['tooth', 'puller'], ['toothie', 'peg'], ['top', 'coat'], ['top', 'soil'], ['tortoise', 'shell'], ['tow', 'head'], ['town', 'home'], ['town', 'house'], ['towns', 'man'], ['towns', 'people'], ['toy', 'bin'], ['toy', 'box'], ['toy', 'land'], ['toy', 'maker'], ['toy', 'shop'], ['trade', 'mark'], ['trade', 'wind'], ['trades', 'man'], ['trail', 'head'], ['train', 'man'], ['train', 'set'], ['train', 'track'], ['tram', 'way'], 
['trash', 'can'], ['trash', 'man'], ['tree', 'house'], ['tree', 'top'], ['trench', 'coat'], ['trencher', 'man'], ['tribes', 'man'], ['trigger', 'fish'], ['troop', 'ship'], ['trouble', 'maker'], ['trouble', 'shooter'], ['truck', 'load'], ['tumble', 'dryer'], ['tummy', 'ache'], ['tuna', 'fish'], ['turtle', 'neck'], ['twinkle', 'toe'], ['type', 'script'], ['type', 'writer'], ['user', 'name'], ['video', 'camera'], ['video', 'game'], ['video', 'tape'], ['video', 'taper'], ['view', 'finder'], ['view', 'point'], ['vine', 'yard'], ['viper', 'fish'], ['voice', 'box'], ['voice', 'mail'], ['volley', 'ball'], ['waist', 'band'], ['waist', 'coat'], ['waist', 'line'], ['wake', 'board'], ['wall', 'board'], ['wall', 'flower'], ['wall', 'paper'], ['war', 'fare'], ['war', 'front'], ['war', 'head'], ['war', 'monger'], ['war', 'time'], ['ward', 'room'], ['ware', 'house'], ['wart', 'hog'], ['wash', 'bag'], ['wash', 'line'], ['waste', 'basket'], ['waste', 'land'], ['waste', 'paper'], ['watch', 'maker'], ['watch', 'smith'], ['watch', 'tower'], ['water', 'bed'], ['water', 'bird'], ['water', 'color'], ['water', 'cress'], ['water', 'fowl'], ['water', 'front'], ['water', 'hole'], ['water', 'lily'], ['water', 'line'], ['water', 'melon'], ['water', 'mill'], ['water', 'park'], ['water', 'pump'], ['water', 'shed'], ['water', 'side'], ['water', 'ski'], ['water', 'slide'], ['water', 'spout'], ['water', 'tap'], ['water', 'way'], ['water', 'wheel'], ['water', 'witch'], ['water', 'works'], ['wave', 'length'], ['way', 'point'], ['way', 'side'], ['weather', 'man'], ['weather', 'strip'], ['weather', 'vane'], ['web', 'cam'], ['web', 'page'], ['web', 'site'], ['weed', 'eater'], ['week', 'day'], ['week', 'end'], ['weight', 'lifter'], ['wet', 'land'], ['wheel', 'barrow'], ['wheel', 'chair'], ['wheel', 'house'], ['whip', 'saw'], ['whipper', 'snapper'], ['whirly', 'bird'], ['whole', 'saler'], ['wife', 'beater'], ['wiffle', 'ball'], ['will', 'power'], ['wind', 'bag'], ['wind', 'breaker'], ['wind', 'mill'], 
['wind', 'pipe'], ['wind', 'screen'], ['wind', 'shield'], ['wind', 'sock'], ['wind', 'storm'], ['window', 'pane'], ['window', 'sill'], ['wine', 'glass'], ['wine', 'head'], ['wing', 'back'], ['wing', 'man'], ['wing', 'span'], ['wing', 'tip'], ['winter', 'time'], ['wish', 'bone'], ['witch', 'craft'], ['witch', 'doctor'], ['wolf', 'hound'], ['wonder', 'land'], ['wood', 'carver'], ['wood', 'chuck'], ['wood', 'cock'], ['wood', 'cutter'], ['wood', 'land'], ['wood', 'louse'], ['wood', 'man'], ['wood', 'pecker'], ['wood', 'shed'], ['wood', 'shop'], ['wood', 'smoke'], ['wood', 'wind'], ['wood', 'work'], ['wood', 'worker'], ['wood', 'worm'], ['woods', 'man'], ['work', 'bench'], ['work', 'book'], ['work', 'day'], ['work', 'flow'], ['work', 'force'], ['work', 'horse'], ['work', 'house'], ['work', 'lady'], ['work', 'load'], ['work', 'man'], ['work', 'mate'], ['work', 'piece'], ['work', 'place'], ['work', 'room'], ['work', 'sheet'], ['work', 'shop'], ['work', 'space'], ['work', 'station'], ['world', 'view'], ['wrist', 'band'], ['wrist', 'watch'], ['wrong', 'doer'], ['yachts', 'man'], ['yard', 'stick'], ['yard', 'work'], ['year', 'book'], ['zebra', 'fish'], ['zip', 'line'], ['zoo', 'keeper'], ['broad', 'side'], ['closed', 'toe'], ['common', 'place'], ['dumb', 'ass'], ['four', 'star'], ['fresh', 'water'], ['hard', 'back'], ['hard', 'core'], ['heavy', 'weight'], ['high', 'brow'], ['left', 'hand'], ['long', 'neck'], ['long', 'time'], ['middle', 'class'], ['one', 'way'], ['part', 'time'], ['plain', 'clothes'], ['real', 'time'], ['rear', 'view'], ['right', 'hand'], ['round', 'neck'], ['second', 'hand'], ['short', 'sleeve'], ['soft', 'core'], ['white', 'water'], ['whole', 'sale'], ['worth', 'while'], ['bald', 'headed'], ['best', 'selling'], ['big', 'headed'], ['bob', 'tailed'], ['broken', 'hearted'], ['cold', 'hearted'], ['cool', 'headed'], ['down', 'cast'], ['down', 'talking'], ['down', 'trodden'], ['dumb', 'struck'], ['easy', 'going'], ['ever', 'lasting'], ['far', 'sighted'], 
['first', 'born'], ['fore', 'boding'], ['fore', 'known'], ['fore', 'seen'], ['forth', 'coming'], ['fresh', 'born'], ['googoo', 'eyed'], ['half', 'hearted'], ['hard', 'boiled'], ['hard', 'headed'], ['hard', 'working'], ['heavy', 'set'], ['in', 'born'], ['in', 'coming'], ['in', 'dwelling'], ['in', 'grown'], ['in', 'laid'], ['light', 'headed'], ['light', 'hearted'], ['merry', 'making'], ['near', 'sighted'], ['never', 'ending'], ['new', 'born'], ['new', 'found'], ['north', 'bound'], ['on', 'coming'], ['on', 'going'], ['on', 'rushing'], ['out', 'going'], ['out', 'lying'], ['out', 'spoken'], ['out', 'standing'], ['out', 'worn'], ['pure', 'bred'], ['red', 'headed'], ['rough', 'shod'], ['scatter', 'brained'], ['short', 'sighted'], ['soft', 'boiled'], ['soft', 'hearted'], ['south', 'bound'], ['standard', 'bred'], ['still', 'born'], ['thorough', 'bred'], ['thorough', 'going'], ['up', 'standing'], ['whole', 'hearted'], ['wide', 'spread'], ['air', 'conditioning'], ['house', 'broken'], ['bird', 'watching'], ['blood', 'letting'], ['brick', 'laying'], ['child', 'bearing'], ['dish', 'washing'], ['fire', 'fighting'], ['fund', 'raising'], ['hair', 'dressing'], ['hand', 'writing'], ['head', 'standing'], ['home', 'coming'], ['home', 'making'], ['house', 'cleaning'], ['house', 'keeping'], ['house', 'warming'], ['kick', 'boxing'], ['short', 'coming'], ['wood', 'working'], ['wrong', 'doing'], ['along', 'side'], ['down', 'hill'], ['down', 'stairs'], ['down', 'stream'], ['down', 'town'], ['down', 'wind'], ['in', 'doors'], ['in', 'land'], ['in', 'shore'], ['in', 'side'], ['off', 'shore'], ['off', 'stage'], ['out', 'back'], ['out', 'board'], ['out', 'doors'], ['out', 'side'], ['over', 'board'], ['over', 'hand'], ['over', 'head'], ['over', 'land'], ['over', 'night'], ['over', 'seas'], ['under', 'foot'], ['under', 'ground'], ['under', 'water'], ['under', 'way'], ['up', 'land'], ['up', 'section'], ['up', 'shore'], ['up', 'stairs'], ['up', 'state'], ['up', 'stream'], ['up', 'town'], ['die', 
'hard'], ['fail', 'safe'], ['fly', 'away'], ['make', 'shift'], ['ride', 'on'], ['see', 'through'], ['shatter', 'proof'], ['sing', 'song'], ['slip', 'on'], ['tell', 'tale'], ['back', 'lit'], ['bow', 'legged'], ['back', 'ordered'], ['back', 'sided'], ['bed', 'ridden'], ['blood', 'shot'], ['book', 'keeping'], ['breath', 'taking'], ['brow', 'beaten'], ['bull', 'headed'], ['candle', 'lit'], ['church', 'going'], ['cliff', 'hanging'], ['cloth', 'bound'], ['crest', 'fallen'], ['ear', 'splitting'], ['god', 'damned'], ['god', 'sent'], ['ground', 'breaking'], ['hand', 'held'], ['hand', 'made'], ['hand', 'written'], ['heart', 'breaking'], ['heart', 'broken'], ['heart', 'felt'], ['hide', 'bound'], ['home', 'bound'], ['home', 'bred'], ['home', 'building'], ['home', 'grown'], ['home', 'made'], ['land', 'locked'], ['law', 'making'], ['love', 'lorn'], ['man', 'made'], ['match', 'making'], ['money', 'making'], ['moon', 'lit'], ['mother', 'fucking'], ['nerve', 'wracking'], ['pains', 'taking'], ['peace', 'making'], ['pin', 'striped'], ['rock', 'bound'], ['sea', 'faring'], ['shop', 'worn'], ['snow', 'bound'], ['storm', 'bound'], ['tax', 'paying'], ['theater', 'going'], ['time', 'worn'], ['type', 'written'], ['war', 'mongering'], ['way', 'faring'], ['wind', 'swept'], ['audio', 'visual'], ['bitter', 'sweet'], ['ever', 'green'], ['far', 'away'], ['forth', 'right'], ['front', 'most'], ['hydro', 'electric'], ['low', 'down'], ['north', 'east'], ['north', 'eastern'], ['north', 'most'], ['north', 'west'], ['north', 'western'], ['northern', 'most'], ['south', 'east'], ['south', 'eastern'], ['south', 'west'], ['south', 'western'], ['southern', 'most'], ['straight', 'forward'], ['upper', 'most'], ['air', 'condition'], ['baby', 'sat'], ['baby', 'sit'], ['back', 'bite'], ['back', 'order'], ['book', 'bind'], ['brain', 'wash'], ['break', 'dance'], ['breast', 'fed'], ['breast', 'feed'], ['bunny', 'hop'], ['car', 'jack'], ['care', 'take'], ['cheer', 'lead'], ['clickety', 'clop'], ['crane', 'lift'], 
['diddy', 'bop'], ['draw', 'file'], ['eaves', 'drop'], ['face', 'time'], ['film', 'make'], ['fund', 'raise'], ['gang', 'bang'], ['gate', 'keep'], ['gob', 'smack'], ['grave', 'dig'], ['hand', 'pick'], ['hand', 'wash'], ['hen', 'peck'], ['hitch', 'hike'], ['home', 'school'], ['house', 'break'], ['ice', 'skate'], ['jam', 'pack'], ['jay', 'walk'], ['jump', 'start'], ['karate', 'chop'], ['key', 'punch'], ['leap', 'frog'], ['light', 'saber'], ['man', 'handle'], ['mind', 'read'], ['molly', 'coddle'], ['mouth', 'breathe'], ['nit', 'pick'], ['nose', 'dive'], ['pan', 'handle'], ['photo', 'copy'], ['pile', 'drive'], ['pinch', 'hit'], ['pole', 'vault'], ['post', 'mark'], ['proof', 'read'], ['role', 'play'], ['ship', 'wreck'], ['shoe', 'horn'], ['shop', 'lift'], ['shot', 'put'], ['show', 'stop'], ['side', 'step'], ['side', 'swipe'], ['sight', 'read'], ['sight', 'see'], ['sky', 'dive'], ['sky', 'jack'], ['sleep', 'walk'], ['snow', 'mobile'], ['sound', 'proof'], ['spell', 'check'], ['spoon', 'feed'], ['star', 'gaze'], ['steam', 'roll'], ['stock', 'pile'], ['story', 'tell'], ['sun', 'bathe'], ['tack', 'solder'], ['tail', 'gate'], ['tailor', 'make'], ['tap', 'dance'], ['tape', 'record'], ['text', 'message'], ['trail', 'blaze'], ['trouble', 'shoot'], ['water', 'proof'], ['back', 'slide'], ['back', 'track'], ['black', 'ball'], ['cyber', 'loaf'], ['down', 'play'], ['down', 'shift'], ['fine', 'tune'], ['free', 'lance'], ['hard', 'wire'], ['hi', 'jack'], ['high', 'jack'], ['high', 'tail'], ['micro', 'manage'], ['quick', 'step'], ['rough', 'handle'], ['rough', 'house'], ['rough', 'sand'], ['rubber', 'neck'], ['safe', 'guard'], ['white', 'wash'], ['wise', 'crack'], ['blow', 'hole'], ['blow', 'job'], ['blow', 'torch'], ['bob', 'tail'], ['bobble', 'head'], ['break', 'fast'], ['break', 'water'], ['bus', 'boy'], ['buzz', 'word'], ['carry', 'cot'], ['catch', 'all'], ['catch', 'phrase'], ['cheat', 'sheet'], ['check', 'point'], ['chew', 'stick'], ['chew', 'toy'], ['clap', 'board'], ['cook', 
'book'], ['cook', 'fire'], ['crack', 'pot'], ['crash', 'pad'], ['crawl', 'space'], ['cry', 'baby'], ['cure', 'all'], ['cut', 'throat'], ['dare', 'devil'], ['dash', 'board'], ['dip', 'shit'], ['dip', 'stick'], ['dodge', 'ball'], ['doodle', 'bug'], ['draw', 'bridge'], ['drive', 'way'], ['flip', 'side'], ['fly', 'boy'], ['fly', 'wheel'], ['fuss', 'budget'], ['guess', 'work'], ['hack', 'saw'], ['hack', 'work'], ['hang', 'glider'], ['hang', 'man'], ['hang', 'nail'], ['happen', 'stance'], ['help', 'mate'], ['hop', 'scotch'], ['hover', 'craft'], ['hover', 'fly'], ['hover', 'plane'], ['jump', 'suit'], ['keep', 'sake'], ['kick', 'ball'], ['kick', 'stand'], ['kill', 'joy'], ['leap', 'frog'], ['mince', 'meat'], ['pass', 'port'], ['pawn', 'shop'], ['pay', 'load'], ['pay', 'phone'], ['pick', 'pocket'], ['piss', 'ant'], ['play', 'boy'], ['play', 'date'], ['play', 'day'], ['play', 'dough'], ['play', 'field'], ['play', 'ground'], ['play', 'group'], ['play', 'land'], ['play', 'list'], ['play', 'mat'], ['play', 'mate'], ['play', 'mobile'], ['play', 'pen'], ['play', 'room'], ['play', 'school'], ['play', 'set'], ['play', 'station'], ['play', 'thing'], ['play', 'time'], ['play', 'toy'], ['pop', 'corn'], ['pop', 'gun'], ['punch', 'card'], ['punch', 'line'], ['push', 'ball'], ['push', 'button'], ['push', 'cart'], ['push', 'chair'], ['rattle', 'snake'], ['repair', 'man'], ['rest', 'room'], ['ring', 'tone'], ['row', 'boat'], ['run', 'way'], ['scare', 'crow'], ['scatter', 'gun'], ['screw', 'top'], ['search', 'light'], ['share', 'crop'], ['share', 'ware'], ['shock', 'wave'], ['shoo', 'fly'], ['show', 'case'], ['show', 'horse'], ['show', 'house'], ['show', 'piece'], ['show', 'room'], ['shuffle', 'board'], ['sink', 'hole'], ['skate', 'board'], ['slap', 'stick'], ['slip', 'line'], ['slip', 'stream'], ['smear', 'case'], ['snap', 'lock'], ['snuggle', 'bug'], ['speak', 'easy'], ['spill', 'way'], ['spoil', 'sport'], ['spring', 'board'], ['spy', 'glass'], ['spy', 'ware'], ['stand', 'point'], 
['step', 'ladder'], ['step', 'stool'], ['stink', 'bug'], ['stink', 'head'], ['stink', 'pot'], ['stir', 'fry'], ['stop', 'light'], ['stop', 'watch'], ['surf', 'board'], ['swim', 'suit'], ['swim', 'wear'], ['switch', 'blade'], ['take', 'home'], ['tattle', 'tale'], ['tickle', 'brush'], ['touch', 'stone'], ['tow', 'bar'], ['tow', 'boat'], ['tow', 'rope'], ['tow', 'truck'], ['tread', 'mill'], ['tug', 'boat'], ['turn', 'key'], ['turn', 'pike'], ['turn', 'stile'], ['turn', 'table'], ['wait', 'list'], ['walk', 'man'], ['walk', 'way'], ['wash', 'basin'], ['wash', 'board'], ['wash', 'bowl'], ['wash', 'cloth'], ['wash', 'man'], ['wash', 'rag'], ['wash', 'room'], ['wash', 'towel'], ['wash', 'tub'], ['wash', 'woman'], ['waste', 'water'], ['watch', 'dog'], ['watch', 'man'], ['wed', 'lock'], ['whine', 'bag'], ['whine', 'bucket'], ['whirl', 'pool'], ['whirl', 'wind'], ['work', 'top'], ['back', 'up'], ['bail', 'out'], ['black', 'out'], ['blast', 'off'], ['blow', 'out'], ['blow', 'up'], ['break', 'away'], ['break', 'down'], ['break', 'in'], ['break', 'off'], ['break', 'out'], ['break', 'through'], ['break', 'up'], ['burn', 'out'], ['call', 'back'], ['camp', 'out'], ['carry', 'on'], ['carry', 'out'], ['carry', 'over'], ['cast', 'off'], ['catch', 'up'], ['change', 'over'], ['check', 'off'], ['check', 'out'], ['check', 'over'], ['check', 'up'], ['clean', 'out'], ['clean', 'up'], ['clear', 'out'], ['close', 'up'], ['come', 'back'], ['cook', 'out'], ['cop', 'out'], ['count', 'down'], ['cover', 'up'], ['crack', 'down'], ['crack', 'up'], ['cross', 'over'], ['cut', 'back'], ['cut', 'off'], ['cut', 'out'], ['down', 'pour'], ['draw', 'back'], ['draw', 'down'], ['dress', 'up'], ['dress', 'up'], ['drive', 'through'], ['drop', 'off'], ['drop', 'out'], ['dug', 'out'], ['fade', 'out'], ['fall', 'off'], ['fall', 'out'], ['feed', 'back'], ['flash', 'back'], ['fly', 'by'], ['fly', 'over'], ['fold', 'over'], ['follow', 'up'], ['gad', 'about'], ['get', 'away'], ['give', 'away'], ['goings', 'on'], 
['hand', 'off'], ['hand', 'out'], ['hang', 'out'], ['hang', 'over'], ['hang', 'up'], ['hide', 'away'], ['hide', 'out'], ['hold', 'back'], ['hold', 'over'], ['hold', 'up'], ['hook', 'up'], ['kick', 'back'], ['kick', 'off'], ['knock', 'down'], ['knock', 'off'], ['knock', 'out'], ['lay', 'off'], ['lay', 'out'], ['lay', 'over'], ['lay', 'up'], ['left', 'over'], ['let', 'down'], ['lie', 'down'], ['line', 'up'], ['lock', 'down'], ['look', 'out'], ['look', 'up'], ['made', 'up'], ['make', 'over'], ['make', 'up'], ['mark', 'up'], ['match', 'up'], ['melt', 'down'], ['pass', 'over'], ['pay', 'back'], ['pay', 'off'], ['pick', 'off'], ['pick', 'up'], ['pile', 'up'], ['pin', 'up'], ['play', 'back'], ['play', 'off'], ['plug', 'in'], ['pop', 'over'], ['pop', 'up'], ['pull', 'back'], ['pull', 'down'], ['pull', 'out'], ['pull', 'over'], ['pull', 'up'], ['push', 'over'], ['push', 'up'], ['put', 'down'], ['put', 'out'], ['read', 'back'], ['read', 'out'], ['ring', 'out'], ['roll', 'away'], ['roll', 'over'], ['roll', 'up'], ['round', 'up'], ['rub', 'down'], ['run', 'about'], ['run', 'away'], ['run', 'down'], ['run', 'off'], ['run', 'through'], ['screw', 'up'], ['set', 'back'], ['set', 'up'], ['shoot', 'out'], ['shout', 'out'], ['show', 'down'], ['show', 'off'], ['shut', 'down'], ['sign', 'up'], ['sit', 'down'], ['sit', 'up'], ['sleep', 'over'], ['slide', 'on'], ['slip', 'off'], ['slip', 'on'], ['slip', 'up'], ['slow', 'down'], ['snap', 'back'], ['spill', 'over'], ['stand', 'by'], ['stand', 'off'], ['stand', 'up'], ['start', 'up'], ['stay', 'over'], ['stick', 'on'], ['stop', 'over'], ['stow', 'away'], ['strap', 'on'], ['tag', 'along'], ['tag', 'back'], ['take', 'down'], ['take', 'off'], ['take', 'out'], ['take', 'over'], ['talk', 'back'], ['throw', 'away'], ['throw', 'back'], ['throw', 'up'], ['tie', 'back'], ['time', 'out'], ['toss', 'up'], ['touch', 'down'], ['touch', 'down'], ['trade', 'off'], ['try', 'out'], ['turn', 'about'], ['turn', 'around'], ['turn', 'off'], ['turn', 'out'], 
['turn', 'over'], ['voice', 'over'], ['walk', 'out'], ['walk', 'over'], ['walk', 'through'], ['walk', 'up'], ['warm', 'up'], ['wash', 'out'], ['wind', 'up'], ['wipe', 'out'], ['work', 'out'], ['wrap', 'around'], ['wrap', 'up'], ['write', 'off'], ['write', 'up'], ['hung', 'over'], ['run', 'away'], ['stand', 'by'], ['above', 'ground'], ['below', 'ground'], ['down', 'hill'], ['down', 'stream'], ['down', 'town'], ['down', 'wind'], ['in', 'door'], ['in', 'land'], ['in', 'line'], ['in', 'shore'], ['off', 'beat'], ['off', 'hand'], ['off', 'line'], ['off', 'print'], ['off', 'road'], ['off', 'site'], ['on', 'line'], ['on', 'stage'], ['out', 'board'], ['out', 'door'], ['out', 'state'], ['over', 'hand'], ['over', 'night'], ['over', 'seas'], ['over', 'weight'], ['under', 'cover'], ['under', 'ground'], ['under', 'sea'], ['under', 'water'], ['up', 'beat'], ['up', 'hill'], ['up', 'land'], ['up', 'river'], ['up', 'stairs'], ['up', 'stream'], ['up', 'tight'], ['up', 'town'], ['after', 'care'], ['after', 'effect'], ['after', 'image'], ['after', 'math'], ['after', 'noon'], ['after', 'party'], ['after', 'school'], ['after', 'season'], ['after', 'shave'], ['after', 'shock'], ['after', 'taste'], ['after', 'thought'], ['back', 'door'], ['back', 'field'], ['back', 'hoe'], ['back', 'water'], ['back', 'woods'], ['by', 'law'], ['by', 'way'], ['down', 'draft'], ['down', 'spout'], ['down', 'town'], ['down', 'trend'], ['in', 'box'], ['in', 'patient'], ['in', 'post'], ['in', 'road'], ['in', 'side'], ['in', 'voice'], ['on', 'looker'], ['out', 'doors'], ['out', 'field'], ['out', 'fielder'], ['out', 'fitter'], ['out', 'group'], ['out', 'growth'], ['out', 'house'], ['out', 'law'], ['out', 'lier'], ['out', 'line'], ['out', 'patient'], ['out', 'post'], ['out', 'rage'], ['out', 'rigger'], ['out', 'side'], ['out', 'sider'], ['under', 'pants'], ['under', 'taker'], ['under', 'wear'], ['up', 'shot'], ['up', 'time'], ['up', 'town'], ['up', 'trend'], ['up', 'welling'], ['back', 'fire'], ['back', 'light'], 
['back', 'stitch'], ['beat', 'box'], ['black', 'mail'], ['blind', 'side'], ['blue', 'print'], ['boot', 'leg'], ['bull', 'shit'], ['bush', 'whack'], ['by', 'pass'], ['car', 'pool'], ['cat', 'nap'], ['chit', 'chat'], ['copy', 'right'], ['criss', 'cross'], ['date', 'line'], ['down', 'grade'], ['down', 'load'], ['down', 'size'], ['fire', 'bomb'], ['flood', 'light'], ['flood', 'lit'], ['fore', 'close'], ['fore', 'shadow'], ['fore', 'shorten'], ['fore', 'stall'], ['free', 'base'], ['head', 'line'], ['hot', 'foot'], ['key', 'note'], ['land', 'scape'], ['master', 'mind'], ['off', 'load'], ['off', 'saddle'], ['out', 'class'], ['out', 'face'], ['out', 'fit'], ['out', 'fox'], ['out', 'law'], ['out', 'source'], ['out', 'wit'], ['over', 'whelm'], ['pad', 'lock'], ['patty', 'cake'], ['pig', 'out'], ['red', 'line'], ['shep', 'herd'], ['short', 'change'], ['short', 'list'], ['side', 'track'], ['skate', 'board'], ['soft', 'pedal'], ['spear', 'head'], ['spot', 'light'], ['stream', 'line'], ['sugar', 'coat'], ['swash', 'buckle'], ['tattle', 'tale'], ['tip', 'toe'], ['tippie', 'toe'], ['up', 'chuck'], ['video', 'cam'], ['vouch', 'safe'], ['ware', 'house'], ['zig', 'zag'], ['back', 'swing'], ['cover', 'all'], ['cross', 'walk'], ['day', 'break'], ['down', 'beat'], ['down', 'fall'], ['down', 'turn'], ['fare', 'well'], ['fore', 'cast'], ['fore', 'knowledge'], ['fore', 'shadowing'], ['fore', 'sight'], ['fort', 'night'], ['free', 'fall'], ['frost', 'bite'], ['god', 'send'], ['grid', 'lock'], ['grown', 'up'], ['hand', 'hold'], ['hand', 'shake'], ['hand', 'stand'], ['hard', 'bake'], ['heart', 'throb'], ['hey', 'day'], ['honey', 'suckle'], ['in', 'put'], ['in', 'set'], ['in', 'take'], ['look', 'alike'], ['main', 'stay'], ['newly', 'wed'], ['off', 'shoot'], ['on', 'rush'], ['on', 'set'], ['out', 'back'], ['out', 'break'], ['out', 'burst'], ['out', 'cast'], ['out', 'come'], ['out', 'cry'], ['out', 'flow'], ['out', 'lay'], ['out', 'look'], ['out', 'put'], ['out', 'reach'], ['out', 'set'], 
['over', 'lay'], ['over', 'throw'], ['print', 'out'], ['quick', 'step'], ['red', 'start'], ['short', 'cut'], ['short', 'fall'], ['short', 'stop'], ['sing', 'along'], ['stand', 'still'], ['straight', 'away'], ['strip', 'tease'], ['sun', 'down'], ['sun', 'up'], ['take', 'away'], ['through', 'put'], ['top', 'gallant'], ['under', 'cut'], ['up', 'do'], ['up', 'keep'], ['up', 'lift'], ['up', 'roar'], ['up', 'surge'], ['up', 'take'], ['up', 'turn'], ['upper', 'cut'], ['web', 'cast'], ['wind', 'break'], ['wood', 'cut'], ['alfalfa', 'hay'], ['ball', 'chair'], ['ball', 'pool'], ['baseball', 'logger'], ['batter', 'man'], ['bear', 'monster'], ['bed', 'man'], ['bin', 'man'], ['binky', 'bed'], ['binky', 'box'], ['binky', 'maker'], ['boat', 'car'], ['bucket', 'loader'], ['car', 'gate'], ['catch', 'go'], ['catcher', 'thing'], ['cattle', 'fish'], ['children', 'bed'], ['coco', 'man'], ['cookie', 'bread'], ['corn', 'butter'], ['corn', 'crib'], ['dipe', 'dipe'], ['dog', 'hat'], ['doggy', 'dog'], ['door', 'home'], ['dump', 'trailer'], ['dust', 'man'], ['egg', 'drop'], ['family', 'house'], ['faucet', 'wheel'], ['fighter', 'ball'], ['fire', 'girl'], ['fire', 'pig'], ['fire', 'whistle'], ['fix', 'thing'], ['flag', 'place'], ['flag', 'store'], ['food', 'lady'], ['foot', 'book'], ['gas', 'man'], ['gogo', 'puppy'], ['hitting', 'tree'], ['horse', 'swing'], ['horsie', 'ride'], ['human', 'was'], ['ice', 'lolly'], ['jacket', 'potato'], ['jar', 'cookie'], ['jet', 'maker'], ['jumping', 'trick'], ['key', 'clock'], ['kids', 'meal'], ['ladder', 'cover'], ['ladder', 'man'], ['ladder', 'truck'], ['leopard', 'tiger'], ['letter', 'man'], ['light', 'man'], ['man', 'keeper'], ['man', 'nightmare'], ['mark', 'cover'], ['market', 'geese'], ['mask', 'face'], ['mess', 'room'], ['minky', 'bed'], ['moon', 'fish'], ['moosy', 'lunch'], ['motor', 'room'], ['mow', 'lawner'], ['open', 'game'], ['paddle', 'fadder'], ['paddle', 'fish'], ['painting', 'man'], ['paints', 'man'], ['parade', 'man'], ['part', 'table'], 
['pate', 'pancake'], ['picture', 'door'], ['pool', 'geese'], ['pop', 'block'], ['poppy', 'bead'], ['pump', 'water'], ['pumper', 'truck'], ['rabbit', 'garden'], ['rainbow', 'woman'], ['ribbit', 'frog'], ['road', 'set'], ['sad', 'meal'], ['sandman', 'sleepies'], ['scrub', 'brush'], ['sea', 'bone'], ['sea', 'dog'], ['sea', 'witch'], ['seeing', 'game'], ['shop', 'man'], ['sign', 'truck'], ['ski', 'ball'], ['slide', 'step'], ['sliding', 'board'], ['smile', 'shirt'], ['smoke', 'box'], ['snake', 'walk'], ['snowpan', 'man'], ['snowpen', 'man'], ['sticker', 'it'], ['straddle', 'truck'], ['straw', 'paper'], ['straw', 'wrapper'], ['talk', 'sound'], ['thief', 'man'], ['ticket', 'man'], ['turn', 'wheel'], ['water', 'name'], ['water', 'tail'], ['wool', 'sheep'], ['work', 'job'], ['yoyo', 'man'], ['yoyo', 'yacht'], ['zipper', 'zoop'], ['alder', 'man'], ['back', 'wash'], ['bad', 'ass'], ['bad', 'land'], ['best', 'seller'], ['big', 'ass'], ['big', 'horn'], ['big', 'mouth'], ['big', 'shot'], ['big', 'wig'], ['black', 'berry'], ['black', 'bird'], ['black', 'board'], ['black', 'currant'], ['black', 'head'], ['black', 'jack'], ['black', 'light'], ['black', 'mail'], ['black', 'smith'], ['black', 'top'], ['blind', 'fold'], ['blue', 'bell'], ['blue', 'berry'], ['blue', 'bird'], ['blue', 'bottle'], ['blue', 'bush'], ['blue', 'gill'], ['blue', 'grass'], ['blue', 'jay'], ['blue', 'jean'], ['blue', 'print'], ['blue', 'tack'], ['broad', 'cast'], ['broad', 'side'], ['brown', 'stone'], ['busy', 'body'], ['cheap', 'skate'], ['common', 'place'], ['common', 'wealth'], ['copper', 'head'], ['cross', 'road'], ['cross', 'word'], ['dark', 'room'], ['dead', 'beat'], ['dead', 'bolt'], ['dead', 'head'], ['dead', 'line'], ['dead', 'lock'], ['dead', 'weight'], ['dim', 'wit'], ['double', 'dutch'], ['down', 'side'], ['down', 'time'], ['dry', 'wall'], ['dumb', 'bell'], ['earthen', 'ware'], ['eight', 'ball'], ['eight', 'track'], ['elder', 'berry'], ['english', 'man'], ['fast', 'ball'], ['flash', 'card'], 
['flat', 'bed'], ['flat', 'head'], ['flat', 'iron'], ['flat', 'land'], ['four', 'wheeler'], ['free', 'holder'], ['free', 'man'], ['free', 'style'], ['free', 'style'], ['free', 'way'], ['freed', 'man'], ['fresh', 'man'], ['full', 'back'], ['funny', 'bone'], ['gentle', 'man'], ['gold', 'finch'], ['golden', 'rod'], ['good', 'will'], ['gray', 'beard'], ['gray', 'scale'], ['great', 'coat'], ['green', 'bean'], ['green', 'fly'], ['green', 'grocer'], ['green', 'grocery'], ['green', 'head'], ['green', 'house'], ['green', 'ware'], ['grey', 'hound'], ['half', 'back'], ['handy', 'man'], ['hard', 'back'], ['hard', 'ball'], ['hard', 'board'], ['hard', 'head'], ['hard', 'tack'], ['hard', 'ware'], ['hard', 'wood'], ['hep', 'cat'], ['hidey', 'hole'], ['high', 'ball'], ['high', 'board'], ['high', 'boy'], ['high', 'chair'], ['high', 'five'], ['high', 'land'], ['high', 'light'], ['high', 'lighter'], ['high', 'way'], ['hollow', 'ware'], ['hot', 'box'], ['hot', 'cake'], ['hot', 'dog'], ['hot', 'house'], ['hot', 'line'], ['hot', 'rod'], ['hot', 'shot'], ['hot', 'spot'], ['humming', 'bird'], ['jumping', 'jacks'], ['lazy', 'bone'], ['light', 'weight'], ['live', 'stock'], ['long', 'boat'], ['long', 'fish'], ['long', 'hand'], ['long', 'horn'], ['long', 'shore'], ['long', 'shoreman'], ['long', 'shot'], ['loud', 'mouth'], ['loud', 'speaker'], ['low', 'fat'], ['low', 'land'], ['low', 'life'], ['lower', 'case'], ['mad', 'house'], ['main', 'frame'], ['main', 'land'], ['main', 'lander'], ['main', 'sail'], ['main', 'stream'], ['mid', 'term'], ['mocking', 'bird'], ['new', 'comer'], ['noble', 'man'], ['numb', 'skull'], ['odd', 'ball'], ['outer', 'wear'], ['pink', 'eye'], ['poor', 'house'], ['quick', 'sand'], ['quick', 'silver'], ['raw', 'hide'], ['red', 'bone'], ['red', 'bud'], ['red', 'coat'], ['red', 'eye'], ['red', 'head'], ['red', 'header'], ['red', 'neck'], ['red', 'wood'], ['rough', 'cast'], ['rough', 'neck'], ['round', 'head'], ['round', 'house'], ['running', 'back'], ['safe', 'keeping'], 
['select', 'man'], ['sharp', 'tooth'], ['short', 'bread'], ['short', 'cake'], ['short', 'hand'], ['short', 'wave'], ['sick', 'room'], ['side', 'kick'], ['silver', 'back'], ['silver', 'fish'], ['sippy', 'cup'], ['sippy', 'dipper'], ['sleepy', 'head'], ['slow', 'poke'], ['small', 'mouth'], ['small', 'pox'], ['smart', 'phone'], ['smart', 'watch'], ['smooth', 'bore'], ['soft', 'ball'], ['soft', 'cover'], ['soft', 'ware'], ['soft', 'wood'], ['sour', 'dough'], ['sour', 'puss'], ['spread', 'sheet'], ['stale', 'mate'], ['still', 'birth'], ['strong', 'hold'], ['strong', 'man'], ['sweet', 'bread'], ['sweet', 'corn'], ['sweet', 'heart'], ['sweet', 'pea'], ['tender', 'loin'], ['thorough', 'fare'], ['tight', 'rope'], ['top', 'hat'], ['turbo', 'prop'], ['under', 'classman'], ['under', 'classmen'], ['upper', 'case'], ['wet', 'suit'], ['whirly', 'bird'], ['white', 'board'], ['white', 'face'], ['white', 'head'], ['white', 'ware'], ['whole', 'meal'], ['whole', 'wheat'], ['wild', 'cat'], ['wild', 'fire'], ['wild', 'flower'], ['wild', 'life'], ['yellow', 'jacket'], ['back', 'stage'], ['back', 'stairs'], ['bare', 'back'], ['before', 'hand'], ['broad', 'side'], ['cater', 'corner'], ['cross', 'ways'], ['free', 'hand'], ['front', 'ways'], ['half', 'time'], ['half', 'way'], ['half', 'ways'], ['length', 'ways'], ['long', 'ways'], ['part', 'way'], ['piece', 'meal'], ['side', 'saddle'], ['counter', 'clockwise'], ['down', 'right'], ['head', 'long'], ['heaven', 'ward'], ['horse', 'back'], ['horsie', 'back'], ['piggy', 'back'], ['that', 'away'], ['this', 'away'], ['baa', 'lamb'], ['baa', 'sheep'], ['baabaa', 'sheep'], ['billy', 'goat'], ['bunny', 'rabbit'], ['doggie', 'dog'], ['kitty', 'cat'], ['moo', 'cow'], ['panda', 'bear'], ['puppy', 'dog'], ['quack', 'duck'], ['she', 'cat'], ['teddy', 'bear'], ['air', 'tight'], ['awe', 'struck'], ['bed', 'bound'], ['bed', 'fast'], ['blood', 'thirsty'], ['bomb', 'proof'], ['brain', 'dead'], ['buck', 'wild'], ['bullet', 'proof'], ['burglar', 'proof'], ['car', 
'sick'], ['care', 'free'], ['child', 'proof'], ['city', 'wide'], ['color', 'blind'], ['color', 'fast'], ['country', 'wide'], ['dog', 'eared'], ['fiber', 'optic'], ['fire', 'proof'], ['fool', 'hardy'], ['fool', 'proof'], ['fore', 'most'], ['free', 'hand'], ['free', 'lance'], ['front', 'ward'], ['god', 'damn'], ['hap', 'hazard'], ['hay', 'wire'], ['head', 'first'], ['head', 'strong'], ['hind', 'most'], ['home', 'sick'], ['honey', 'dip'], ['house', 'bound'], ['iron', 'clad'], ['knee', 'deep'], ['life', 'long'], ['luke', 'warm'], ['nation', 'wide'], ['news', 'worthy'], ['nit', 'picky'], ['note', 'worthy'], ['piece', 'meal'], ['rust', 'proof'], ['sea', 'sick'], ['ship', 'shape'], ['shock', 'proof'], ['side', 'long'], ['side', 'ways'], ['skin', 'tight'], ['sound', 'proof'], ['star', 'board'], ['state', 'wide'], ['stead', 'fast'], ['stir', 'crazy'], ['sun', 'lit'], ['thread', 'bare'], ['top', 'most'], ['water', 'proof'], ['weather', 'proof'], ['week', 'long'], ['wind', 'blown'], ['world', 'wide'], ['year', 'long'], ['year', 'old'], ['bow', 'wow'], ['clickety', 'clack'], ['clickety', 'click'], ['clickety', 'clop'], ['clip', 'clop'], ['clippity', 'clop'], ['dibble', 'dabble'], ['ding', 'dong'], ['flip', 'flop'], ['hee', 'haw'], ['hip', 'hop'], ['hokey', 'pokey'], ['hoot', 'hoot'], ['hop', 'bang'], ['nitty', 'gritty'], ['oom', 'pahpah'], ['piggie', 'wiggie'], ['pitter', 'pat'], ['pitter', 'patter'], ['plip', 'plop'], ['rufty', 'tufty'], ['snip', 'snap'], ['splish', 'splash'], ['tic', 'tac'], ['tick', 'tock'], ['tootsie', 'wootsie'], ['twirly', 'whirly'], ['back', 'flip'], ['back', 'lash'], ['back', 'rub'], ['back', 'splash'], ['back', 'wash'], ['bar', 'keep'], ['bill', 'fold'], ['bird', 'feed'], ['blood', 'flow'], ['blood', 'shed'], ['boot', 'black'], ['cat', 'walk'], ['center', 'punch'], ['crap', 'shoot'], ['crew', 'cut'], ['eye', 'wash'], ['face', 'lift'], ['hair', 'cut'], ['hair', 'do'], ['hay', 'mow'], ['head', 'butt'], ['head', 'lock'], ['head', 'rest'], ['head', 
'stand'], ['heart', 'break'], ['heart', 'burn'], ['home', 'stay'], ['land', 'slide'], ['molly', 'coddle'], ['moon', 'shine'], ['moon', 'walk'], ['mouth', 'wash'], ['mud', 'slide'], ['night', 'fall'], ['nose', 'bleed'], ['pin', 'prick'], ['pip', 'squeak'], ['rain', 'fall'], ['ride', 'share'], ['road', 'block'], ['side', 'walk'], ['snake', 'bite'], ['snow', 'fall'], ['star', 'burst'], ['sun', 'block'], ['sun', 'rise'], ['sun', 'set'], ['sun', 'shine'], ['thunder', 'clap'], ['time', 'share'], ['water', 'fall'], ['whip', 'lash'], ['wind', 'fall']]
@@ -76,7 +76,7 @@ def __serialize_arr(src, tgt):
76
76
 
77
77
  return src_serialized, tgt_serialized
78
78
 
79
- def __dp(payload, reference, t):
79
+ def __dp(payload, reference, t, match_fn):
80
80
  """Performs bottom-up dynamic programming alignment
81
81
 
82
82
  Parameters
@@ -149,7 +149,7 @@ def __dp(payload, reference, t):
149
149
  # get a match.
150
150
 
151
151
  # recall 1 indexing
152
- is_match = (reference[i-1].key == payload[j-1].key)
152
+ is_match = match_fn(reference[i-1].key, payload[j-1].key)
153
153
 
154
154
  # calculate new distances
155
155
  new_dist1 = dist1+(0 if is_match else 2)
@@ -209,15 +209,16 @@ def __dp(payload, reference, t):
209
209
 
210
210
  def align(source_payload_sequence,
211
211
  target_reference_sequence,
212
- tqdm=True):
212
+ tqdm=True,
213
+ match_fn=lambda x,y: x==y):
213
214
  """Align two sequences"""
214
215
 
215
216
  if (len(source_payload_sequence) > 0 and
216
217
  type(source_payload_sequence[0]) == PayloadTarget):
217
- return __dp(source_payload_sequence, target_reference_sequence, tqdm)
218
+ return __dp(source_payload_sequence, target_reference_sequence, tqdm, match_fn)
218
219
  else:
219
220
  return __dp(*__serialize_arr(source_payload_sequence,
220
- target_reference_sequence), tqdm)
221
+ target_reference_sequence), tqdm, match_fn)
221
222
 
222
223
  # align([1,2,3,4,4,5,5,5], [1,1,3,4,4,12,5,5,18])
223
224
 
@@ -0,0 +1,3 @@
1
+ 0.7.19-post.15
2
+ July 12th, 2025
3
+ compound word list benchmarking
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: batchalign
3
- Version: 0.7.19.post11
3
+ Version: 0.7.19.post15
4
4
  Summary: Python Speech Language Sample Analysis
5
5
  Author: Brian MacWhinney, Houjun Liu
6
6
  Author-email: macw@cmu.edu, houjun@cmu.edu
@@ -115,6 +115,7 @@ batchalign/tests/pipelines/cleanup/test_parse_support.py
115
115
  batchalign/tests/pipelines/fa/test_fa_pipeline.py
116
116
  batchalign/utils/__init__.py
117
117
  batchalign/utils/abbrev.py
118
+ batchalign/utils/compounds.py
118
119
  batchalign/utils/config.py
119
120
  batchalign/utils/dp.py
120
121
  batchalign/utils/names.py
@@ -1,3 +0,0 @@
1
- 0.7.19-post.11
2
- July 8th, 2025
3
- benchmarking changes