pystylometry 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pystylometry/README.md +42 -0
- pystylometry/__init__.py +45 -3
- pystylometry/_types.py +1017 -259
- pystylometry/authorship/README.md +21 -0
- pystylometry/authorship/__init__.py +28 -4
- pystylometry/authorship/additional_methods.py +260 -40
- pystylometry/authorship/compression.py +175 -0
- pystylometry/authorship/kilgarriff.py +354 -0
- pystylometry/character/README.md +17 -0
- pystylometry/character/character_metrics.py +267 -179
- pystylometry/cli.py +427 -0
- pystylometry/consistency/README.md +27 -0
- pystylometry/consistency/__init__.py +57 -0
- pystylometry/consistency/_thresholds.py +162 -0
- pystylometry/consistency/drift.py +549 -0
- pystylometry/dialect/README.md +26 -0
- pystylometry/dialect/__init__.py +65 -0
- pystylometry/dialect/_data/dialect_markers.json +1134 -0
- pystylometry/dialect/_loader.py +360 -0
- pystylometry/dialect/detector.py +533 -0
- pystylometry/lexical/README.md +23 -0
- pystylometry/lexical/advanced_diversity.py +61 -22
- pystylometry/lexical/function_words.py +255 -56
- pystylometry/lexical/hapax.py +182 -52
- pystylometry/lexical/mtld.py +108 -26
- pystylometry/lexical/ttr.py +76 -10
- pystylometry/lexical/word_frequency_sophistication.py +1522 -298
- pystylometry/lexical/yule.py +136 -50
- pystylometry/ngrams/README.md +18 -0
- pystylometry/ngrams/entropy.py +150 -49
- pystylometry/ngrams/extended_ngrams.py +314 -69
- pystylometry/prosody/README.md +17 -0
- pystylometry/prosody/rhythm_prosody.py +773 -11
- pystylometry/readability/README.md +23 -0
- pystylometry/readability/additional_formulas.py +1887 -762
- pystylometry/readability/ari.py +144 -82
- pystylometry/readability/coleman_liau.py +136 -109
- pystylometry/readability/flesch.py +177 -73
- pystylometry/readability/gunning_fog.py +165 -161
- pystylometry/readability/smog.py +123 -42
- pystylometry/stylistic/README.md +20 -0
- pystylometry/stylistic/cohesion_coherence.py +669 -13
- pystylometry/stylistic/genre_register.py +1560 -17
- pystylometry/stylistic/markers.py +611 -17
- pystylometry/stylistic/vocabulary_overlap.py +354 -13
- pystylometry/syntactic/README.md +20 -0
- pystylometry/syntactic/advanced_syntactic.py +76 -14
- pystylometry/syntactic/pos_ratios.py +70 -6
- pystylometry/syntactic/sentence_stats.py +55 -12
- pystylometry/syntactic/sentence_types.py +71 -15
- pystylometry/viz/README.md +27 -0
- pystylometry/viz/__init__.py +71 -0
- pystylometry/viz/drift.py +589 -0
- pystylometry/viz/jsx/__init__.py +31 -0
- pystylometry/viz/jsx/_base.py +144 -0
- pystylometry/viz/jsx/report.py +677 -0
- pystylometry/viz/jsx/timeline.py +716 -0
- pystylometry/viz/jsx/viewer.py +1032 -0
- pystylometry-1.3.0.dist-info/METADATA +136 -0
- pystylometry-1.3.0.dist-info/RECORD +76 -0
- {pystylometry-1.0.0.dist-info → pystylometry-1.3.0.dist-info}/WHEEL +1 -1
- pystylometry-1.3.0.dist-info/entry_points.txt +4 -0
- pystylometry-1.0.0.dist-info/METADATA +0 -275
- pystylometry-1.0.0.dist-info/RECORD +0 -46
|
@@ -24,89 +24,535 @@ References:
|
|
|
24
24
|
Davies, M. (2008-). The Corpus of Contemporary American English (COCA).
|
|
25
25
|
"""
|
|
26
26
|
|
|
27
|
-
from .._types import WordFrequencySophisticationResult
|
|
28
|
-
|
|
27
|
+
from .._types import WordFrequencySophisticationResult, make_distribution
|
|
29
28
|
|
|
30
29
|
# Academic Word List (AWL) - Coxhead (2000)
|
|
31
30
|
# GitHub Issue #15: https://github.com/craigtrim/pystylometry/issues/15
|
|
32
31
|
# This is a subset of common academic words. The full AWL contains 570 word families.
|
|
33
32
|
# Consider loading from external file for complete list.
|
|
34
33
|
ACADEMIC_WORD_LIST = {
|
|
35
|
-
"analyze",
|
|
36
|
-
"
|
|
37
|
-
"
|
|
38
|
-
"
|
|
39
|
-
"
|
|
40
|
-
"
|
|
41
|
-
"
|
|
42
|
-
"
|
|
43
|
-
"
|
|
44
|
-
"
|
|
45
|
-
"
|
|
46
|
-
"
|
|
47
|
-
"
|
|
48
|
-
"
|
|
49
|
-
"
|
|
50
|
-
"
|
|
51
|
-
"
|
|
52
|
-
"
|
|
53
|
-
"
|
|
54
|
-
"
|
|
55
|
-
"
|
|
56
|
-
"
|
|
57
|
-
"
|
|
58
|
-
"
|
|
59
|
-
"
|
|
60
|
-
"
|
|
61
|
-
"
|
|
62
|
-
"
|
|
63
|
-
"
|
|
64
|
-
"
|
|
65
|
-
"
|
|
66
|
-
"
|
|
67
|
-
"
|
|
68
|
-
"
|
|
69
|
-
"
|
|
70
|
-
"
|
|
71
|
-
"
|
|
72
|
-
"
|
|
73
|
-
"
|
|
74
|
-
"
|
|
75
|
-
"
|
|
76
|
-
"
|
|
77
|
-
"
|
|
78
|
-
"
|
|
79
|
-
"
|
|
80
|
-
"
|
|
81
|
-
"
|
|
82
|
-
"
|
|
83
|
-
"
|
|
84
|
-
"
|
|
85
|
-
"
|
|
86
|
-
"
|
|
87
|
-
"
|
|
88
|
-
"
|
|
89
|
-
"
|
|
90
|
-
"
|
|
91
|
-
"
|
|
92
|
-
"
|
|
93
|
-
"
|
|
94
|
-
"
|
|
95
|
-
"
|
|
96
|
-
"
|
|
97
|
-
"
|
|
98
|
-
"
|
|
99
|
-
"
|
|
100
|
-
"
|
|
101
|
-
"
|
|
102
|
-
"
|
|
103
|
-
"
|
|
104
|
-
"
|
|
105
|
-
"
|
|
106
|
-
"
|
|
107
|
-
"
|
|
108
|
-
"
|
|
109
|
-
"
|
|
34
|
+
"analyze",
|
|
35
|
+
"analysis",
|
|
36
|
+
"analytical",
|
|
37
|
+
"approach",
|
|
38
|
+
"area",
|
|
39
|
+
"assess",
|
|
40
|
+
"assessment",
|
|
41
|
+
"assume",
|
|
42
|
+
"assumption",
|
|
43
|
+
"authority",
|
|
44
|
+
"available",
|
|
45
|
+
"benefit",
|
|
46
|
+
"category",
|
|
47
|
+
"chapter",
|
|
48
|
+
"commission",
|
|
49
|
+
"community",
|
|
50
|
+
"complex",
|
|
51
|
+
"compute",
|
|
52
|
+
"computer",
|
|
53
|
+
"conclude",
|
|
54
|
+
"conclusion",
|
|
55
|
+
"conduct",
|
|
56
|
+
"consequence",
|
|
57
|
+
"considerable",
|
|
58
|
+
"consist",
|
|
59
|
+
"consistent",
|
|
60
|
+
"constitute",
|
|
61
|
+
"constitutional",
|
|
62
|
+
"construct",
|
|
63
|
+
"construction",
|
|
64
|
+
"consumer",
|
|
65
|
+
"context",
|
|
66
|
+
"contract",
|
|
67
|
+
"contrast",
|
|
68
|
+
"contribute",
|
|
69
|
+
"contribution",
|
|
70
|
+
"controversial",
|
|
71
|
+
"controversy",
|
|
72
|
+
"convert",
|
|
73
|
+
"create",
|
|
74
|
+
"creation",
|
|
75
|
+
"creative",
|
|
76
|
+
"credit",
|
|
77
|
+
"criteria",
|
|
78
|
+
"cultural",
|
|
79
|
+
"culture",
|
|
80
|
+
"data",
|
|
81
|
+
"debate",
|
|
82
|
+
"define",
|
|
83
|
+
"definition",
|
|
84
|
+
"demonstrate",
|
|
85
|
+
"demonstration",
|
|
86
|
+
"derive",
|
|
87
|
+
"derived",
|
|
88
|
+
"design",
|
|
89
|
+
"despite",
|
|
90
|
+
"detect",
|
|
91
|
+
"dimension",
|
|
92
|
+
"diminish",
|
|
93
|
+
"distinct",
|
|
94
|
+
"distinction",
|
|
95
|
+
"distribute",
|
|
96
|
+
"distribution",
|
|
97
|
+
"diverse",
|
|
98
|
+
"diversity",
|
|
99
|
+
"document",
|
|
100
|
+
"documentation",
|
|
101
|
+
"domestic",
|
|
102
|
+
"dominate",
|
|
103
|
+
"economy",
|
|
104
|
+
"economic",
|
|
105
|
+
"edit",
|
|
106
|
+
"element",
|
|
107
|
+
"eliminate",
|
|
108
|
+
"emerge",
|
|
109
|
+
"emphasis",
|
|
110
|
+
"emphasize",
|
|
111
|
+
"empirical",
|
|
112
|
+
"enable",
|
|
113
|
+
"encounter",
|
|
114
|
+
"energy",
|
|
115
|
+
"enforce",
|
|
116
|
+
"enhance",
|
|
117
|
+
"enormous",
|
|
118
|
+
"ensure",
|
|
119
|
+
"environment",
|
|
120
|
+
"environmental",
|
|
121
|
+
"equation",
|
|
122
|
+
"equate",
|
|
123
|
+
"error",
|
|
124
|
+
"establish",
|
|
125
|
+
"estate",
|
|
126
|
+
"estimate",
|
|
127
|
+
"ethic",
|
|
128
|
+
"ethnic",
|
|
129
|
+
"evaluate",
|
|
130
|
+
"evaluation",
|
|
131
|
+
"eventual",
|
|
132
|
+
"eventually",
|
|
133
|
+
"evident",
|
|
134
|
+
"evidence",
|
|
135
|
+
"evolve",
|
|
136
|
+
"evolution",
|
|
137
|
+
"exceed",
|
|
138
|
+
"exclude",
|
|
139
|
+
"exclusive",
|
|
140
|
+
"expand",
|
|
141
|
+
"expansion",
|
|
142
|
+
"explicit",
|
|
143
|
+
"exploit",
|
|
144
|
+
"export",
|
|
145
|
+
"expose",
|
|
146
|
+
"external",
|
|
147
|
+
"extract",
|
|
148
|
+
"facilitate",
|
|
149
|
+
"factor",
|
|
150
|
+
"feature",
|
|
151
|
+
"federal",
|
|
152
|
+
"fee",
|
|
153
|
+
"file",
|
|
154
|
+
"final",
|
|
155
|
+
"finance",
|
|
156
|
+
"financial",
|
|
157
|
+
"finite",
|
|
158
|
+
"flexible",
|
|
159
|
+
"fluctuate",
|
|
160
|
+
"focus",
|
|
161
|
+
"format",
|
|
162
|
+
"formula",
|
|
163
|
+
"forthcoming",
|
|
164
|
+
"foundation",
|
|
165
|
+
"found",
|
|
166
|
+
"framework",
|
|
167
|
+
"function",
|
|
168
|
+
"functional",
|
|
169
|
+
"fund",
|
|
170
|
+
"fundamental",
|
|
171
|
+
"gender",
|
|
172
|
+
"generate",
|
|
173
|
+
"generation",
|
|
174
|
+
"global",
|
|
175
|
+
"goal",
|
|
176
|
+
"grant",
|
|
177
|
+
"guarantee",
|
|
178
|
+
"guideline",
|
|
179
|
+
"hence",
|
|
180
|
+
"hypothesis",
|
|
181
|
+
"hypothetical",
|
|
182
|
+
"identical",
|
|
183
|
+
"identify",
|
|
184
|
+
"identity",
|
|
185
|
+
"ideology",
|
|
186
|
+
"ignorance",
|
|
187
|
+
"illustrate",
|
|
188
|
+
"image",
|
|
189
|
+
"immigrate",
|
|
190
|
+
"impact",
|
|
191
|
+
"implement",
|
|
192
|
+
"implicate",
|
|
193
|
+
"implicit",
|
|
194
|
+
"imply",
|
|
195
|
+
"impose",
|
|
196
|
+
"incentive",
|
|
197
|
+
"incidence",
|
|
198
|
+
"incline",
|
|
199
|
+
"income",
|
|
200
|
+
"incorporate",
|
|
201
|
+
"index",
|
|
202
|
+
"indicate",
|
|
203
|
+
"indication",
|
|
204
|
+
"individual",
|
|
205
|
+
"individualism",
|
|
206
|
+
"induce",
|
|
207
|
+
"inevitable",
|
|
208
|
+
"infer",
|
|
209
|
+
"infrastructure",
|
|
210
|
+
"inherent",
|
|
211
|
+
"inherit",
|
|
212
|
+
"initial",
|
|
213
|
+
"initially",
|
|
214
|
+
"initiate",
|
|
215
|
+
"injure",
|
|
216
|
+
"innovate",
|
|
217
|
+
"innovation",
|
|
218
|
+
"input",
|
|
219
|
+
"insert",
|
|
220
|
+
"insight",
|
|
221
|
+
"inspect",
|
|
222
|
+
"instance",
|
|
223
|
+
"institute",
|
|
224
|
+
"institution",
|
|
225
|
+
"instruct",
|
|
226
|
+
"integral",
|
|
227
|
+
"integrate",
|
|
228
|
+
"integration",
|
|
229
|
+
"integrity",
|
|
230
|
+
"intelligence",
|
|
231
|
+
"intense",
|
|
232
|
+
"intensity",
|
|
233
|
+
"interact",
|
|
234
|
+
"interaction",
|
|
235
|
+
"intermediate",
|
|
236
|
+
"internal",
|
|
237
|
+
"interpret",
|
|
238
|
+
"interpretation",
|
|
239
|
+
"interval",
|
|
240
|
+
"intervene",
|
|
241
|
+
"intervention",
|
|
242
|
+
"intrinsic",
|
|
243
|
+
"invest",
|
|
244
|
+
"investigate",
|
|
245
|
+
"investigation",
|
|
246
|
+
"investment",
|
|
247
|
+
"invoke",
|
|
248
|
+
"involve",
|
|
249
|
+
"involvement",
|
|
250
|
+
"isolate",
|
|
251
|
+
"isolation",
|
|
252
|
+
"issue",
|
|
253
|
+
"item",
|
|
254
|
+
"job",
|
|
255
|
+
"journal",
|
|
256
|
+
"justify",
|
|
257
|
+
"label",
|
|
258
|
+
"labor",
|
|
259
|
+
"layer",
|
|
260
|
+
"lecture",
|
|
261
|
+
"legal",
|
|
262
|
+
"legislate",
|
|
263
|
+
"legislation",
|
|
264
|
+
"legislative",
|
|
265
|
+
"levy",
|
|
266
|
+
"liberal",
|
|
267
|
+
"license",
|
|
268
|
+
"likewise",
|
|
269
|
+
"link",
|
|
270
|
+
"locate",
|
|
271
|
+
"location",
|
|
272
|
+
"logic",
|
|
273
|
+
"maintain",
|
|
274
|
+
"maintenance",
|
|
275
|
+
"major",
|
|
276
|
+
"majority",
|
|
277
|
+
"manipulate",
|
|
278
|
+
"manual",
|
|
279
|
+
"margin",
|
|
280
|
+
"mature",
|
|
281
|
+
"maturity",
|
|
282
|
+
"maximize",
|
|
283
|
+
"mechanism",
|
|
284
|
+
"media",
|
|
285
|
+
"mediate",
|
|
286
|
+
"medical",
|
|
287
|
+
"medium",
|
|
288
|
+
"mental",
|
|
289
|
+
"method",
|
|
290
|
+
"methodology",
|
|
291
|
+
"migrate",
|
|
292
|
+
"military",
|
|
293
|
+
"minimal",
|
|
294
|
+
"minimize",
|
|
295
|
+
"minimum",
|
|
296
|
+
"ministry",
|
|
297
|
+
"minor",
|
|
298
|
+
"minority",
|
|
299
|
+
"mode",
|
|
300
|
+
"modify",
|
|
301
|
+
"monitor",
|
|
302
|
+
"motive",
|
|
303
|
+
"mutual",
|
|
304
|
+
"negate",
|
|
305
|
+
"network",
|
|
306
|
+
"neutral",
|
|
307
|
+
"nevertheless",
|
|
308
|
+
"nonetheless",
|
|
309
|
+
"normal",
|
|
310
|
+
"normally",
|
|
311
|
+
"notion",
|
|
312
|
+
"notwithstanding",
|
|
313
|
+
"nuclear",
|
|
314
|
+
"objective",
|
|
315
|
+
"obtain",
|
|
316
|
+
"obvious",
|
|
317
|
+
"obviously",
|
|
318
|
+
"occupy",
|
|
319
|
+
"occur",
|
|
320
|
+
"odd",
|
|
321
|
+
"offset",
|
|
322
|
+
"ongoing",
|
|
323
|
+
"option",
|
|
324
|
+
"orient",
|
|
325
|
+
"orientation",
|
|
326
|
+
"origin",
|
|
327
|
+
"original",
|
|
328
|
+
"output",
|
|
329
|
+
"overall",
|
|
330
|
+
"overlap",
|
|
331
|
+
"overseas",
|
|
332
|
+
"panel",
|
|
333
|
+
"paradigm",
|
|
334
|
+
"paragraph",
|
|
335
|
+
"parallel",
|
|
336
|
+
"parameter",
|
|
337
|
+
"participate",
|
|
338
|
+
"participation",
|
|
339
|
+
"particular",
|
|
340
|
+
"partner",
|
|
341
|
+
"passive",
|
|
342
|
+
"perceive",
|
|
343
|
+
"percent",
|
|
344
|
+
"percentage",
|
|
345
|
+
"perception",
|
|
346
|
+
"period",
|
|
347
|
+
"periodic",
|
|
348
|
+
"persist",
|
|
349
|
+
"perspective",
|
|
350
|
+
"phase",
|
|
351
|
+
"phenomena",
|
|
352
|
+
"phenomenon",
|
|
353
|
+
"philosophy",
|
|
354
|
+
"physical",
|
|
355
|
+
"plus",
|
|
356
|
+
"policy",
|
|
357
|
+
"portion",
|
|
358
|
+
"pose",
|
|
359
|
+
"positive",
|
|
360
|
+
"potential",
|
|
361
|
+
"practitioner",
|
|
362
|
+
"precede",
|
|
363
|
+
"preceding",
|
|
364
|
+
"precise",
|
|
365
|
+
"predict",
|
|
366
|
+
"prediction",
|
|
367
|
+
"predominant",
|
|
368
|
+
"preliminary",
|
|
369
|
+
"presume",
|
|
370
|
+
"previous",
|
|
371
|
+
"primarily",
|
|
372
|
+
"primary",
|
|
373
|
+
"prime",
|
|
374
|
+
"principal",
|
|
375
|
+
"principle",
|
|
376
|
+
"prior",
|
|
377
|
+
"priority",
|
|
378
|
+
"proceed",
|
|
379
|
+
"process",
|
|
380
|
+
"professional",
|
|
381
|
+
"prohibit",
|
|
382
|
+
"project",
|
|
383
|
+
"projection",
|
|
384
|
+
"promote",
|
|
385
|
+
"promotion",
|
|
386
|
+
"proportion",
|
|
387
|
+
"prospect",
|
|
388
|
+
"protocol",
|
|
389
|
+
"psychology",
|
|
390
|
+
"publication",
|
|
391
|
+
"publish",
|
|
392
|
+
"purchase",
|
|
393
|
+
"pursue",
|
|
394
|
+
"qualitative",
|
|
395
|
+
"quote",
|
|
396
|
+
"radical",
|
|
397
|
+
"random",
|
|
398
|
+
"range",
|
|
399
|
+
"ratio",
|
|
400
|
+
"rational",
|
|
401
|
+
"react",
|
|
402
|
+
"reaction",
|
|
403
|
+
"recover",
|
|
404
|
+
"refine",
|
|
405
|
+
"reform",
|
|
406
|
+
"regime",
|
|
407
|
+
"region",
|
|
408
|
+
"regional",
|
|
409
|
+
"register",
|
|
410
|
+
"regulate",
|
|
411
|
+
"regulation",
|
|
412
|
+
"reinforce",
|
|
413
|
+
"reject",
|
|
414
|
+
"relax",
|
|
415
|
+
"release",
|
|
416
|
+
"relevant",
|
|
417
|
+
"reluctance",
|
|
418
|
+
"rely",
|
|
419
|
+
"remove",
|
|
420
|
+
"require",
|
|
421
|
+
"requirement",
|
|
422
|
+
"research",
|
|
423
|
+
"researcher",
|
|
424
|
+
"reside",
|
|
425
|
+
"resolve",
|
|
426
|
+
"resource",
|
|
427
|
+
"respond",
|
|
428
|
+
"response",
|
|
429
|
+
"restore",
|
|
430
|
+
"restrain",
|
|
431
|
+
"restrict",
|
|
432
|
+
"restriction",
|
|
433
|
+
"retain",
|
|
434
|
+
"reveal",
|
|
435
|
+
"revenue",
|
|
436
|
+
"reverse",
|
|
437
|
+
"revise",
|
|
438
|
+
"revolution",
|
|
439
|
+
"rigid",
|
|
440
|
+
"role",
|
|
441
|
+
"route",
|
|
442
|
+
"scenario",
|
|
443
|
+
"schedule",
|
|
444
|
+
"scheme",
|
|
445
|
+
"scope",
|
|
446
|
+
"section",
|
|
447
|
+
"sector",
|
|
448
|
+
"secure",
|
|
449
|
+
"security",
|
|
450
|
+
"seek",
|
|
451
|
+
"select",
|
|
452
|
+
"selection",
|
|
453
|
+
"sequence",
|
|
454
|
+
"series",
|
|
455
|
+
"sex",
|
|
456
|
+
"shift",
|
|
457
|
+
"significant",
|
|
458
|
+
"significantly",
|
|
459
|
+
"similar",
|
|
460
|
+
"similarly",
|
|
461
|
+
"simulate",
|
|
462
|
+
"simulation",
|
|
463
|
+
"site",
|
|
464
|
+
"so-called",
|
|
465
|
+
"sole",
|
|
466
|
+
"solely",
|
|
467
|
+
"somewhat",
|
|
468
|
+
"source",
|
|
469
|
+
"specific",
|
|
470
|
+
"specifically",
|
|
471
|
+
"specify",
|
|
472
|
+
"sphere",
|
|
473
|
+
"stable",
|
|
474
|
+
"statistics",
|
|
475
|
+
"status",
|
|
476
|
+
"straightforward",
|
|
477
|
+
"strategy",
|
|
478
|
+
"stress",
|
|
479
|
+
"structural",
|
|
480
|
+
"structure",
|
|
481
|
+
"style",
|
|
482
|
+
"submit",
|
|
483
|
+
"subordinate",
|
|
484
|
+
"subsequent",
|
|
485
|
+
"subsequently",
|
|
486
|
+
"subsidy",
|
|
487
|
+
"substitute",
|
|
488
|
+
"substitute",
|
|
489
|
+
"successor",
|
|
490
|
+
"sufficient",
|
|
491
|
+
"sum",
|
|
492
|
+
"summary",
|
|
493
|
+
"supplement",
|
|
494
|
+
"survey",
|
|
495
|
+
"survive",
|
|
496
|
+
"suspend",
|
|
497
|
+
"sustain",
|
|
498
|
+
"symbol",
|
|
499
|
+
"tape",
|
|
500
|
+
"target",
|
|
501
|
+
"task",
|
|
502
|
+
"team",
|
|
503
|
+
"technical",
|
|
504
|
+
"technique",
|
|
505
|
+
"technology",
|
|
506
|
+
"temporary",
|
|
507
|
+
"tense",
|
|
508
|
+
"terminate",
|
|
509
|
+
"text",
|
|
510
|
+
"theme",
|
|
511
|
+
"theory",
|
|
512
|
+
"thereby",
|
|
513
|
+
"thesis",
|
|
514
|
+
"topic",
|
|
515
|
+
"trace",
|
|
516
|
+
"tradition",
|
|
517
|
+
"traditional",
|
|
518
|
+
"transfer",
|
|
519
|
+
"transform",
|
|
520
|
+
"transformation",
|
|
521
|
+
"transit",
|
|
522
|
+
"transition",
|
|
523
|
+
"transmit",
|
|
524
|
+
"transport",
|
|
525
|
+
"trend",
|
|
526
|
+
"trigger",
|
|
527
|
+
"ultimate",
|
|
528
|
+
"ultimately",
|
|
529
|
+
"undergo",
|
|
530
|
+
"underlie",
|
|
531
|
+
"underlying",
|
|
532
|
+
"undertake",
|
|
533
|
+
"uniform",
|
|
534
|
+
"unify",
|
|
535
|
+
"unique",
|
|
536
|
+
"utilize",
|
|
537
|
+
"valid",
|
|
538
|
+
"validity",
|
|
539
|
+
"vary",
|
|
540
|
+
"variation",
|
|
541
|
+
"vehicle",
|
|
542
|
+
"version",
|
|
543
|
+
"via",
|
|
544
|
+
"violate",
|
|
545
|
+
"virtual",
|
|
546
|
+
"virtually",
|
|
547
|
+
"visible",
|
|
548
|
+
"vision",
|
|
549
|
+
"visual",
|
|
550
|
+
"volume",
|
|
551
|
+
"voluntary",
|
|
552
|
+
"welfare",
|
|
553
|
+
"whereas",
|
|
554
|
+
"whereby",
|
|
555
|
+
"widespread",
|
|
110
556
|
}
|
|
111
557
|
|
|
112
558
|
|
|
@@ -117,224 +563,989 @@ ACADEMIC_WORD_LIST = {
|
|
|
117
563
|
# This is an embedded subset for MVP. Full COCA has 60,000+ words.
|
|
118
564
|
COCA_FREQUENCY_RANKS = {
|
|
119
565
|
# Top 100 - Function words and most common verbs
|
|
120
|
-
"the": 1,
|
|
121
|
-
"
|
|
122
|
-
"
|
|
123
|
-
"
|
|
124
|
-
"
|
|
125
|
-
"
|
|
126
|
-
"
|
|
127
|
-
"
|
|
128
|
-
"
|
|
129
|
-
"
|
|
130
|
-
"
|
|
131
|
-
"
|
|
132
|
-
"
|
|
133
|
-
"
|
|
134
|
-
"
|
|
566
|
+
"the": 1,
|
|
567
|
+
"be": 2,
|
|
568
|
+
"to": 3,
|
|
569
|
+
"of": 4,
|
|
570
|
+
"and": 5,
|
|
571
|
+
"a": 6,
|
|
572
|
+
"in": 7,
|
|
573
|
+
"that": 8,
|
|
574
|
+
"have": 9,
|
|
575
|
+
"i": 10,
|
|
576
|
+
"it": 11,
|
|
577
|
+
"for": 12,
|
|
578
|
+
"not": 13,
|
|
579
|
+
"on": 14,
|
|
580
|
+
"with": 15,
|
|
581
|
+
"he": 16,
|
|
582
|
+
"as": 17,
|
|
583
|
+
"you": 18,
|
|
584
|
+
"do": 19,
|
|
585
|
+
"at": 20,
|
|
586
|
+
"this": 21,
|
|
587
|
+
"but": 22,
|
|
588
|
+
"his": 23,
|
|
589
|
+
"by": 24,
|
|
590
|
+
"from": 25,
|
|
591
|
+
"they": 26,
|
|
592
|
+
"we": 27,
|
|
593
|
+
"say": 28,
|
|
594
|
+
"her": 29,
|
|
595
|
+
"she": 30,
|
|
596
|
+
"or": 31,
|
|
597
|
+
"an": 32,
|
|
598
|
+
"will": 33,
|
|
599
|
+
"my": 34,
|
|
600
|
+
"one": 35,
|
|
601
|
+
"all": 36,
|
|
602
|
+
"would": 37,
|
|
603
|
+
"there": 38,
|
|
604
|
+
"their": 39,
|
|
605
|
+
"what": 40,
|
|
606
|
+
"so": 41,
|
|
607
|
+
"up": 42,
|
|
608
|
+
"out": 43,
|
|
609
|
+
"if": 44,
|
|
610
|
+
"about": 45,
|
|
611
|
+
"who": 46,
|
|
612
|
+
"get": 47,
|
|
613
|
+
"which": 48,
|
|
614
|
+
"go": 49,
|
|
615
|
+
"me": 50,
|
|
616
|
+
"when": 51,
|
|
617
|
+
"make": 52,
|
|
618
|
+
"can": 53,
|
|
619
|
+
"like": 54,
|
|
620
|
+
"time": 55,
|
|
621
|
+
"no": 56,
|
|
622
|
+
"just": 57,
|
|
623
|
+
"him": 58,
|
|
624
|
+
"know": 59,
|
|
625
|
+
"take": 60,
|
|
626
|
+
"people": 61,
|
|
627
|
+
"into": 62,
|
|
628
|
+
"year": 63,
|
|
629
|
+
"your": 64,
|
|
630
|
+
"good": 65,
|
|
631
|
+
"some": 66,
|
|
632
|
+
"could": 67,
|
|
633
|
+
"them": 68,
|
|
634
|
+
"see": 69,
|
|
635
|
+
"other": 70,
|
|
636
|
+
"than": 71,
|
|
637
|
+
"then": 72,
|
|
638
|
+
"now": 73,
|
|
639
|
+
"look": 74,
|
|
640
|
+
"only": 75,
|
|
641
|
+
"come": 76,
|
|
642
|
+
"its": 77,
|
|
643
|
+
"over": 78,
|
|
644
|
+
"think": 79,
|
|
645
|
+
"also": 80,
|
|
646
|
+
"back": 81,
|
|
647
|
+
"after": 82,
|
|
648
|
+
"use": 83,
|
|
649
|
+
"two": 84,
|
|
650
|
+
"how": 85,
|
|
651
|
+
"our": 86,
|
|
652
|
+
"work": 87,
|
|
653
|
+
"first": 88,
|
|
654
|
+
"well": 89,
|
|
655
|
+
"way": 90,
|
|
656
|
+
"even": 91,
|
|
657
|
+
"new": 92,
|
|
658
|
+
"want": 93,
|
|
659
|
+
"because": 94,
|
|
660
|
+
"any": 95,
|
|
661
|
+
"these": 96,
|
|
662
|
+
"give": 97,
|
|
663
|
+
"day": 98,
|
|
664
|
+
"most": 99,
|
|
135
665
|
"us": 100,
|
|
136
666
|
# 101-500 - Common words
|
|
137
|
-
"is": 101,
|
|
138
|
-
"
|
|
139
|
-
"
|
|
140
|
-
"
|
|
141
|
-
"
|
|
142
|
-
"
|
|
143
|
-
"
|
|
144
|
-
"
|
|
145
|
-
"
|
|
146
|
-
"
|
|
147
|
-
"
|
|
148
|
-
"
|
|
149
|
-
"
|
|
150
|
-
"
|
|
151
|
-
"
|
|
152
|
-
"
|
|
153
|
-
"
|
|
154
|
-
"
|
|
155
|
-
"
|
|
156
|
-
"
|
|
157
|
-
"
|
|
158
|
-
"
|
|
159
|
-
"
|
|
160
|
-
"
|
|
161
|
-
"
|
|
162
|
-
"
|
|
163
|
-
"
|
|
164
|
-
"
|
|
165
|
-
"
|
|
166
|
-
"
|
|
167
|
-
"
|
|
168
|
-
"
|
|
169
|
-
"
|
|
170
|
-
"
|
|
171
|
-
"
|
|
172
|
-
"
|
|
173
|
-
"
|
|
174
|
-
"
|
|
175
|
-
"
|
|
176
|
-
"
|
|
177
|
-
"
|
|
178
|
-
"
|
|
179
|
-
"
|
|
180
|
-
"
|
|
181
|
-
"
|
|
182
|
-
"
|
|
183
|
-
"
|
|
184
|
-
"
|
|
185
|
-
"
|
|
186
|
-
"
|
|
187
|
-
"
|
|
188
|
-
"
|
|
189
|
-
"
|
|
190
|
-
"
|
|
191
|
-
"
|
|
192
|
-
"
|
|
193
|
-
"
|
|
194
|
-
"
|
|
195
|
-
"
|
|
196
|
-
"
|
|
197
|
-
"
|
|
198
|
-
"
|
|
199
|
-
"
|
|
200
|
-
"
|
|
201
|
-
"
|
|
202
|
-
"
|
|
203
|
-
"
|
|
667
|
+
"is": 101,
|
|
668
|
+
"was": 102,
|
|
669
|
+
"are": 103,
|
|
670
|
+
"been": 104,
|
|
671
|
+
"has": 105,
|
|
672
|
+
"had": 106,
|
|
673
|
+
"were": 107,
|
|
674
|
+
"said": 108,
|
|
675
|
+
"did": 109,
|
|
676
|
+
"having": 110,
|
|
677
|
+
"may": 111,
|
|
678
|
+
"should": 112,
|
|
679
|
+
"each": 113,
|
|
680
|
+
"such": 114,
|
|
681
|
+
"through": 115,
|
|
682
|
+
"where": 116,
|
|
683
|
+
"much": 117,
|
|
684
|
+
"before": 118,
|
|
685
|
+
"right": 119,
|
|
686
|
+
"too": 120,
|
|
687
|
+
"means": 121,
|
|
688
|
+
"old": 122,
|
|
689
|
+
"same": 124,
|
|
690
|
+
"tell": 125,
|
|
691
|
+
"boy": 126,
|
|
692
|
+
"follow": 127,
|
|
693
|
+
"came": 128,
|
|
694
|
+
"show": 129,
|
|
695
|
+
"every": 130,
|
|
696
|
+
"under": 135,
|
|
697
|
+
"name": 136,
|
|
698
|
+
"very": 137,
|
|
699
|
+
"form": 140,
|
|
700
|
+
"great": 141,
|
|
701
|
+
"help": 144,
|
|
702
|
+
"low": 145,
|
|
703
|
+
"line": 146,
|
|
704
|
+
"turn": 148,
|
|
705
|
+
"cause": 149,
|
|
706
|
+
"mean": 151,
|
|
707
|
+
"differ": 152,
|
|
708
|
+
"move": 153,
|
|
709
|
+
"does": 158,
|
|
710
|
+
"sentence": 160,
|
|
711
|
+
"set": 161,
|
|
712
|
+
"three": 162,
|
|
713
|
+
"air": 164,
|
|
714
|
+
"play": 167,
|
|
715
|
+
"small": 168,
|
|
716
|
+
"end": 169,
|
|
717
|
+
"put": 170,
|
|
718
|
+
"home": 171,
|
|
719
|
+
"read": 172,
|
|
720
|
+
"hand": 173,
|
|
721
|
+
"port": 174,
|
|
722
|
+
"large": 175,
|
|
723
|
+
"spell": 176,
|
|
724
|
+
"add": 177,
|
|
725
|
+
"land": 179,
|
|
726
|
+
"here": 180,
|
|
727
|
+
"must": 181,
|
|
728
|
+
"big": 182,
|
|
729
|
+
"high": 183,
|
|
730
|
+
"act": 186,
|
|
731
|
+
"why": 187,
|
|
732
|
+
"ask": 188,
|
|
733
|
+
"men": 189,
|
|
734
|
+
"change": 190,
|
|
735
|
+
"went": 191,
|
|
736
|
+
"light": 192,
|
|
737
|
+
"kind": 193,
|
|
738
|
+
"off": 194,
|
|
739
|
+
"need": 195,
|
|
740
|
+
"house": 196,
|
|
741
|
+
"picture": 197,
|
|
742
|
+
"try": 198,
|
|
743
|
+
"again": 200,
|
|
744
|
+
"animal": 201,
|
|
745
|
+
"point": 202,
|
|
746
|
+
"mother": 203,
|
|
747
|
+
"world": 204,
|
|
748
|
+
"near": 205,
|
|
749
|
+
"build": 206,
|
|
750
|
+
"self": 207,
|
|
751
|
+
"earth": 208,
|
|
752
|
+
"father": 209,
|
|
753
|
+
"head": 210,
|
|
754
|
+
"stand": 211,
|
|
755
|
+
"own": 212,
|
|
756
|
+
"page": 213,
|
|
757
|
+
"country": 215,
|
|
758
|
+
"found": 216,
|
|
759
|
+
"answer": 217,
|
|
760
|
+
"school": 218,
|
|
761
|
+
"grow": 219,
|
|
762
|
+
"study": 220,
|
|
763
|
+
"still": 221,
|
|
764
|
+
"learn": 222,
|
|
765
|
+
"plant": 223,
|
|
766
|
+
"cover": 224,
|
|
767
|
+
"food": 225,
|
|
768
|
+
"sun": 226,
|
|
769
|
+
"four": 227,
|
|
770
|
+
"thought": 228,
|
|
771
|
+
"let": 229,
|
|
772
|
+
"keep": 230,
|
|
773
|
+
"eye": 231,
|
|
774
|
+
"never": 232,
|
|
775
|
+
"last": 233,
|
|
776
|
+
"door": 234,
|
|
777
|
+
"between": 235,
|
|
778
|
+
"city": 236,
|
|
779
|
+
"tree": 237,
|
|
780
|
+
"cross": 238,
|
|
781
|
+
"since": 239,
|
|
782
|
+
"hard": 240,
|
|
783
|
+
"start": 241,
|
|
784
|
+
"might": 242,
|
|
785
|
+
"story": 243,
|
|
786
|
+
"saw": 244,
|
|
787
|
+
"far": 245,
|
|
788
|
+
"sea": 246,
|
|
789
|
+
"draw": 247,
|
|
790
|
+
"left": 248,
|
|
791
|
+
"late": 249,
|
|
792
|
+
"run": 250,
|
|
793
|
+
"while": 251,
|
|
794
|
+
"press": 252,
|
|
795
|
+
"close": 253,
|
|
796
|
+
"night": 254,
|
|
797
|
+
"real": 255,
|
|
798
|
+
"life": 256,
|
|
799
|
+
"few": 257,
|
|
800
|
+
"stop": 258,
|
|
801
|
+
"open": 259,
|
|
802
|
+
"seem": 260,
|
|
803
|
+
"together": 261,
|
|
804
|
+
"next": 262,
|
|
805
|
+
"white": 263,
|
|
806
|
+
"children": 264,
|
|
807
|
+
"begin": 265,
|
|
808
|
+
"got": 266,
|
|
809
|
+
"walk": 267,
|
|
810
|
+
"example": 268,
|
|
811
|
+
"ease": 269,
|
|
812
|
+
"paper": 270,
|
|
813
|
+
"often": 271,
|
|
814
|
+
"always": 272,
|
|
815
|
+
"music": 273,
|
|
816
|
+
"those": 274,
|
|
817
|
+
"both": 275,
|
|
818
|
+
"mark": 276,
|
|
819
|
+
"book": 277,
|
|
820
|
+
"letter": 278,
|
|
821
|
+
"until": 279,
|
|
822
|
+
"mile": 280,
|
|
823
|
+
"river": 281,
|
|
824
|
+
"car": 282,
|
|
825
|
+
"feet": 283,
|
|
826
|
+
"care": 284,
|
|
827
|
+
"second": 285,
|
|
828
|
+
"group": 286,
|
|
829
|
+
"carry": 287,
|
|
830
|
+
"took": 288,
|
|
831
|
+
"rain": 289,
|
|
832
|
+
"eat": 290,
|
|
833
|
+
"room": 291,
|
|
834
|
+
"friend": 292,
|
|
835
|
+
"began": 293,
|
|
836
|
+
"idea": 294,
|
|
837
|
+
"fish": 295,
|
|
838
|
+
"mountain": 296,
|
|
839
|
+
"north": 297,
|
|
840
|
+
"once": 298,
|
|
841
|
+
"base": 299,
|
|
842
|
+
"hear": 300,
|
|
843
|
+
"horse": 301,
|
|
844
|
+
"cut": 302,
|
|
845
|
+
"sure": 303,
|
|
846
|
+
"watch": 304,
|
|
847
|
+
"color": 305,
|
|
848
|
+
"face": 306,
|
|
849
|
+
"wood": 307,
|
|
850
|
+
"main": 308,
|
|
851
|
+
"enough": 309,
|
|
852
|
+
"plain": 310,
|
|
853
|
+
"girl": 311,
|
|
854
|
+
"usual": 312,
|
|
855
|
+
"young": 313,
|
|
856
|
+
"ready": 314,
|
|
857
|
+
"above": 315,
|
|
858
|
+
"ever": 316,
|
|
859
|
+
"red": 317,
|
|
860
|
+
"list": 318,
|
|
861
|
+
"though": 319,
|
|
862
|
+
"feel": 320,
|
|
863
|
+
"talk": 321,
|
|
864
|
+
"bird": 322,
|
|
865
|
+
"soon": 323,
|
|
866
|
+
"body": 324,
|
|
867
|
+
"dog": 325,
|
|
868
|
+
"family": 326,
|
|
869
|
+
"direct": 327,
|
|
870
|
+
"pose": 328,
|
|
871
|
+
"leave": 329,
|
|
872
|
+
"song": 330,
|
|
873
|
+
"measure": 331,
|
|
874
|
+
"state": 332,
|
|
875
|
+
"product": 333,
|
|
876
|
+
"black": 334,
|
|
877
|
+
"short": 335,
|
|
878
|
+
"numeral": 336,
|
|
879
|
+
"class": 337,
|
|
880
|
+
"wind": 338,
|
|
881
|
+
"question": 339,
|
|
882
|
+
"happen": 340,
|
|
883
|
+
"complete": 341,
|
|
884
|
+
"ship": 342,
|
|
885
|
+
"area": 343,
|
|
886
|
+
"half": 344,
|
|
887
|
+
"rock": 345,
|
|
888
|
+
"order": 346,
|
|
889
|
+
"fire": 347,
|
|
890
|
+
"south": 348,
|
|
891
|
+
"problem": 349,
|
|
892
|
+
"piece": 350,
|
|
893
|
+
"told": 351,
|
|
894
|
+
"knew": 352,
|
|
895
|
+
"pass": 353,
|
|
896
|
+
"farm": 354,
|
|
897
|
+
"top": 355,
|
|
898
|
+
"whole": 356,
|
|
899
|
+
"king": 357,
|
|
900
|
+
"size": 358,
|
|
901
|
+
"heard": 359,
|
|
902
|
+
"best": 360,
|
|
903
|
+
"hour": 361,
|
|
904
|
+
"better": 362,
|
|
905
|
+
"true": 363,
|
|
906
|
+
"during": 364,
|
|
907
|
+
"hundred": 365,
|
|
908
|
+
"am": 366,
|
|
909
|
+
"remember": 367,
|
|
910
|
+
"step": 368,
|
|
911
|
+
"early": 369,
|
|
912
|
+
"hold": 370,
|
|
913
|
+
"west": 371,
|
|
914
|
+
"ground": 372,
|
|
915
|
+
"interest": 373,
|
|
916
|
+
"reach": 374,
|
|
917
|
+
"fast": 375,
|
|
918
|
+
"five": 376,
|
|
919
|
+
"sing": 377,
|
|
920
|
+
"listen": 378,
|
|
921
|
+
"six": 379,
|
|
922
|
+
"table": 380,
|
|
923
|
+
"travel": 381,
|
|
924
|
+
"less": 382,
|
|
925
|
+
"morning": 383,
|
|
926
|
+
"ten": 384,
|
|
927
|
+
"simple": 385,
|
|
928
|
+
"several": 386,
|
|
929
|
+
"vowel": 387,
|
|
930
|
+
"toward": 388,
|
|
931
|
+
"war": 389,
|
|
932
|
+
"lay": 390,
|
|
933
|
+
"against": 391,
|
|
934
|
+
"pattern": 392,
|
|
935
|
+
"slow": 393,
|
|
936
|
+
"center": 394,
|
|
937
|
+
"love": 395,
|
|
938
|
+
"person": 396,
|
|
939
|
+
"money": 397,
|
|
940
|
+
"serve": 398,
|
|
941
|
+
"appear": 399,
|
|
942
|
+
"road": 400,
|
|
943
|
+
"map": 401,
|
|
944
|
+
"science": 402,
|
|
945
|
+
"rule": 403,
|
|
946
|
+
"govern": 404,
|
|
947
|
+
"pull": 405,
|
|
948
|
+
"cold": 406,
|
|
949
|
+
"notice": 407,
|
|
950
|
+
"voice": 408,
|
|
951
|
+
"fall": 409,
|
|
952
|
+
"power": 410,
|
|
953
|
+
"town": 411,
|
|
954
|
+
"fine": 412,
|
|
955
|
+
"certain": 413,
|
|
956
|
+
"fly": 414,
|
|
957
|
+
"unit": 415,
|
|
958
|
+
"lead": 416,
|
|
959
|
+
"cry": 417,
|
|
960
|
+
"dark": 418,
|
|
961
|
+
"machine": 419,
|
|
962
|
+
"note": 420,
|
|
963
|
+
"wait": 421,
|
|
964
|
+
"plan": 422,
|
|
965
|
+
"figure": 423,
|
|
966
|
+
"star": 424,
|
|
967
|
+
"box": 425,
|
|
968
|
+
"noun": 426,
|
|
969
|
+
"field": 427,
|
|
970
|
+
"rest": 428,
|
|
971
|
+
"correct": 429,
|
|
972
|
+
"able": 430,
|
|
973
|
+
"pound": 431,
|
|
974
|
+
"done": 432,
|
|
975
|
+
"beauty": 433,
|
|
976
|
+
"drive": 434,
|
|
977
|
+
"stood": 435,
|
|
978
|
+
"contain": 436,
|
|
979
|
+
"front": 437,
|
|
980
|
+
"teach": 438,
|
|
981
|
+
"week": 439,
|
|
982
|
+
"final": 440,
|
|
983
|
+
"gave": 441,
|
|
984
|
+
"green": 442,
|
|
985
|
+
"oh": 443,
|
|
986
|
+
"quick": 444,
|
|
987
|
+
"develop": 445,
|
|
988
|
+
"sleep": 446,
|
|
989
|
+
"warm": 447,
|
|
990
|
+
"free": 448,
|
|
991
|
+
"minute": 449,
|
|
992
|
+
"strong": 450,
|
|
993
|
+
"special": 451,
|
|
994
|
+
"mind": 452,
|
|
995
|
+
"behind": 453,
|
|
996
|
+
"clear": 454,
|
|
997
|
+
"tail": 455,
|
|
998
|
+
"produce": 456,
|
|
999
|
+
"fact": 457,
|
|
1000
|
+
"street": 458,
|
|
1001
|
+
"inch": 459,
|
|
1002
|
+
"lot": 460,
|
|
1003
|
+
"nothing": 461,
|
|
1004
|
+
"course": 462,
|
|
1005
|
+
"stay": 463,
|
|
1006
|
+
"wheel": 464,
|
|
1007
|
+
"full": 465,
|
|
1008
|
+
"force": 466,
|
|
1009
|
+
"blue": 467,
|
|
1010
|
+
"object": 468,
|
|
1011
|
+
"decide": 469,
|
|
1012
|
+
"surface": 470,
|
|
1013
|
+
"deep": 471,
|
|
1014
|
+
"moon": 472,
|
|
1015
|
+
"island": 473,
|
|
1016
|
+
"foot": 474,
|
|
1017
|
+
"yet": 475,
|
|
1018
|
+
"busy": 476,
|
|
1019
|
+
"test": 477,
|
|
1020
|
+
"record": 478,
|
|
1021
|
+
"boat": 479,
|
|
1022
|
+
"common": 480,
|
|
1023
|
+
"gold": 481,
|
|
1024
|
+
"possible": 482,
|
|
1025
|
+
"plane": 483,
|
|
1026
|
+
"age": 484,
|
|
1027
|
+
"dry": 485,
|
|
1028
|
+
"wonder": 486,
|
|
1029
|
+
"laugh": 487,
|
|
1030
|
+
"thousand": 488,
|
|
1031
|
+
"ago": 489,
|
|
1032
|
+
"ran": 490,
|
|
1033
|
+
"check": 491,
|
|
1034
|
+
"game": 492,
|
|
1035
|
+
"shape": 493,
|
|
1036
|
+
"yes": 494,
|
|
1037
|
+
"hot": 495,
|
|
1038
|
+
"miss": 496,
|
|
1039
|
+
"brought": 497,
|
|
1040
|
+
"heat": 498,
|
|
1041
|
+
"snow": 499,
|
|
1042
|
+
"bed": 500,
|
|
204
1043
|
# 501-1000 - Common vocabulary
|
|
205
|
-
"bring": 501,
|
|
206
|
-
"
|
|
207
|
-
"
|
|
208
|
-
"
|
|
209
|
-
"
|
|
210
|
-
"
|
|
211
|
-
"
|
|
212
|
-
"
|
|
213
|
-
"
|
|
214
|
-
"
|
|
215
|
-
"
|
|
216
|
-
"
|
|
217
|
-
"
|
|
218
|
-
"
|
|
219
|
-
"
|
|
220
|
-
"
|
|
221
|
-
"
|
|
222
|
-
"
|
|
223
|
-
"
|
|
224
|
-
"
|
|
225
|
-
"
|
|
226
|
-
"
|
|
227
|
-
"
|
|
228
|
-
"
|
|
229
|
-
"
|
|
230
|
-
"
|
|
231
|
-
"
|
|
232
|
-
"
|
|
233
|
-
"
|
|
234
|
-
"
|
|
235
|
-
"
|
|
236
|
-
"
|
|
237
|
-
"
|
|
238
|
-
"
|
|
239
|
-
"
|
|
240
|
-
"
|
|
241
|
-
"
|
|
242
|
-
"
|
|
243
|
-
"
|
|
244
|
-
"
|
|
245
|
-
"
|
|
246
|
-
"
|
|
247
|
-
"
|
|
248
|
-
"
|
|
249
|
-
"
|
|
250
|
-
"
|
|
251
|
-
"
|
|
252
|
-
"
|
|
253
|
-
"
|
|
254
|
-
"
|
|
255
|
-
"
|
|
256
|
-
"
|
|
257
|
-
"
|
|
258
|
-
"
|
|
259
|
-
"
|
|
260
|
-
"
|
|
261
|
-
"
|
|
262
|
-
"
|
|
263
|
-
"
|
|
264
|
-
"
|
|
265
|
-
"
|
|
266
|
-
"
|
|
267
|
-
"
|
|
268
|
-
"
|
|
269
|
-
"
|
|
270
|
-
"
|
|
271
|
-
"
|
|
272
|
-
"
|
|
273
|
-
"
|
|
274
|
-
"
|
|
275
|
-
"
|
|
276
|
-
"
|
|
277
|
-
"
|
|
278
|
-
"
|
|
279
|
-
"
|
|
280
|
-
"
|
|
281
|
-
"
|
|
282
|
-
"
|
|
283
|
-
"
|
|
284
|
-
"
|
|
285
|
-
"
|
|
286
|
-
"
|
|
287
|
-
"
|
|
288
|
-
"
|
|
289
|
-
"
|
|
290
|
-
"
|
|
291
|
-
|
|
292
|
-
"
|
|
293
|
-
"
|
|
294
|
-
"
|
|
295
|
-
"
|
|
296
|
-
"
|
|
297
|
-
"
|
|
298
|
-
"
|
|
299
|
-
"
|
|
300
|
-
"
|
|
301
|
-
"
|
|
302
|
-
"
|
|
303
|
-
"
|
|
304
|
-
"
|
|
305
|
-
"
|
|
306
|
-
"
|
|
307
|
-
"
|
|
308
|
-
"
|
|
309
|
-
"
|
|
310
|
-
"
|
|
311
|
-
"
|
|
1044
|
+
"bring": 501,
|
|
1045
|
+
"sit": 502,
|
|
1046
|
+
"perhaps": 503,
|
|
1047
|
+
"fill": 504,
|
|
1048
|
+
"east": 505,
|
|
1049
|
+
"weight": 506,
|
|
1050
|
+
"language": 507,
|
|
1051
|
+
"among": 508,
|
|
1052
|
+
"cat": 509,
|
|
1053
|
+
"ball": 510,
|
|
1054
|
+
"human": 511,
|
|
1055
|
+
"doctor": 513,
|
|
1056
|
+
"office": 515,
|
|
1057
|
+
"break": 516,
|
|
1058
|
+
"die": 517,
|
|
1059
|
+
"radio": 518,
|
|
1060
|
+
"speak": 519,
|
|
1061
|
+
"atom": 520,
|
|
1062
|
+
"blood": 521,
|
|
1063
|
+
"felt": 522,
|
|
1064
|
+
"type": 523,
|
|
1065
|
+
"forward": 524,
|
|
1066
|
+
"century": 525,
|
|
1067
|
+
"milk": 526,
|
|
1068
|
+
"corner": 527,
|
|
1069
|
+
"speed": 528,
|
|
1070
|
+
"method": 529,
|
|
1071
|
+
"organ": 530,
|
|
1072
|
+
"pay": 531,
|
|
1073
|
+
"single": 532,
|
|
1074
|
+
"touch": 533,
|
|
1075
|
+
"control": 534,
|
|
1076
|
+
"bottom": 535,
|
|
1077
|
+
"design": 536,
|
|
1078
|
+
"coat": 537,
|
|
1079
|
+
"else": 538,
|
|
1080
|
+
"quite": 539,
|
|
1081
|
+
"broke": 540,
|
|
1082
|
+
"case": 541,
|
|
1083
|
+
"middle": 542,
|
|
1084
|
+
"kill": 543,
|
|
1085
|
+
"son": 544,
|
|
1086
|
+
"lake": 545,
|
|
1087
|
+
"moment": 546,
|
|
1088
|
+
"scale": 547,
|
|
1089
|
+
"loud": 548,
|
|
1090
|
+
"spring": 549,
|
|
1091
|
+
"observe": 550,
|
|
1092
|
+
"child": 551,
|
|
1093
|
+
"straight": 552,
|
|
1094
|
+
"consonant": 553,
|
|
1095
|
+
"nation": 554,
|
|
1096
|
+
"dictionary": 555,
|
|
1097
|
+
"bit": 556,
|
|
1098
|
+
"coast": 557,
|
|
1099
|
+
"copy": 558,
|
|
1100
|
+
"phrase": 559,
|
|
1101
|
+
"silent": 560,
|
|
1102
|
+
"tall": 561,
|
|
1103
|
+
"sand": 562,
|
|
1104
|
+
"soil": 563,
|
|
1105
|
+
"roll": 564,
|
|
1106
|
+
"temperature": 565,
|
|
1107
|
+
"finger": 566,
|
|
1108
|
+
"industry": 567,
|
|
1109
|
+
"value": 568,
|
|
1110
|
+
"fight": 569,
|
|
1111
|
+
"lie": 570,
|
|
1112
|
+
"beat": 571,
|
|
1113
|
+
"excite": 572,
|
|
1114
|
+
"natural": 573,
|
|
1115
|
+
"view": 574,
|
|
1116
|
+
"sense": 575,
|
|
1117
|
+
"capital": 576,
|
|
1118
|
+
"chair": 578,
|
|
1119
|
+
"danger": 579,
|
|
1120
|
+
"fruit": 580,
|
|
1121
|
+
"rich": 581,
|
|
1122
|
+
"thick": 582,
|
|
1123
|
+
"soldier": 583,
|
|
1124
|
+
"process": 584,
|
|
1125
|
+
"operate": 585,
|
|
1126
|
+
"practice": 586,
|
|
1127
|
+
"separate": 587,
|
|
1128
|
+
"difficult": 588,
|
|
1129
|
+
"visit": 589,
|
|
1130
|
+
"spread": 590,
|
|
1131
|
+
"particular": 591,
|
|
1132
|
+
"catch": 592,
|
|
1133
|
+
"square": 593,
|
|
1134
|
+
"reason": 594,
|
|
1135
|
+
"length": 595,
|
|
1136
|
+
"represent": 596,
|
|
1137
|
+
"art": 597,
|
|
1138
|
+
"subject": 598,
|
|
1139
|
+
"region": 599,
|
|
1140
|
+
"vary": 601,
|
|
1141
|
+
"settle": 602,
|
|
1142
|
+
"general": 605,
|
|
1143
|
+
"ice": 606,
|
|
1144
|
+
"matter": 607,
|
|
1145
|
+
"circle": 608,
|
|
1146
|
+
"pair": 609,
|
|
1147
|
+
"include": 610,
|
|
1148
|
+
"divide": 611,
|
|
1149
|
+
"syllable": 612,
|
|
1150
|
+
"grand": 614,
|
|
1151
|
+
"wave": 617,
|
|
1152
|
+
"drop": 618,
|
|
1153
|
+
"heart": 619,
|
|
1154
|
+
"present": 620,
|
|
1155
|
+
"heavy": 621,
|
|
1156
|
+
"dance": 622,
|
|
1157
|
+
"engine": 623,
|
|
1158
|
+
"position": 624,
|
|
1159
|
+
"arm": 625,
|
|
1160
|
+
"wide": 626,
|
|
1161
|
+
"sail": 627,
|
|
1162
|
+
"material": 628,
|
|
1163
|
+
"fraction": 629,
|
|
1164
|
+
"forest": 630,
|
|
1165
|
+
"race": 632,
|
|
1166
|
+
"window": 633,
|
|
1167
|
+
"store": 634,
|
|
1168
|
+
"summer": 635,
|
|
1169
|
+
"train": 636,
|
|
1170
|
+
"prove": 638,
|
|
1171
|
+
"lone": 639,
|
|
1172
|
+
"leg": 640,
|
|
1173
|
+
"exercise": 641,
|
|
1174
|
+
"wall": 642,
|
|
1175
|
+
"mount": 644,
|
|
1176
|
+
"wish": 645,
|
|
1177
|
+
"sky": 646,
|
|
1178
|
+
"board": 647,
|
|
1179
|
+
"joy": 648,
|
|
1180
|
+
"winter": 649,
|
|
1181
|
+
"sat": 650,
|
|
1182
|
+
"written": 651,
|
|
1183
|
+
"wild": 652,
|
|
1184
|
+
"instrument": 653,
|
|
1185
|
+
"kept": 654,
|
|
1186
|
+
"glass": 655,
|
|
1187
|
+
"grass": 656,
|
|
1188
|
+
"cow": 657,
|
|
1189
|
+
"job": 658,
|
|
1190
|
+
"edge": 659,
|
|
1191
|
+
"sign": 660,
|
|
1192
|
+
"past": 662,
|
|
1193
|
+
"soft": 663,
|
|
1194
|
+
"fun": 664,
|
|
1195
|
+
"bright": 665,
|
|
1196
|
+
"gas": 666,
|
|
1197
|
+
"weather": 667,
|
|
1198
|
+
"month": 668,
|
|
1199
|
+
"million": 669,
|
|
1200
|
+
"bear": 670,
|
|
1201
|
+
"finish": 671,
|
|
1202
|
+
"happy": 672,
|
|
1203
|
+
"hope": 673,
|
|
1204
|
+
"flower": 674,
|
|
1205
|
+
"clothe": 675,
|
|
1206
|
+
"strange": 676,
|
|
1207
|
+
"gone": 677,
|
|
1208
|
+
"trade": 678,
|
|
1209
|
+
"melody": 679,
|
|
1210
|
+
"trip": 680,
|
|
1211
|
+
"receive": 682,
|
|
1212
|
+
"row": 683,
|
|
1213
|
+
"mouth": 684,
|
|
1214
|
+
"exact": 685,
|
|
1215
|
+
"symbol": 686,
|
|
1216
|
+
"least": 688,
|
|
1217
|
+
"trouble": 689,
|
|
1218
|
+
"shout": 690,
|
|
1219
|
+
"except": 691,
|
|
1220
|
+
"wrote": 692,
|
|
1221
|
+
"seed": 693,
|
|
1222
|
+
"tone": 694,
|
|
1223
|
+
"join": 695,
|
|
1224
|
+
"suggest": 696,
|
|
1225
|
+
"clean": 697,
|
|
1226
|
+
"lady": 699,
|
|
1227
|
+
"yard": 700,
|
|
1228
|
+
"rise": 701,
|
|
1229
|
+
"bad": 702,
|
|
1230
|
+
"blow": 703,
|
|
1231
|
+
"oil": 704,
|
|
1232
|
+
"grew": 707,
|
|
1233
|
+
"cent": 708,
|
|
1234
|
+
"mix": 709,
|
|
1235
|
+
"team": 710,
|
|
1236
|
+
"wire": 711,
|
|
1237
|
+
"cost": 712,
|
|
1238
|
+
"lost": 713,
|
|
1239
|
+
"brown": 714,
|
|
1240
|
+
"wear": 715,
|
|
1241
|
+
"garden": 716,
|
|
1242
|
+
"equal": 717,
|
|
1243
|
+
"sent": 718,
|
|
1244
|
+
"choose": 719,
|
|
1245
|
+
"fell": 720,
|
|
1246
|
+
"fit": 721,
|
|
1247
|
+
"flow": 722,
|
|
1248
|
+
"fair": 723,
|
|
1249
|
+
"bank": 724,
|
|
1250
|
+
"collect": 725,
|
|
1251
|
+
"save": 726,
|
|
1252
|
+
"decimal": 728,
|
|
1253
|
+
"ear": 729,
|
|
1254
|
+
"paragraph": 748,
|
|
1255
|
+
"parent": 749,
|
|
1256
|
+
"shore": 750,
|
|
1257
|
+
"division": 751,
|
|
1258
|
+
"sheet": 752,
|
|
1259
|
+
"substance": 753,
|
|
1260
|
+
"favor": 754,
|
|
1261
|
+
"connect": 755,
|
|
1262
|
+
"post": 756,
|
|
1263
|
+
"spend": 757,
|
|
1264
|
+
"chord": 758,
|
|
1265
|
+
"fat": 759,
|
|
1266
|
+
"glad": 760,
|
|
1267
|
+
"original": 761,
|
|
1268
|
+
"share": 762,
|
|
1269
|
+
"station": 763,
|
|
1270
|
+
"dad": 764,
|
|
1271
|
+
"bread": 765,
|
|
1272
|
+
"charge": 766,
|
|
1273
|
+
"proper": 767,
|
|
1274
|
+
"bar": 768,
|
|
1275
|
+
"offer": 769,
|
|
1276
|
+
"segment": 770,
|
|
1277
|
+
"slave": 771,
|
|
1278
|
+
"duck": 772,
|
|
1279
|
+
"instant": 773,
|
|
1280
|
+
"market": 774,
|
|
1281
|
+
"degree": 775,
|
|
1282
|
+
"populate": 776,
|
|
1283
|
+
"chick": 777,
|
|
1284
|
+
"dear": 778,
|
|
1285
|
+
"enemy": 779,
|
|
1286
|
+
"reply": 780,
|
|
1287
|
+
"drink": 781,
|
|
1288
|
+
"occur": 782,
|
|
1289
|
+
"support": 783,
|
|
1290
|
+
"speech": 784,
|
|
1291
|
+
"nature": 785,
|
|
1292
|
+
"range": 786,
|
|
1293
|
+
"steam": 787,
|
|
1294
|
+
"motion": 788,
|
|
1295
|
+
"path": 789,
|
|
1296
|
+
"liquid": 790,
|
|
1297
|
+
"log": 791,
|
|
1298
|
+
"meant": 792,
|
|
1299
|
+
"quotient": 793,
|
|
1300
|
+
"teeth": 794,
|
|
1301
|
+
"shell": 795,
|
|
1302
|
+
"neck": 796,
|
|
1303
|
+
"oxygen": 797,
|
|
1304
|
+
"sugar": 798,
|
|
1305
|
+
"death": 799,
|
|
1306
|
+
"pretty": 800,
|
|
1307
|
+
"skill": 801,
|
|
1308
|
+
"women": 802,
|
|
1309
|
+
"season": 803,
|
|
1310
|
+
"solution": 804,
|
|
1311
|
+
"magnet": 805,
|
|
1312
|
+
"silver": 806,
|
|
1313
|
+
"thank": 807,
|
|
1314
|
+
"branch": 808,
|
|
1315
|
+
"match": 809,
|
|
1316
|
+
"suffix": 810,
|
|
1317
|
+
"especially": 811,
|
|
1318
|
+
"fig": 812,
|
|
1319
|
+
"afraid": 813,
|
|
1320
|
+
"huge": 814,
|
|
1321
|
+
"sister": 815,
|
|
1322
|
+
"steel": 816,
|
|
1323
|
+
"discuss": 817,
|
|
1324
|
+
"similar": 819,
|
|
1325
|
+
"guide": 820,
|
|
1326
|
+
"experience": 821,
|
|
1327
|
+
"score": 822,
|
|
1328
|
+
"apple": 823,
|
|
1329
|
+
"bought": 824,
|
|
1330
|
+
"led": 825,
|
|
1331
|
+
"pitch": 826,
|
|
1332
|
+
"mass": 828,
|
|
1333
|
+
"card": 829,
|
|
1334
|
+
"band": 830,
|
|
1335
|
+
"rope": 831,
|
|
1336
|
+
"slip": 832,
|
|
1337
|
+
"win": 833,
|
|
1338
|
+
"dream": 834,
|
|
1339
|
+
"evening": 835,
|
|
1340
|
+
"condition": 836,
|
|
1341
|
+
"feed": 837,
|
|
1342
|
+
"tool": 838,
|
|
1343
|
+
"total": 839,
|
|
1344
|
+
"basic": 840,
|
|
1345
|
+
"smell": 841,
|
|
1346
|
+
"valley": 842,
|
|
1347
|
+
"nor": 843,
|
|
1348
|
+
"double": 844,
|
|
1349
|
+
"seat": 845,
|
|
1350
|
+
"continue": 846,
|
|
1351
|
+
"block": 847,
|
|
1352
|
+
"chart": 848,
|
|
1353
|
+
"hat": 849,
|
|
1354
|
+
"sell": 850,
|
|
1355
|
+
"success": 851,
|
|
1356
|
+
"company": 852,
|
|
1357
|
+
"subtract": 853,
|
|
1358
|
+
"event": 854,
|
|
1359
|
+
"deal": 856,
|
|
1360
|
+
"swim": 857,
|
|
1361
|
+
"term": 858,
|
|
1362
|
+
"opposite": 859,
|
|
1363
|
+
"wife": 860,
|
|
1364
|
+
"shoe": 861,
|
|
1365
|
+
"shoulder": 862,
|
|
1366
|
+
"arrange": 864,
|
|
1367
|
+
"camp": 865,
|
|
1368
|
+
"invent": 866,
|
|
1369
|
+
"cotton": 867,
|
|
1370
|
+
"born": 868,
|
|
1371
|
+
"determine": 869,
|
|
1372
|
+
"quart": 870,
|
|
1373
|
+
"nine": 871,
|
|
1374
|
+
"truck": 872,
|
|
1375
|
+
"noise": 873,
|
|
1376
|
+
"level": 874,
|
|
1377
|
+
"chance": 875,
|
|
1378
|
+
"gather": 876,
|
|
1379
|
+
"shop": 877,
|
|
1380
|
+
"stretch": 878,
|
|
1381
|
+
"throw": 879,
|
|
1382
|
+
"shine": 880,
|
|
1383
|
+
"property": 881,
|
|
1384
|
+
"column": 882,
|
|
1385
|
+
"molecule": 883,
|
|
1386
|
+
"select": 884,
|
|
1387
|
+
"wrong": 885,
|
|
1388
|
+
"gray": 886,
|
|
1389
|
+
"repeat": 887,
|
|
1390
|
+
"require": 888,
|
|
1391
|
+
"broad": 889,
|
|
1392
|
+
"prepare": 890,
|
|
1393
|
+
"salt": 891,
|
|
1394
|
+
"nose": 892,
|
|
1395
|
+
"plural": 893,
|
|
1396
|
+
"anger": 894,
|
|
1397
|
+
"claim": 895,
|
|
1398
|
+
"continent": 896,
|
|
1399
|
+
"mom": 897,
|
|
1400
|
+
"rail": 913,
|
|
1401
|
+
"please": 1023,
|
|
1402
|
+
"protect": 1024,
|
|
1403
|
+
"noon": 1025,
|
|
1404
|
+
"crop": 1026,
|
|
1405
|
+
"modern": 1027,
|
|
1406
|
+
"element": 1028,
|
|
1407
|
+
"hit": 1029,
|
|
1408
|
+
"student": 1030,
|
|
1409
|
+
"party": 1032,
|
|
1410
|
+
"supply": 1033,
|
|
1411
|
+
"bone": 1034,
|
|
1412
|
+
"tube": 1035,
|
|
1413
|
+
"famous": 1036,
|
|
1414
|
+
"dollar": 1037,
|
|
1415
|
+
"stream": 1038,
|
|
1416
|
+
"fear": 1039,
|
|
1417
|
+
"sight": 1040,
|
|
1418
|
+
"thin": 1041,
|
|
1419
|
+
"triangle": 1042,
|
|
1420
|
+
"planet": 1043,
|
|
1421
|
+
"hurry": 1044,
|
|
1422
|
+
"chief": 1045,
|
|
1423
|
+
"colony": 1046,
|
|
1424
|
+
"clock": 1047,
|
|
1425
|
+
"mine": 1048,
|
|
1426
|
+
"tie": 1049,
|
|
1427
|
+
"enter": 1050,
|
|
1428
|
+
"major": 1051,
|
|
1429
|
+
"fresh": 1052,
|
|
1430
|
+
"search": 1053,
|
|
1431
|
+
"send": 1054,
|
|
1432
|
+
"yellow": 1055,
|
|
1433
|
+
"gun": 1056,
|
|
1434
|
+
"allow": 1057,
|
|
1435
|
+
"print": 1058,
|
|
1436
|
+
"dead": 1059,
|
|
1437
|
+
"spot": 1060,
|
|
1438
|
+
"desert": 1061,
|
|
1439
|
+
"suit": 1062,
|
|
1440
|
+
"current": 1063,
|
|
1441
|
+
"lift": 1064,
|
|
1442
|
+
"rose": 1065,
|
|
1443
|
+
"arrive": 1066,
|
|
1444
|
+
"master": 1067,
|
|
1445
|
+
"track": 1068,
|
|
1446
|
+
"locate": 1069,
|
|
1447
|
+
"ring": 1070,
|
|
1448
|
+
"believe": 1071,
|
|
1449
|
+
"gentle": 1072,
|
|
1450
|
+
"woman": 1073,
|
|
1451
|
+
"captain": 1074,
|
|
1452
|
+
"guess": 1075,
|
|
1453
|
+
"necessary": 1076,
|
|
1454
|
+
"sharp": 1077,
|
|
1455
|
+
"wing": 1078,
|
|
1456
|
+
"create": 1079,
|
|
1457
|
+
"neighbor": 1080,
|
|
1458
|
+
"wash": 1081,
|
|
1459
|
+
"bat": 1082,
|
|
1460
|
+
"rather": 1083,
|
|
1461
|
+
"crowd": 1084,
|
|
1462
|
+
"corn": 1085,
|
|
1463
|
+
"compare": 1086,
|
|
1464
|
+
"poem": 1087,
|
|
1465
|
+
"string": 1088,
|
|
1466
|
+
"bell": 1089,
|
|
1467
|
+
"depend": 1090,
|
|
1468
|
+
"meat": 1091,
|
|
1469
|
+
"rub": 1092,
|
|
1470
|
+
"indicate": 1096,
|
|
1471
|
+
"metal": 1097,
|
|
1472
|
+
"whether": 1098,
|
|
1473
|
+
"push": 1099,
|
|
1474
|
+
"seven": 1100,
|
|
312
1475
|
# Additional common words 1101-5000
|
|
313
|
-
"village": 1101,
|
|
314
|
-
"
|
|
315
|
-
"
|
|
316
|
-
"
|
|
317
|
-
"
|
|
318
|
-
"
|
|
319
|
-
"
|
|
320
|
-
"
|
|
321
|
-
|
|
1476
|
+
"village": 1101,
|
|
1477
|
+
"meet": 1102,
|
|
1478
|
+
"root": 1103,
|
|
1479
|
+
"buy": 1104,
|
|
1480
|
+
"raise": 1105,
|
|
1481
|
+
"solve": 1106,
|
|
1482
|
+
"understand": 1107,
|
|
1483
|
+
"member": 1108,
|
|
1484
|
+
"describe": 1112,
|
|
1485
|
+
"ocean": 1114,
|
|
1486
|
+
"electric": 1115,
|
|
1487
|
+
"expect": 1116,
|
|
1488
|
+
"imagine": 1119,
|
|
1489
|
+
"provide": 1120,
|
|
1490
|
+
"agree": 1121,
|
|
1491
|
+
"thus": 1122,
|
|
322
1492
|
# For brevity, jumping to approximate ranks for less common words
|
|
323
|
-
"political": 1500,
|
|
324
|
-
"
|
|
325
|
-
"
|
|
326
|
-
"
|
|
327
|
-
"
|
|
328
|
-
"
|
|
329
|
-
"
|
|
330
|
-
"
|
|
331
|
-
"
|
|
332
|
-
"
|
|
333
|
-
"
|
|
334
|
-
"
|
|
335
|
-
"
|
|
336
|
-
"
|
|
337
|
-
"
|
|
1493
|
+
"political": 1500,
|
|
1494
|
+
"social": 1501,
|
|
1495
|
+
"business": 1502,
|
|
1496
|
+
"service": 1503,
|
|
1497
|
+
"attention": 1504,
|
|
1498
|
+
"international": 1505,
|
|
1499
|
+
"various": 1506,
|
|
1500
|
+
"community": 1507,
|
|
1501
|
+
"national": 1508,
|
|
1502
|
+
"american": 1509,
|
|
1503
|
+
"president": 1510,
|
|
1504
|
+
"available": 1511,
|
|
1505
|
+
"information": 1512,
|
|
1506
|
+
"development": 1513,
|
|
1507
|
+
"different": 1515,
|
|
1508
|
+
"important": 1516,
|
|
1509
|
+
"education": 1517,
|
|
1510
|
+
"director": 1518,
|
|
1511
|
+
"economic": 1519,
|
|
1512
|
+
"evidence": 1520,
|
|
1513
|
+
"management": 1521,
|
|
1514
|
+
"hospital": 1522,
|
|
1515
|
+
"personal": 1523,
|
|
1516
|
+
"professional": 1526,
|
|
1517
|
+
"performance": 1527,
|
|
1518
|
+
"individual": 1528,
|
|
1519
|
+
"organization": 1529,
|
|
1520
|
+
"structure": 1530,
|
|
1521
|
+
"responsibility": 1531,
|
|
1522
|
+
"technology": 1532,
|
|
1523
|
+
"democratic": 1533,
|
|
1524
|
+
"relationship": 1534,
|
|
1525
|
+
"environmental": 1535,
|
|
1526
|
+
"significantly": 1536,
|
|
1527
|
+
"particularly": 1537,
|
|
1528
|
+
"approximately": 1538,
|
|
1529
|
+
"ultimately": 1539,
|
|
1530
|
+
"comprehensive": 1540,
|
|
1531
|
+
"substantial": 1541,
|
|
1532
|
+
"fundamental": 1542,
|
|
1533
|
+
"analysis": 1543,
|
|
1534
|
+
"investigation": 1544,
|
|
1535
|
+
"demonstrate": 1546,
|
|
1536
|
+
"theoretical": 1547,
|
|
1537
|
+
"significant": 1548,
|
|
1538
|
+
"hypothesis": 1549,
|
|
1539
|
+
"empirical": 1550,
|
|
1540
|
+
"methodology": 1551,
|
|
1541
|
+
"framework": 1552,
|
|
1542
|
+
"implications": 1553,
|
|
1543
|
+
"phenomena": 1554,
|
|
1544
|
+
"parameters": 1555,
|
|
1545
|
+
"correlation": 1556,
|
|
1546
|
+
"variables": 1557,
|
|
1547
|
+
"statistical": 1558,
|
|
1548
|
+
"preliminary": 1559,
|
|
338
1549
|
}
|
|
339
1550
|
|
|
340
1551
|
|
|
@@ -360,11 +1571,11 @@ def _tokenize_for_frequency_analysis(text: str) -> list[str]:
|
|
|
360
1571
|
raw_tokens = text_lower.split()
|
|
361
1572
|
|
|
362
1573
|
# Comprehensive punctuation set
|
|
363
|
-
|
|
1574
|
+
punctuation_chars = set(".,!?;:'\"()[]{}/-—–…*&@#$%^~`\\|<>«»„\"\"''‚'")
|
|
364
1575
|
|
|
365
1576
|
tokens = []
|
|
366
1577
|
for token in raw_tokens:
|
|
367
|
-
clean_token = token.strip("".join(
|
|
1578
|
+
clean_token = token.strip("".join(punctuation_chars))
|
|
368
1579
|
if clean_token:
|
|
369
1580
|
tokens.append(clean_token)
|
|
370
1581
|
|
|
@@ -391,6 +1602,7 @@ def compute_word_frequency_sophistication(
|
|
|
391
1602
|
frequency_corpus: str = "coca",
|
|
392
1603
|
rare_threshold: int = 10000,
|
|
393
1604
|
common_threshold: int = 1000,
|
|
1605
|
+
chunk_size: int = 1000,
|
|
394
1606
|
) -> WordFrequencySophisticationResult:
|
|
395
1607
|
"""
|
|
396
1608
|
Compute word frequency sophistication metrics.
|
|
@@ -477,9 +1689,7 @@ def compute_word_frequency_sophistication(
|
|
|
477
1689
|
"""
|
|
478
1690
|
# Validate corpus parameter
|
|
479
1691
|
if frequency_corpus != "coca":
|
|
480
|
-
raise ValueError(
|
|
481
|
-
f"Only 'coca' corpus is currently supported, got '{frequency_corpus}'"
|
|
482
|
-
)
|
|
1692
|
+
raise ValueError(f"Only 'coca' corpus is currently supported, got '{frequency_corpus}'")
|
|
483
1693
|
|
|
484
1694
|
# Load frequency dictionary
|
|
485
1695
|
frequency_dict = COCA_FREQUENCY_RANKS
|
|
@@ -534,9 +1744,7 @@ def compute_word_frequency_sophistication(
|
|
|
534
1744
|
"rare": sum(1 for r in word_ranks if 10000 < r <= 20000),
|
|
535
1745
|
"very_rare": sum(1 for r in word_ranks if r > 20000),
|
|
536
1746
|
}
|
|
537
|
-
frequency_band_distribution = {
|
|
538
|
-
band: count / total_words for band, count in band_counts.items()
|
|
539
|
-
}
|
|
1747
|
+
frequency_band_distribution = {band: count / total_words for band, count in band_counts.items()}
|
|
540
1748
|
|
|
541
1749
|
# Find rarest and most common words (top 10 each, deduplicated)
|
|
542
1750
|
word_rank_pairs = list(zip(tokens, word_ranks))
|
|
@@ -554,6 +1762,14 @@ def compute_word_frequency_sophistication(
|
|
|
554
1762
|
sorted_by_common = sorted(unique_pairs.items(), key=lambda x: x[1])
|
|
555
1763
|
most_common_words = [(word, float(rank)) for word, rank in sorted_by_common[:10]]
|
|
556
1764
|
|
|
1765
|
+
# Create single-value distributions (analysis is done on full text)
|
|
1766
|
+
mean_frequency_rank_dist = make_distribution([mean_rank])
|
|
1767
|
+
median_frequency_rank_dist = make_distribution([median_rank])
|
|
1768
|
+
rare_word_ratio_dist = make_distribution([rare_word_ratio])
|
|
1769
|
+
common_word_ratio_dist = make_distribution([common_word_ratio])
|
|
1770
|
+
academic_word_ratio_dist = make_distribution([academic_word_ratio])
|
|
1771
|
+
advanced_word_ratio_dist = make_distribution([advanced_word_ratio])
|
|
1772
|
+
|
|
557
1773
|
# Metadata
|
|
558
1774
|
metadata = {
|
|
559
1775
|
"frequency_corpus": frequency_corpus,
|
|
@@ -577,5 +1793,13 @@ def compute_word_frequency_sophistication(
|
|
|
577
1793
|
frequency_band_distribution=frequency_band_distribution,
|
|
578
1794
|
rarest_words=rarest_words,
|
|
579
1795
|
most_common_words=most_common_words,
|
|
1796
|
+
mean_frequency_rank_dist=mean_frequency_rank_dist,
|
|
1797
|
+
median_frequency_rank_dist=median_frequency_rank_dist,
|
|
1798
|
+
rare_word_ratio_dist=rare_word_ratio_dist,
|
|
1799
|
+
common_word_ratio_dist=common_word_ratio_dist,
|
|
1800
|
+
academic_word_ratio_dist=academic_word_ratio_dist,
|
|
1801
|
+
advanced_word_ratio_dist=advanced_word_ratio_dist,
|
|
1802
|
+
chunk_size=chunk_size,
|
|
1803
|
+
chunk_count=1, # Single pass analysis
|
|
580
1804
|
metadata=metadata,
|
|
581
1805
|
)
|