pystylometry 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64) hide show
  1. pystylometry/README.md +42 -0
  2. pystylometry/__init__.py +45 -3
  3. pystylometry/_types.py +1017 -259
  4. pystylometry/authorship/README.md +21 -0
  5. pystylometry/authorship/__init__.py +28 -4
  6. pystylometry/authorship/additional_methods.py +260 -40
  7. pystylometry/authorship/compression.py +175 -0
  8. pystylometry/authorship/kilgarriff.py +354 -0
  9. pystylometry/character/README.md +17 -0
  10. pystylometry/character/character_metrics.py +267 -179
  11. pystylometry/cli.py +427 -0
  12. pystylometry/consistency/README.md +27 -0
  13. pystylometry/consistency/__init__.py +57 -0
  14. pystylometry/consistency/_thresholds.py +162 -0
  15. pystylometry/consistency/drift.py +549 -0
  16. pystylometry/dialect/README.md +26 -0
  17. pystylometry/dialect/__init__.py +65 -0
  18. pystylometry/dialect/_data/dialect_markers.json +1134 -0
  19. pystylometry/dialect/_loader.py +360 -0
  20. pystylometry/dialect/detector.py +533 -0
  21. pystylometry/lexical/README.md +23 -0
  22. pystylometry/lexical/advanced_diversity.py +61 -22
  23. pystylometry/lexical/function_words.py +255 -56
  24. pystylometry/lexical/hapax.py +182 -52
  25. pystylometry/lexical/mtld.py +108 -26
  26. pystylometry/lexical/ttr.py +76 -10
  27. pystylometry/lexical/word_frequency_sophistication.py +1522 -298
  28. pystylometry/lexical/yule.py +136 -50
  29. pystylometry/ngrams/README.md +18 -0
  30. pystylometry/ngrams/entropy.py +150 -49
  31. pystylometry/ngrams/extended_ngrams.py +314 -69
  32. pystylometry/prosody/README.md +17 -0
  33. pystylometry/prosody/rhythm_prosody.py +773 -11
  34. pystylometry/readability/README.md +23 -0
  35. pystylometry/readability/additional_formulas.py +1887 -762
  36. pystylometry/readability/ari.py +144 -82
  37. pystylometry/readability/coleman_liau.py +136 -109
  38. pystylometry/readability/flesch.py +177 -73
  39. pystylometry/readability/gunning_fog.py +165 -161
  40. pystylometry/readability/smog.py +123 -42
  41. pystylometry/stylistic/README.md +20 -0
  42. pystylometry/stylistic/cohesion_coherence.py +669 -13
  43. pystylometry/stylistic/genre_register.py +1560 -17
  44. pystylometry/stylistic/markers.py +611 -17
  45. pystylometry/stylistic/vocabulary_overlap.py +354 -13
  46. pystylometry/syntactic/README.md +20 -0
  47. pystylometry/syntactic/advanced_syntactic.py +76 -14
  48. pystylometry/syntactic/pos_ratios.py +70 -6
  49. pystylometry/syntactic/sentence_stats.py +55 -12
  50. pystylometry/syntactic/sentence_types.py +71 -15
  51. pystylometry/viz/README.md +27 -0
  52. pystylometry/viz/__init__.py +71 -0
  53. pystylometry/viz/drift.py +589 -0
  54. pystylometry/viz/jsx/__init__.py +31 -0
  55. pystylometry/viz/jsx/_base.py +144 -0
  56. pystylometry/viz/jsx/report.py +677 -0
  57. pystylometry/viz/jsx/timeline.py +716 -0
  58. pystylometry/viz/jsx/viewer.py +1032 -0
  59. pystylometry-1.3.0.dist-info/METADATA +136 -0
  60. pystylometry-1.3.0.dist-info/RECORD +76 -0
  61. {pystylometry-1.0.0.dist-info → pystylometry-1.3.0.dist-info}/WHEEL +1 -1
  62. pystylometry-1.3.0.dist-info/entry_points.txt +4 -0
  63. pystylometry-1.0.0.dist-info/METADATA +0 -275
  64. pystylometry-1.0.0.dist-info/RECORD +0 -46
@@ -24,89 +24,535 @@ References:
24
24
  Davies, M. (2008-). The Corpus of Contemporary American English (COCA).
25
25
  """
26
26
 
27
- from .._types import WordFrequencySophisticationResult
28
-
27
+ from .._types import WordFrequencySophisticationResult, make_distribution
29
28
 
30
29
  # Academic Word List (AWL) - Coxhead (2000)
31
30
  # GitHub Issue #15: https://github.com/craigtrim/pystylometry/issues/15
32
31
  # This is a subset of common academic words. The full AWL contains 570 word families.
33
32
  # Consider loading from external file for complete list.
34
33
  ACADEMIC_WORD_LIST = {
35
- "analyze", "analysis", "analytical", "approach", "area", "assess", "assessment",
36
- "assume", "assumption", "authority", "available", "benefit", "category", "chapter",
37
- "commission", "community", "complex", "compute", "computer", "conclude", "conclusion",
38
- "conduct", "consequence", "considerable", "consist", "consistent", "constitute",
39
- "constitutional", "construct", "construction", "consumer", "context", "contract",
40
- "contrast", "contribute", "contribution", "controversial", "controversy", "convert",
41
- "create", "creation", "creative", "credit", "criteria", "cultural", "culture",
42
- "data", "debate", "define", "definition", "demonstrate", "demonstration", "derive",
43
- "derived", "design", "despite", "detect", "dimension", "diminish", "distinct",
44
- "distinction", "distribute", "distribution", "diverse", "diversity", "document",
45
- "documentation", "domestic", "dominate", "economy", "economic", "edit", "element",
46
- "eliminate", "emerge", "emphasis", "emphasize", "empirical", "enable", "encounter",
47
- "energy", "enforce", "enhance", "enormous", "ensure", "environment", "environmental",
48
- "equation", "equate", "error", "establish", "estate", "estimate", "ethic", "ethnic",
49
- "evaluate", "evaluation", "eventual", "eventually", "evident", "evidence", "evolve",
50
- "evolution", "exceed", "exclude", "exclusive", "expand", "expansion", "explicit",
51
- "exploit", "export", "expose", "external", "extract", "facilitate", "factor",
52
- "feature", "federal", "fee", "file", "final", "finance", "financial", "finite",
53
- "flexible", "fluctuate", "focus", "format", "formula", "forthcoming", "foundation",
54
- "found", "framework", "function", "functional", "fund", "fundamental", "gender",
55
- "generate", "generation", "global", "goal", "grant", "guarantee", "guideline",
56
- "hence", "hypothesis", "hypothetical", "identical", "identify", "identity", "ideology",
57
- "ignorance", "illustrate", "image", "immigrate", "impact", "implement", "implicate",
58
- "implicit", "imply", "impose", "incentive", "incidence", "incline", "income",
59
- "incorporate", "index", "indicate", "indication", "individual", "individualism",
60
- "induce", "inevitable", "infer", "infrastructure", "inherent", "inherit", "initial",
61
- "initially", "initiate", "injure", "innovate", "innovation", "input", "insert",
62
- "insight", "inspect", "instance", "institute", "institution", "instruct", "integral",
63
- "integrate", "integration", "integrity", "intelligence", "intense", "intensity",
64
- "interact", "interaction", "intermediate", "internal", "interpret", "interpretation",
65
- "interval", "intervene", "intervention", "intrinsic", "invest", "investigate",
66
- "investigation", "investment", "invoke", "involve", "involvement", "isolate",
67
- "isolation", "issue", "item", "job", "journal", "justify", "label", "labor",
68
- "layer", "lecture", "legal", "legislate", "legislation", "legislative", "levy",
69
- "liberal", "license", "likewise", "link", "locate", "location", "logic", "maintain",
70
- "maintenance", "major", "majority", "manipulate", "manual", "margin", "mature",
71
- "maturity", "maximize", "mechanism", "media", "mediate", "medical", "medium",
72
- "mental", "method", "methodology", "migrate", "military", "minimal", "minimize",
73
- "minimum", "ministry", "minor", "minority", "mode", "modify", "monitor", "motive",
74
- "mutual", "negate", "network", "neutral", "nevertheless", "nonetheless", "normal",
75
- "normally", "notion", "notwithstanding", "nuclear", "objective", "obtain", "obvious",
76
- "obviously", "occupy", "occur", "odd", "offset", "ongoing", "option", "orient",
77
- "orientation", "origin", "original", "output", "overall", "overlap", "overseas",
78
- "panel", "paradigm", "paragraph", "parallel", "parameter", "participate",
79
- "participation", "particular", "partner", "passive", "perceive", "percent",
80
- "percentage", "perception", "period", "periodic", "persist", "perspective", "phase",
81
- "phenomena", "phenomenon", "philosophy", "physical", "plus", "policy", "portion",
82
- "pose", "positive", "potential", "practitioner", "precede", "preceding", "precise",
83
- "predict", "prediction", "predominant", "preliminary", "presume", "previous",
84
- "primarily", "primary", "prime", "principal", "principle", "prior", "priority",
85
- "proceed", "process", "professional", "prohibit", "project", "projection", "promote",
86
- "promotion", "proportion", "prospect", "protocol", "psychology", "publication",
87
- "publish", "purchase", "pursue", "qualitative", "quote", "radical", "random",
88
- "range", "ratio", "rational", "react", "reaction", "recover", "refine", "reform",
89
- "regime", "region", "regional", "register", "regulate", "regulation", "reinforce",
90
- "reject", "relax", "release", "relevant", "reluctance", "rely", "remove", "require",
91
- "requirement", "research", "researcher", "reside", "resolve", "resource", "respond",
92
- "response", "restore", "restrain", "restrict", "restriction", "retain", "reveal",
93
- "revenue", "reverse", "revise", "revolution", "rigid", "role", "route", "scenario",
94
- "schedule", "scheme", "scope", "section", "sector", "secure", "security", "seek",
95
- "select", "selection", "sequence", "series", "sex", "shift", "significant",
96
- "significantly", "similar", "similarly", "simulate", "simulation", "site", "so-called",
97
- "sole", "solely", "somewhat", "source", "specific", "specifically", "specify",
98
- "sphere", "stable", "statistics", "status", "straightforward", "strategy", "stress",
99
- "structural", "structure", "style", "submit", "subordinate", "subsequent",
100
- "subsequently", "subsidy", "substitute", "substitute", "successor", "sufficient",
101
- "sum", "summary", "supplement", "survey", "survive", "suspend", "sustain", "symbol",
102
- "tape", "target", "task", "team", "technical", "technique", "technology", "temporary",
103
- "tense", "terminate", "text", "theme", "theory", "thereby", "thesis", "topic",
104
- "trace", "tradition", "traditional", "transfer", "transform", "transformation",
105
- "transit", "transition", "transmit", "transport", "trend", "trigger", "ultimate",
106
- "ultimately", "undergo", "underlie", "underlying", "undertake", "uniform", "unify",
107
- "unique", "utilize", "valid", "validity", "vary", "variation", "vehicle", "version",
108
- "via", "violate", "virtual", "virtually", "visible", "vision", "visual", "volume",
109
- "voluntary", "welfare", "whereas", "whereby", "widespread",
34
+ "analyze",
35
+ "analysis",
36
+ "analytical",
37
+ "approach",
38
+ "area",
39
+ "assess",
40
+ "assessment",
41
+ "assume",
42
+ "assumption",
43
+ "authority",
44
+ "available",
45
+ "benefit",
46
+ "category",
47
+ "chapter",
48
+ "commission",
49
+ "community",
50
+ "complex",
51
+ "compute",
52
+ "computer",
53
+ "conclude",
54
+ "conclusion",
55
+ "conduct",
56
+ "consequence",
57
+ "considerable",
58
+ "consist",
59
+ "consistent",
60
+ "constitute",
61
+ "constitutional",
62
+ "construct",
63
+ "construction",
64
+ "consumer",
65
+ "context",
66
+ "contract",
67
+ "contrast",
68
+ "contribute",
69
+ "contribution",
70
+ "controversial",
71
+ "controversy",
72
+ "convert",
73
+ "create",
74
+ "creation",
75
+ "creative",
76
+ "credit",
77
+ "criteria",
78
+ "cultural",
79
+ "culture",
80
+ "data",
81
+ "debate",
82
+ "define",
83
+ "definition",
84
+ "demonstrate",
85
+ "demonstration",
86
+ "derive",
87
+ "derived",
88
+ "design",
89
+ "despite",
90
+ "detect",
91
+ "dimension",
92
+ "diminish",
93
+ "distinct",
94
+ "distinction",
95
+ "distribute",
96
+ "distribution",
97
+ "diverse",
98
+ "diversity",
99
+ "document",
100
+ "documentation",
101
+ "domestic",
102
+ "dominate",
103
+ "economy",
104
+ "economic",
105
+ "edit",
106
+ "element",
107
+ "eliminate",
108
+ "emerge",
109
+ "emphasis",
110
+ "emphasize",
111
+ "empirical",
112
+ "enable",
113
+ "encounter",
114
+ "energy",
115
+ "enforce",
116
+ "enhance",
117
+ "enormous",
118
+ "ensure",
119
+ "environment",
120
+ "environmental",
121
+ "equation",
122
+ "equate",
123
+ "error",
124
+ "establish",
125
+ "estate",
126
+ "estimate",
127
+ "ethic",
128
+ "ethnic",
129
+ "evaluate",
130
+ "evaluation",
131
+ "eventual",
132
+ "eventually",
133
+ "evident",
134
+ "evidence",
135
+ "evolve",
136
+ "evolution",
137
+ "exceed",
138
+ "exclude",
139
+ "exclusive",
140
+ "expand",
141
+ "expansion",
142
+ "explicit",
143
+ "exploit",
144
+ "export",
145
+ "expose",
146
+ "external",
147
+ "extract",
148
+ "facilitate",
149
+ "factor",
150
+ "feature",
151
+ "federal",
152
+ "fee",
153
+ "file",
154
+ "final",
155
+ "finance",
156
+ "financial",
157
+ "finite",
158
+ "flexible",
159
+ "fluctuate",
160
+ "focus",
161
+ "format",
162
+ "formula",
163
+ "forthcoming",
164
+ "foundation",
165
+ "found",
166
+ "framework",
167
+ "function",
168
+ "functional",
169
+ "fund",
170
+ "fundamental",
171
+ "gender",
172
+ "generate",
173
+ "generation",
174
+ "global",
175
+ "goal",
176
+ "grant",
177
+ "guarantee",
178
+ "guideline",
179
+ "hence",
180
+ "hypothesis",
181
+ "hypothetical",
182
+ "identical",
183
+ "identify",
184
+ "identity",
185
+ "ideology",
186
+ "ignorance",
187
+ "illustrate",
188
+ "image",
189
+ "immigrate",
190
+ "impact",
191
+ "implement",
192
+ "implicate",
193
+ "implicit",
194
+ "imply",
195
+ "impose",
196
+ "incentive",
197
+ "incidence",
198
+ "incline",
199
+ "income",
200
+ "incorporate",
201
+ "index",
202
+ "indicate",
203
+ "indication",
204
+ "individual",
205
+ "individualism",
206
+ "induce",
207
+ "inevitable",
208
+ "infer",
209
+ "infrastructure",
210
+ "inherent",
211
+ "inherit",
212
+ "initial",
213
+ "initially",
214
+ "initiate",
215
+ "injure",
216
+ "innovate",
217
+ "innovation",
218
+ "input",
219
+ "insert",
220
+ "insight",
221
+ "inspect",
222
+ "instance",
223
+ "institute",
224
+ "institution",
225
+ "instruct",
226
+ "integral",
227
+ "integrate",
228
+ "integration",
229
+ "integrity",
230
+ "intelligence",
231
+ "intense",
232
+ "intensity",
233
+ "interact",
234
+ "interaction",
235
+ "intermediate",
236
+ "internal",
237
+ "interpret",
238
+ "interpretation",
239
+ "interval",
240
+ "intervene",
241
+ "intervention",
242
+ "intrinsic",
243
+ "invest",
244
+ "investigate",
245
+ "investigation",
246
+ "investment",
247
+ "invoke",
248
+ "involve",
249
+ "involvement",
250
+ "isolate",
251
+ "isolation",
252
+ "issue",
253
+ "item",
254
+ "job",
255
+ "journal",
256
+ "justify",
257
+ "label",
258
+ "labor",
259
+ "layer",
260
+ "lecture",
261
+ "legal",
262
+ "legislate",
263
+ "legislation",
264
+ "legislative",
265
+ "levy",
266
+ "liberal",
267
+ "license",
268
+ "likewise",
269
+ "link",
270
+ "locate",
271
+ "location",
272
+ "logic",
273
+ "maintain",
274
+ "maintenance",
275
+ "major",
276
+ "majority",
277
+ "manipulate",
278
+ "manual",
279
+ "margin",
280
+ "mature",
281
+ "maturity",
282
+ "maximize",
283
+ "mechanism",
284
+ "media",
285
+ "mediate",
286
+ "medical",
287
+ "medium",
288
+ "mental",
289
+ "method",
290
+ "methodology",
291
+ "migrate",
292
+ "military",
293
+ "minimal",
294
+ "minimize",
295
+ "minimum",
296
+ "ministry",
297
+ "minor",
298
+ "minority",
299
+ "mode",
300
+ "modify",
301
+ "monitor",
302
+ "motive",
303
+ "mutual",
304
+ "negate",
305
+ "network",
306
+ "neutral",
307
+ "nevertheless",
308
+ "nonetheless",
309
+ "normal",
310
+ "normally",
311
+ "notion",
312
+ "notwithstanding",
313
+ "nuclear",
314
+ "objective",
315
+ "obtain",
316
+ "obvious",
317
+ "obviously",
318
+ "occupy",
319
+ "occur",
320
+ "odd",
321
+ "offset",
322
+ "ongoing",
323
+ "option",
324
+ "orient",
325
+ "orientation",
326
+ "origin",
327
+ "original",
328
+ "output",
329
+ "overall",
330
+ "overlap",
331
+ "overseas",
332
+ "panel",
333
+ "paradigm",
334
+ "paragraph",
335
+ "parallel",
336
+ "parameter",
337
+ "participate",
338
+ "participation",
339
+ "particular",
340
+ "partner",
341
+ "passive",
342
+ "perceive",
343
+ "percent",
344
+ "percentage",
345
+ "perception",
346
+ "period",
347
+ "periodic",
348
+ "persist",
349
+ "perspective",
350
+ "phase",
351
+ "phenomena",
352
+ "phenomenon",
353
+ "philosophy",
354
+ "physical",
355
+ "plus",
356
+ "policy",
357
+ "portion",
358
+ "pose",
359
+ "positive",
360
+ "potential",
361
+ "practitioner",
362
+ "precede",
363
+ "preceding",
364
+ "precise",
365
+ "predict",
366
+ "prediction",
367
+ "predominant",
368
+ "preliminary",
369
+ "presume",
370
+ "previous",
371
+ "primarily",
372
+ "primary",
373
+ "prime",
374
+ "principal",
375
+ "principle",
376
+ "prior",
377
+ "priority",
378
+ "proceed",
379
+ "process",
380
+ "professional",
381
+ "prohibit",
382
+ "project",
383
+ "projection",
384
+ "promote",
385
+ "promotion",
386
+ "proportion",
387
+ "prospect",
388
+ "protocol",
389
+ "psychology",
390
+ "publication",
391
+ "publish",
392
+ "purchase",
393
+ "pursue",
394
+ "qualitative",
395
+ "quote",
396
+ "radical",
397
+ "random",
398
+ "range",
399
+ "ratio",
400
+ "rational",
401
+ "react",
402
+ "reaction",
403
+ "recover",
404
+ "refine",
405
+ "reform",
406
+ "regime",
407
+ "region",
408
+ "regional",
409
+ "register",
410
+ "regulate",
411
+ "regulation",
412
+ "reinforce",
413
+ "reject",
414
+ "relax",
415
+ "release",
416
+ "relevant",
417
+ "reluctance",
418
+ "rely",
419
+ "remove",
420
+ "require",
421
+ "requirement",
422
+ "research",
423
+ "researcher",
424
+ "reside",
425
+ "resolve",
426
+ "resource",
427
+ "respond",
428
+ "response",
429
+ "restore",
430
+ "restrain",
431
+ "restrict",
432
+ "restriction",
433
+ "retain",
434
+ "reveal",
435
+ "revenue",
436
+ "reverse",
437
+ "revise",
438
+ "revolution",
439
+ "rigid",
440
+ "role",
441
+ "route",
442
+ "scenario",
443
+ "schedule",
444
+ "scheme",
445
+ "scope",
446
+ "section",
447
+ "sector",
448
+ "secure",
449
+ "security",
450
+ "seek",
451
+ "select",
452
+ "selection",
453
+ "sequence",
454
+ "series",
455
+ "sex",
456
+ "shift",
457
+ "significant",
458
+ "significantly",
459
+ "similar",
460
+ "similarly",
461
+ "simulate",
462
+ "simulation",
463
+ "site",
464
+ "so-called",
465
+ "sole",
466
+ "solely",
467
+ "somewhat",
468
+ "source",
469
+ "specific",
470
+ "specifically",
471
+ "specify",
472
+ "sphere",
473
+ "stable",
474
+ "statistics",
475
+ "status",
476
+ "straightforward",
477
+ "strategy",
478
+ "stress",
479
+ "structural",
480
+ "structure",
481
+ "style",
482
+ "submit",
483
+ "subordinate",
484
+ "subsequent",
485
+ "subsequently",
486
+ "subsidy",
487
+ "substitute",
488
+ "substitute",
489
+ "successor",
490
+ "sufficient",
491
+ "sum",
492
+ "summary",
493
+ "supplement",
494
+ "survey",
495
+ "survive",
496
+ "suspend",
497
+ "sustain",
498
+ "symbol",
499
+ "tape",
500
+ "target",
501
+ "task",
502
+ "team",
503
+ "technical",
504
+ "technique",
505
+ "technology",
506
+ "temporary",
507
+ "tense",
508
+ "terminate",
509
+ "text",
510
+ "theme",
511
+ "theory",
512
+ "thereby",
513
+ "thesis",
514
+ "topic",
515
+ "trace",
516
+ "tradition",
517
+ "traditional",
518
+ "transfer",
519
+ "transform",
520
+ "transformation",
521
+ "transit",
522
+ "transition",
523
+ "transmit",
524
+ "transport",
525
+ "trend",
526
+ "trigger",
527
+ "ultimate",
528
+ "ultimately",
529
+ "undergo",
530
+ "underlie",
531
+ "underlying",
532
+ "undertake",
533
+ "uniform",
534
+ "unify",
535
+ "unique",
536
+ "utilize",
537
+ "valid",
538
+ "validity",
539
+ "vary",
540
+ "variation",
541
+ "vehicle",
542
+ "version",
543
+ "via",
544
+ "violate",
545
+ "virtual",
546
+ "virtually",
547
+ "visible",
548
+ "vision",
549
+ "visual",
550
+ "volume",
551
+ "voluntary",
552
+ "welfare",
553
+ "whereas",
554
+ "whereby",
555
+ "widespread",
110
556
  }
111
557
 
112
558
 
@@ -117,224 +563,989 @@ ACADEMIC_WORD_LIST = {
117
563
  # This is an embedded subset for MVP. Full COCA has 60,000+ words.
118
564
  COCA_FREQUENCY_RANKS = {
119
565
  # Top 100 - Function words and most common verbs
120
- "the": 1, "be": 2, "to": 3, "of": 4, "and": 5, "a": 6, "in": 7, "that": 8,
121
- "have": 9, "i": 10, "it": 11, "for": 12, "not": 13, "on": 14, "with": 15,
122
- "he": 16, "as": 17, "you": 18, "do": 19, "at": 20, "this": 21, "but": 22,
123
- "his": 23, "by": 24, "from": 25, "they": 26, "we": 27, "say": 28, "her": 29,
124
- "she": 30, "or": 31, "an": 32, "will": 33, "my": 34, "one": 35, "all": 36,
125
- "would": 37, "there": 38, "their": 39, "what": 40, "so": 41, "up": 42,
126
- "out": 43, "if": 44, "about": 45, "who": 46, "get": 47, "which": 48, "go": 49,
127
- "me": 50, "when": 51, "make": 52, "can": 53, "like": 54, "time": 55, "no": 56,
128
- "just": 57, "him": 58, "know": 59, "take": 60, "people": 61, "into": 62,
129
- "year": 63, "your": 64, "good": 65, "some": 66, "could": 67, "them": 68,
130
- "see": 69, "other": 70, "than": 71, "then": 72, "now": 73, "look": 74,
131
- "only": 75, "come": 76, "its": 77, "over": 78, "think": 79, "also": 80,
132
- "back": 81, "after": 82, "use": 83, "two": 84, "how": 85, "our": 86, "work": 87,
133
- "first": 88, "well": 89, "way": 90, "even": 91, "new": 92, "want": 93,
134
- "because": 94, "any": 95, "these": 96, "give": 97, "day": 98, "most": 99,
566
+ "the": 1,
567
+ "be": 2,
568
+ "to": 3,
569
+ "of": 4,
570
+ "and": 5,
571
+ "a": 6,
572
+ "in": 7,
573
+ "that": 8,
574
+ "have": 9,
575
+ "i": 10,
576
+ "it": 11,
577
+ "for": 12,
578
+ "not": 13,
579
+ "on": 14,
580
+ "with": 15,
581
+ "he": 16,
582
+ "as": 17,
583
+ "you": 18,
584
+ "do": 19,
585
+ "at": 20,
586
+ "this": 21,
587
+ "but": 22,
588
+ "his": 23,
589
+ "by": 24,
590
+ "from": 25,
591
+ "they": 26,
592
+ "we": 27,
593
+ "say": 28,
594
+ "her": 29,
595
+ "she": 30,
596
+ "or": 31,
597
+ "an": 32,
598
+ "will": 33,
599
+ "my": 34,
600
+ "one": 35,
601
+ "all": 36,
602
+ "would": 37,
603
+ "there": 38,
604
+ "their": 39,
605
+ "what": 40,
606
+ "so": 41,
607
+ "up": 42,
608
+ "out": 43,
609
+ "if": 44,
610
+ "about": 45,
611
+ "who": 46,
612
+ "get": 47,
613
+ "which": 48,
614
+ "go": 49,
615
+ "me": 50,
616
+ "when": 51,
617
+ "make": 52,
618
+ "can": 53,
619
+ "like": 54,
620
+ "time": 55,
621
+ "no": 56,
622
+ "just": 57,
623
+ "him": 58,
624
+ "know": 59,
625
+ "take": 60,
626
+ "people": 61,
627
+ "into": 62,
628
+ "year": 63,
629
+ "your": 64,
630
+ "good": 65,
631
+ "some": 66,
632
+ "could": 67,
633
+ "them": 68,
634
+ "see": 69,
635
+ "other": 70,
636
+ "than": 71,
637
+ "then": 72,
638
+ "now": 73,
639
+ "look": 74,
640
+ "only": 75,
641
+ "come": 76,
642
+ "its": 77,
643
+ "over": 78,
644
+ "think": 79,
645
+ "also": 80,
646
+ "back": 81,
647
+ "after": 82,
648
+ "use": 83,
649
+ "two": 84,
650
+ "how": 85,
651
+ "our": 86,
652
+ "work": 87,
653
+ "first": 88,
654
+ "well": 89,
655
+ "way": 90,
656
+ "even": 91,
657
+ "new": 92,
658
+ "want": 93,
659
+ "because": 94,
660
+ "any": 95,
661
+ "these": 96,
662
+ "give": 97,
663
+ "day": 98,
664
+ "most": 99,
135
665
  "us": 100,
136
666
  # 101-500 - Common words
137
- "is": 101, "was": 102, "are": 103, "been": 104, "has": 105, "had": 106,
138
- "were": 107, "said": 108, "did": 109, "having": 110, "may": 111, "should": 112,
139
- "each": 113, "such": 114, "through": 115, "where": 116, "much": 117, "before": 118,
140
- "right": 119, "too": 120, "means": 121, "old": 122, "any": 123, "same": 124,
141
- "tell": 125, "boy": 126, "follow": 127, "came": 128, "show": 129, "every": 130,
142
- "good": 131, "me": 132, "give": 133, "our": 134, "under": 135, "name": 136,
143
- "very": 137, "through": 138, "just": 139, "form": 140, "great": 141, "think": 142,
144
- "say": 143, "help": 144, "low": 145, "line": 146, "before": 147, "turn": 148,
145
- "cause": 149, "same": 150, "mean": 151, "differ": 152, "move": 153, "right": 154,
146
- "boy": 155, "old": 156, "too": 157, "does": 158, "tell": 159, "sentence": 160,
147
- "set": 161, "three": 162, "want": 163, "air": 164, "well": 165, "also": 166,
148
- "play": 167, "small": 168, "end": 169, "put": 170, "home": 171, "read": 172,
149
- "hand": 173, "port": 174, "large": 175, "spell": 176, "add": 177, "even": 178,
150
- "land": 179, "here": 180, "must": 181, "big": 182, "high": 183, "such": 184,
151
- "follow": 185, "act": 186, "why": 187, "ask": 188, "men": 189, "change": 190,
152
- "went": 191, "light": 192, "kind": 193, "off": 194, "need": 195, "house": 196,
153
- "picture": 197, "try": 198, "us": 199, "again": 200, "animal": 201, "point": 202,
154
- "mother": 203, "world": 204, "near": 205, "build": 206, "self": 207, "earth": 208,
155
- "father": 209, "head": 210, "stand": 211, "own": 212, "page": 213, "should": 214,
156
- "country": 215, "found": 216, "answer": 217, "school": 218, "grow": 219,
157
- "study": 220, "still": 221, "learn": 222, "plant": 223, "cover": 224, "food": 225,
158
- "sun": 226, "four": 227, "thought": 228, "let": 229, "keep": 230, "eye": 231,
159
- "never": 232, "last": 233, "door": 234, "between": 235, "city": 236, "tree": 237,
160
- "cross": 238, "since": 239, "hard": 240, "start": 241, "might": 242, "story": 243,
161
- "saw": 244, "far": 245, "sea": 246, "draw": 247, "left": 248, "late": 249,
162
- "run": 250, "while": 251, "press": 252, "close": 253, "night": 254, "real": 255,
163
- "life": 256, "few": 257, "stop": 258, "open": 259, "seem": 260, "together": 261,
164
- "next": 262, "white": 263, "children": 264, "begin": 265, "got": 266, "walk": 267,
165
- "example": 268, "ease": 269, "paper": 270, "often": 271, "always": 272, "music": 273,
166
- "those": 274, "both": 275, "mark": 276, "book": 277, "letter": 278, "until": 279,
167
- "mile": 280, "river": 281, "car": 282, "feet": 283, "care": 284, "second": 285,
168
- "group": 286, "carry": 287, "took": 288, "rain": 289, "eat": 290, "room": 291,
169
- "friend": 292, "began": 293, "idea": 294, "fish": 295, "mountain": 296, "north": 297,
170
- "once": 298, "base": 299, "hear": 300, "horse": 301, "cut": 302, "sure": 303,
171
- "watch": 304, "color": 305, "face": 306, "wood": 307, "main": 308, "enough": 309,
172
- "plain": 310, "girl": 311, "usual": 312, "young": 313, "ready": 314, "above": 315,
173
- "ever": 316, "red": 317, "list": 318, "though": 319, "feel": 320, "talk": 321,
174
- "bird": 322, "soon": 323, "body": 324, "dog": 325, "family": 326, "direct": 327,
175
- "pose": 328, "leave": 329, "song": 330, "measure": 331, "state": 332, "product": 333,
176
- "black": 334, "short": 335, "numeral": 336, "class": 337, "wind": 338, "question": 339,
177
- "happen": 340, "complete": 341, "ship": 342, "area": 343, "half": 344, "rock": 345,
178
- "order": 346, "fire": 347, "south": 348, "problem": 349, "piece": 350, "told": 351,
179
- "knew": 352, "pass": 353, "farm": 354, "top": 355, "whole": 356, "king": 357,
180
- "size": 358, "heard": 359, "best": 360, "hour": 361, "better": 362, "true": 363,
181
- "during": 364, "hundred": 365, "am": 366, "remember": 367, "step": 368, "early": 369,
182
- "hold": 370, "west": 371, "ground": 372, "interest": 373, "reach": 374, "fast": 375,
183
- "five": 376, "sing": 377, "listen": 378, "six": 379, "table": 380, "travel": 381,
184
- "less": 382, "morning": 383, "ten": 384, "simple": 385, "several": 386, "vowel": 387,
185
- "toward": 388, "war": 389, "lay": 390, "against": 391, "pattern": 392, "slow": 393,
186
- "center": 394, "love": 395, "person": 396, "money": 397, "serve": 398, "appear": 399,
187
- "road": 400, "map": 401, "science": 402, "rule": 403, "govern": 404, "pull": 405,
188
- "cold": 406, "notice": 407, "voice": 408, "fall": 409, "power": 410, "town": 411,
189
- "fine": 412, "certain": 413, "fly": 414, "unit": 415, "lead": 416, "cry": 417,
190
- "dark": 418, "machine": 419, "note": 420, "wait": 421, "plan": 422, "figure": 423,
191
- "star": 424, "box": 425, "noun": 426, "field": 427, "rest": 428, "correct": 429,
192
- "able": 430, "pound": 431, "done": 432, "beauty": 433, "drive": 434, "stood": 435,
193
- "contain": 436, "front": 437, "teach": 438, "week": 439, "final": 440, "gave": 441,
194
- "green": 442, "oh": 443, "quick": 444, "develop": 445, "sleep": 446, "warm": 447,
195
- "free": 448, "minute": 449, "strong": 450, "special": 451, "mind": 452, "behind": 453,
196
- "clear": 454, "tail": 455, "produce": 456, "fact": 457, "street": 458, "inch": 459,
197
- "lot": 460, "nothing": 461, "course": 462, "stay": 463, "wheel": 464, "full": 465,
198
- "force": 466, "blue": 467, "object": 468, "decide": 469, "surface": 470, "deep": 471,
199
- "moon": 472, "island": 473, "foot": 474, "yet": 475, "busy": 476, "test": 477,
200
- "record": 478, "boat": 479, "common": 480, "gold": 481, "possible": 482, "plane": 483,
201
- "age": 484, "dry": 485, "wonder": 486, "laugh": 487, "thousand": 488, "ago": 489,
202
- "ran": 490, "check": 491, "game": 492, "shape": 493, "yes": 494, "hot": 495,
203
- "miss": 496, "brought": 497, "heat": 498, "snow": 499, "bed": 500,
667
+ "is": 101,
668
+ "was": 102,
669
+ "are": 103,
670
+ "been": 104,
671
+ "has": 105,
672
+ "had": 106,
673
+ "were": 107,
674
+ "said": 108,
675
+ "did": 109,
676
+ "having": 110,
677
+ "may": 111,
678
+ "should": 112,
679
+ "each": 113,
680
+ "such": 114,
681
+ "through": 115,
682
+ "where": 116,
683
+ "much": 117,
684
+ "before": 118,
685
+ "right": 119,
686
+ "too": 120,
687
+ "means": 121,
688
+ "old": 122,
689
+ "same": 124,
690
+ "tell": 125,
691
+ "boy": 126,
692
+ "follow": 127,
693
+ "came": 128,
694
+ "show": 129,
695
+ "every": 130,
696
+ "under": 135,
697
+ "name": 136,
698
+ "very": 137,
699
+ "form": 140,
700
+ "great": 141,
701
+ "help": 144,
702
+ "low": 145,
703
+ "line": 146,
704
+ "turn": 148,
705
+ "cause": 149,
706
+ "mean": 151,
707
+ "differ": 152,
708
+ "move": 153,
709
+ "does": 158,
710
+ "sentence": 160,
711
+ "set": 161,
712
+ "three": 162,
713
+ "air": 164,
714
+ "play": 167,
715
+ "small": 168,
716
+ "end": 169,
717
+ "put": 170,
718
+ "home": 171,
719
+ "read": 172,
720
+ "hand": 173,
721
+ "port": 174,
722
+ "large": 175,
723
+ "spell": 176,
724
+ "add": 177,
725
+ "land": 179,
726
+ "here": 180,
727
+ "must": 181,
728
+ "big": 182,
729
+ "high": 183,
730
+ "act": 186,
731
+ "why": 187,
732
+ "ask": 188,
733
+ "men": 189,
734
+ "change": 190,
735
+ "went": 191,
736
+ "light": 192,
737
+ "kind": 193,
738
+ "off": 194,
739
+ "need": 195,
740
+ "house": 196,
741
+ "picture": 197,
742
+ "try": 198,
743
+ "again": 200,
744
+ "animal": 201,
745
+ "point": 202,
746
+ "mother": 203,
747
+ "world": 204,
748
+ "near": 205,
749
+ "build": 206,
750
+ "self": 207,
751
+ "earth": 208,
752
+ "father": 209,
753
+ "head": 210,
754
+ "stand": 211,
755
+ "own": 212,
756
+ "page": 213,
757
+ "country": 215,
758
+ "found": 216,
759
+ "answer": 217,
760
+ "school": 218,
761
+ "grow": 219,
762
+ "study": 220,
763
+ "still": 221,
764
+ "learn": 222,
765
+ "plant": 223,
766
+ "cover": 224,
767
+ "food": 225,
768
+ "sun": 226,
769
+ "four": 227,
770
+ "thought": 228,
771
+ "let": 229,
772
+ "keep": 230,
773
+ "eye": 231,
774
+ "never": 232,
775
+ "last": 233,
776
+ "door": 234,
777
+ "between": 235,
778
+ "city": 236,
779
+ "tree": 237,
780
+ "cross": 238,
781
+ "since": 239,
782
+ "hard": 240,
783
+ "start": 241,
784
+ "might": 242,
785
+ "story": 243,
786
+ "saw": 244,
787
+ "far": 245,
788
+ "sea": 246,
789
+ "draw": 247,
790
+ "left": 248,
791
+ "late": 249,
792
+ "run": 250,
793
+ "while": 251,
794
+ "press": 252,
795
+ "close": 253,
796
+ "night": 254,
797
+ "real": 255,
798
+ "life": 256,
799
+ "few": 257,
800
+ "stop": 258,
801
+ "open": 259,
802
+ "seem": 260,
803
+ "together": 261,
804
+ "next": 262,
805
+ "white": 263,
806
+ "children": 264,
807
+ "begin": 265,
808
+ "got": 266,
809
+ "walk": 267,
810
+ "example": 268,
811
+ "ease": 269,
812
+ "paper": 270,
813
+ "often": 271,
814
+ "always": 272,
815
+ "music": 273,
816
+ "those": 274,
817
+ "both": 275,
818
+ "mark": 276,
819
+ "book": 277,
820
+ "letter": 278,
821
+ "until": 279,
822
+ "mile": 280,
823
+ "river": 281,
824
+ "car": 282,
825
+ "feet": 283,
826
+ "care": 284,
827
+ "second": 285,
828
+ "group": 286,
829
+ "carry": 287,
830
+ "took": 288,
831
+ "rain": 289,
832
+ "eat": 290,
833
+ "room": 291,
834
+ "friend": 292,
835
+ "began": 293,
836
+ "idea": 294,
837
+ "fish": 295,
838
+ "mountain": 296,
839
+ "north": 297,
840
+ "once": 298,
841
+ "base": 299,
842
+ "hear": 300,
843
+ "horse": 301,
844
+ "cut": 302,
845
+ "sure": 303,
846
+ "watch": 304,
847
+ "color": 305,
848
+ "face": 306,
849
+ "wood": 307,
850
+ "main": 308,
851
+ "enough": 309,
852
+ "plain": 310,
853
+ "girl": 311,
854
+ "usual": 312,
855
+ "young": 313,
856
+ "ready": 314,
857
+ "above": 315,
858
+ "ever": 316,
859
+ "red": 317,
860
+ "list": 318,
861
+ "though": 319,
862
+ "feel": 320,
863
+ "talk": 321,
864
+ "bird": 322,
865
+ "soon": 323,
866
+ "body": 324,
867
+ "dog": 325,
868
+ "family": 326,
869
+ "direct": 327,
870
+ "pose": 328,
871
+ "leave": 329,
872
+ "song": 330,
873
+ "measure": 331,
874
+ "state": 332,
875
+ "product": 333,
876
+ "black": 334,
877
+ "short": 335,
878
+ "numeral": 336,
879
+ "class": 337,
880
+ "wind": 338,
881
+ "question": 339,
882
+ "happen": 340,
883
+ "complete": 341,
884
+ "ship": 342,
885
+ "area": 343,
886
+ "half": 344,
887
+ "rock": 345,
888
+ "order": 346,
889
+ "fire": 347,
890
+ "south": 348,
891
+ "problem": 349,
892
+ "piece": 350,
893
+ "told": 351,
894
+ "knew": 352,
895
+ "pass": 353,
896
+ "farm": 354,
897
+ "top": 355,
898
+ "whole": 356,
899
+ "king": 357,
900
+ "size": 358,
901
+ "heard": 359,
902
+ "best": 360,
903
+ "hour": 361,
904
+ "better": 362,
905
+ "true": 363,
906
+ "during": 364,
907
+ "hundred": 365,
908
+ "am": 366,
909
+ "remember": 367,
910
+ "step": 368,
911
+ "early": 369,
912
+ "hold": 370,
913
+ "west": 371,
914
+ "ground": 372,
915
+ "interest": 373,
916
+ "reach": 374,
917
+ "fast": 375,
918
+ "five": 376,
919
+ "sing": 377,
920
+ "listen": 378,
921
+ "six": 379,
922
+ "table": 380,
923
+ "travel": 381,
924
+ "less": 382,
925
+ "morning": 383,
926
+ "ten": 384,
927
+ "simple": 385,
928
+ "several": 386,
929
+ "vowel": 387,
930
+ "toward": 388,
931
+ "war": 389,
932
+ "lay": 390,
933
+ "against": 391,
934
+ "pattern": 392,
935
+ "slow": 393,
936
+ "center": 394,
937
+ "love": 395,
938
+ "person": 396,
939
+ "money": 397,
940
+ "serve": 398,
941
+ "appear": 399,
942
+ "road": 400,
943
+ "map": 401,
944
+ "science": 402,
945
+ "rule": 403,
946
+ "govern": 404,
947
+ "pull": 405,
948
+ "cold": 406,
949
+ "notice": 407,
950
+ "voice": 408,
951
+ "fall": 409,
952
+ "power": 410,
953
+ "town": 411,
954
+ "fine": 412,
955
+ "certain": 413,
956
+ "fly": 414,
957
+ "unit": 415,
958
+ "lead": 416,
959
+ "cry": 417,
960
+ "dark": 418,
961
+ "machine": 419,
962
+ "note": 420,
963
+ "wait": 421,
964
+ "plan": 422,
965
+ "figure": 423,
966
+ "star": 424,
967
+ "box": 425,
968
+ "noun": 426,
969
+ "field": 427,
970
+ "rest": 428,
971
+ "correct": 429,
972
+ "able": 430,
973
+ "pound": 431,
974
+ "done": 432,
975
+ "beauty": 433,
976
+ "drive": 434,
977
+ "stood": 435,
978
+ "contain": 436,
979
+ "front": 437,
980
+ "teach": 438,
981
+ "week": 439,
982
+ "final": 440,
983
+ "gave": 441,
984
+ "green": 442,
985
+ "oh": 443,
986
+ "quick": 444,
987
+ "develop": 445,
988
+ "sleep": 446,
989
+ "warm": 447,
990
+ "free": 448,
991
+ "minute": 449,
992
+ "strong": 450,
993
+ "special": 451,
994
+ "mind": 452,
995
+ "behind": 453,
996
+ "clear": 454,
997
+ "tail": 455,
998
+ "produce": 456,
999
+ "fact": 457,
1000
+ "street": 458,
1001
+ "inch": 459,
1002
+ "lot": 460,
1003
+ "nothing": 461,
1004
+ "course": 462,
1005
+ "stay": 463,
1006
+ "wheel": 464,
1007
+ "full": 465,
1008
+ "force": 466,
1009
+ "blue": 467,
1010
+ "object": 468,
1011
+ "decide": 469,
1012
+ "surface": 470,
1013
+ "deep": 471,
1014
+ "moon": 472,
1015
+ "island": 473,
1016
+ "foot": 474,
1017
+ "yet": 475,
1018
+ "busy": 476,
1019
+ "test": 477,
1020
+ "record": 478,
1021
+ "boat": 479,
1022
+ "common": 480,
1023
+ "gold": 481,
1024
+ "possible": 482,
1025
+ "plane": 483,
1026
+ "age": 484,
1027
+ "dry": 485,
1028
+ "wonder": 486,
1029
+ "laugh": 487,
1030
+ "thousand": 488,
1031
+ "ago": 489,
1032
+ "ran": 490,
1033
+ "check": 491,
1034
+ "game": 492,
1035
+ "shape": 493,
1036
+ "yes": 494,
1037
+ "hot": 495,
1038
+ "miss": 496,
1039
+ "brought": 497,
1040
+ "heat": 498,
1041
+ "snow": 499,
1042
+ "bed": 500,
204
1043
  # 501-1100 - Common vocabulary (ranks are non-contiguous)
205
- "bring": 501, "sit": 502, "perhaps": 503, "fill": 504, "east": 505, "weight": 506,
206
- "language": 507, "among": 508, "cat": 509, "ball": 510, "human": 511, "red": 512,
207
- "doctor": 513, "road": 514, "office": 515, "break": 516, "die": 517, "radio": 518,
208
- "speak": 519, "atom": 520, "blood": 521, "felt": 522, "type": 523, "forward": 524,
209
- "century": 525, "milk": 526, "corner": 527, "speed": 528, "method": 529, "organ": 530,
210
- "pay": 531, "single": 532, "touch": 533, "control": 534, "bottom": 535, "design": 536,
211
- "coat": 537, "else": 538, "quite": 539, "broke": 540, "case": 541, "middle": 542,
212
- "kill": 543, "son": 544, "lake": 545, "moment": 546, "scale": 547, "loud": 548,
213
- "spring": 549, "observe": 550, "child": 551, "straight": 552, "consonant": 553,
214
- "nation": 554, "dictionary": 555, "bit": 556, "coast": 557, "copy": 558, "phrase": 559,
215
- "silent": 560, "tall": 561, "sand": 562, "soil": 563, "roll": 564, "temperature": 565,
216
- "finger": 566, "industry": 567, "value": 568, "fight": 569, "lie": 570, "beat": 571,
217
- "excite": 572, "natural": 573, "view": 574, "sense": 575, "capital": 576, "won't": 577,
218
- "chair": 578, "danger": 579, "fruit": 580, "rich": 581, "thick": 582, "soldier": 583,
219
- "process": 584, "operate": 585, "practice": 586, "separate": 587, "difficult": 588,
220
- "visit": 589, "spread": 590, "particular": 591, "catch": 592, "square": 593,
221
- "reason": 594, "length": 595, "represent": 596, "art": 597, "subject": 598,
222
- "region": 599, "size": 600, "vary": 601, "settle": 602, "speak": 603, "weight": 604,
223
- "general": 605, "ice": 606, "matter": 607, "circle": 608, "pair": 609, "include": 610,
224
- "divide": 611, "syllable": 612, "felt": 613, "grand": 614, "ball": 615, "yet": 616,
225
- "wave": 617, "drop": 618, "heart": 619, "present": 620, "heavy": 621, "dance": 622,
226
- "engine": 623, "position": 624, "arm": 625, "wide": 626, "sail": 627, "material": 628,
227
- "fraction": 629, "forest": 630, "sit": 631, "race": 632, "window": 633, "store": 634,
228
- "summer": 635, "train": 636, "sleep": 637, "prove": 638, "lone": 639, "leg": 640,
229
- "exercise": 641, "wall": 642, "catch": 643, "mount": 644, "wish": 645, "sky": 646,
230
- "board": 647, "joy": 648, "winter": 649, "sat": 650, "written": 651, "wild": 652,
231
- "instrument": 653, "kept": 654, "glass": 655, "grass": 656, "cow": 657, "job": 658,
232
- "edge": 659, "sign": 660, "visit": 661, "past": 662, "soft": 663, "fun": 664,
233
- "bright": 665, "gas": 666, "weather": 667, "month": 668, "million": 669, "bear": 670,
234
- "finish": 671, "happy": 672, "hope": 673, "flower": 674, "clothe": 675, "strange": 676,
235
- "gone": 677, "trade": 678, "melody": 679, "trip": 680, "office": 681, "receive": 682,
236
- "row": 683, "mouth": 684, "exact": 685, "symbol": 686, "die": 687, "least": 688,
237
- "trouble": 689, "shout": 690, "except": 691, "wrote": 692, "seed": 693, "tone": 694,
238
- "join": 695, "suggest": 696, "clean": 697, "break": 698, "lady": 699, "yard": 700,
239
- "rise": 701, "bad": 702, "blow": 703, "oil": 704, "blood": 705, "touch": 706,
240
- "grew": 707, "cent": 708, "mix": 709, "team": 710, "wire": 711, "cost": 712,
241
- "lost": 713, "brown": 714, "wear": 715, "garden": 716, "equal": 717, "sent": 718,
242
- "choose": 719, "fell": 720, "fit": 721, "flow": 722, "fair": 723, "bank": 724,
243
- "collect": 725, "save": 726, "control": 727, "decimal": 728, "ear": 729, "else": 730,
244
- "quite": 731, "broke": 732, "case": 733, "middle": 734, "kill": 735, "son": 736,
245
- "lake": 737, "moment": 738, "scale": 739, "loud": 740, "spring": 741, "observe": 742,
246
- "child": 743, "straight": 744, "consonant": 745, "nation": 746, "dictionary": 747,
247
- "paragraph": 748, "parent": 749, "shore": 750, "division": 751, "sheet": 752,
248
- "substance": 753, "favor": 754, "connect": 755, "post": 756, "spend": 757,
249
- "chord": 758, "fat": 759, "glad": 760, "original": 761, "share": 762, "station": 763,
250
- "dad": 764, "bread": 765, "charge": 766, "proper": 767, "bar": 768, "offer": 769,
251
- "segment": 770, "slave": 771, "duck": 772, "instant": 773, "market": 774,
252
- "degree": 775, "populate": 776, "chick": 777, "dear": 778, "enemy": 779, "reply": 780,
253
- "drink": 781, "occur": 782, "support": 783, "speech": 784, "nature": 785, "range": 786,
254
- "steam": 787, "motion": 788, "path": 789, "liquid": 790, "log": 791, "meant": 792,
255
- "quotient": 793, "teeth": 794, "shell": 795, "neck": 796, "oxygen": 797, "sugar": 798,
256
- "death": 799, "pretty": 800, "skill": 801, "women": 802, "season": 803, "solution": 804,
257
- "magnet": 805, "silver": 806, "thank": 807, "branch": 808, "match": 809, "suffix": 810,
258
- "especially": 811, "fig": 812, "afraid": 813, "huge": 814, "sister": 815, "steel": 816,
259
- "discuss": 817, "forward": 818, "similar": 819, "guide": 820, "experience": 821,
260
- "score": 822, "apple": 823, "bought": 824, "led": 825, "pitch": 826, "coat": 827,
261
- "mass": 828, "card": 829, "band": 830, "rope": 831, "slip": 832, "win": 833,
262
- "dream": 834, "evening": 835, "condition": 836, "feed": 837, "tool": 838, "total": 839,
263
- "basic": 840, "smell": 841, "valley": 842, "nor": 843, "double": 844, "seat": 845,
264
- "continue": 846, "block": 847, "chart": 848, "hat": 849, "sell": 850, "success": 851,
265
- "company": 852, "subtract": 853, "event": 854, "particular": 855, "deal": 856,
266
- "swim": 857, "term": 858, "opposite": 859, "wife": 860, "shoe": 861, "shoulder": 862,
267
- "spread": 863, "arrange": 864, "camp": 865, "invent": 866, "cotton": 867, "born": 868,
268
- "determine": 869, "quart": 870, "nine": 871, "truck": 872, "noise": 873, "level": 874,
269
- "chance": 875, "gather": 876, "shop": 877, "stretch": 878, "throw": 879, "shine": 880,
270
- "property": 881, "column": 882, "molecule": 883, "select": 884, "wrong": 885,
271
- "gray": 886, "repeat": 887, "require": 888, "broad": 889, "prepare": 890, "salt": 891,
272
- "nose": 892, "plural": 893, "anger": 894, "claim": 895, "continent": 896, "mom": 897,
273
- "dad": 898, "bread": 899, "original": 900, "station": 901, "radio": 902, "art": 903,
274
- "object": 904, "general": 905, "ice": 906, "engine": 907, "port": 908, "window": 909,
275
- "job": 910, "melody": 911, "trade": 912, "rail": 913, "trip": 914, "seed": 915,
276
- "tone": 916, "clean": 917, "lady": 918, "yard": 919, "blow": 920, "oil": 921,
277
- "cent": 922, "cost": 923, "brown": 924, "garden": 925, "bank": 926, "decimal": 927,
278
- "division": 928, "favor": 929, "original": 930, "proper": 931, "enemy": 932,
279
- "solution": 933, "thank": 934, "huge": 935, "discuss": 936, "guide": 937, "bought": 938,
280
- "mass": 939, "rope": 940, "evening": 941, "smell": 942, "nor": 943, "sell": 944,
281
- "subtract": 945, "swim": 946, "opposite": 947, "shoe": 948, "spread": 949, "born": 950,
282
- "noise": 951, "gather": 952, "throw": 953, "column": 954, "wrong": 955, "gray": 956,
283
- "require": 957, "prepare": 958, "plural": 959, "continent": 960, "basic": 961,
284
- "double": 962, "success": 963, "event": 964, "shoulder": 965, "nine": 966,
285
- "property": 967, "broad": 968, "anger": 969, "dad": 970, "rail": 971, "deal": 972,
286
- "level": 973, "stretch": 974, "chance": 975, "determine": 976, "nose": 977,
287
- "steel": 978, "feed": 979, "cotton": 980, "truck": 981, "band": 982, "seat": 983,
288
- "hat": 984, "particular": 985, "shoulder": 986, "claim": 987, "pitch": 988,
289
- "valley": 989, "total": 990, "apple": 991, "select": 992, "repeat": 993, "wife": 994,
290
- "term": 995, "camp": 996, "quart": 997, "shine": 998, "salt": 999, "molecule": 1000,
291
- # 1001-2000 - Less common but still frequent
292
- "temperature": 1001, "finger": 1002, "industry": 1003, "value": 1004, "fight": 1005,
293
- "lie": 1006, "beat": 1007, "excite": 1008, "natural": 1009, "view": 1010,
294
- "sense": 1011, "capital": 1012, "chair": 1013, "danger": 1014, "fruit": 1015,
295
- "rich": 1016, "thick": 1017, "soldier": 1018, "operate": 1019, "practice": 1020,
296
- "difficult": 1021, "doctor": 1022, "please": 1023, "protect": 1024, "noon": 1025,
297
- "crop": 1026, "modern": 1027, "element": 1028, "hit": 1029, "student": 1030,
298
- "corner": 1031, "party": 1032, "supply": 1033, "bone": 1034, "tube": 1035,
299
- "famous": 1036, "dollar": 1037, "stream": 1038, "fear": 1039, "sight": 1040,
300
- "thin": 1041, "triangle": 1042, "planet": 1043, "hurry": 1044, "chief": 1045,
301
- "colony": 1046, "clock": 1047, "mine": 1048, "tie": 1049, "enter": 1050,
302
- "major": 1051, "fresh": 1052, "search": 1053, "send": 1054, "yellow": 1055,
303
- "gun": 1056, "allow": 1057, "print": 1058, "dead": 1059, "spot": 1060,
304
- "desert": 1061, "suit": 1062, "current": 1063, "lift": 1064, "rose": 1065,
305
- "arrive": 1066, "master": 1067, "track": 1068, "locate": 1069, "ring": 1070,
306
- "believe": 1071, "gentle": 1072, "woman": 1073, "captain": 1074, "guess": 1075,
307
- "necessary": 1076, "sharp": 1077, "wing": 1078, "create": 1079, "neighbor": 1080,
308
- "wash": 1081, "bat": 1082, "rather": 1083, "crowd": 1084, "corn": 1085,
309
- "compare": 1086, "poem": 1087, "string": 1088, "bell": 1089, "depend": 1090,
310
- "meat": 1091, "rub": 1092, "tube": 1093, "famous": 1094, "dollar": 1095,
311
- "indicate": 1096, "metal": 1097, "whether": 1098, "push": 1099, "seven": 1100,
1044
+ "bring": 501,
1045
+ "sit": 502,
1046
+ "perhaps": 503,
1047
+ "fill": 504,
1048
+ "east": 505,
1049
+ "weight": 506,
1050
+ "language": 507,
1051
+ "among": 508,
1052
+ "cat": 509,
1053
+ "ball": 510,
1054
+ "human": 511,
1055
+ "doctor": 513,
1056
+ "office": 515,
1057
+ "break": 516,
1058
+ "die": 517,
1059
+ "radio": 518,
1060
+ "speak": 519,
1061
+ "atom": 520,
1062
+ "blood": 521,
1063
+ "felt": 522,
1064
+ "type": 523,
1065
+ "forward": 524,
1066
+ "century": 525,
1067
+ "milk": 526,
1068
+ "corner": 527,
1069
+ "speed": 528,
1070
+ "method": 529,
1071
+ "organ": 530,
1072
+ "pay": 531,
1073
+ "single": 532,
1074
+ "touch": 533,
1075
+ "control": 534,
1076
+ "bottom": 535,
1077
+ "design": 536,
1078
+ "coat": 537,
1079
+ "else": 538,
1080
+ "quite": 539,
1081
+ "broke": 540,
1082
+ "case": 541,
1083
+ "middle": 542,
1084
+ "kill": 543,
1085
+ "son": 544,
1086
+ "lake": 545,
1087
+ "moment": 546,
1088
+ "scale": 547,
1089
+ "loud": 548,
1090
+ "spring": 549,
1091
+ "observe": 550,
1092
+ "child": 551,
1093
+ "straight": 552,
1094
+ "consonant": 553,
1095
+ "nation": 554,
1096
+ "dictionary": 555,
1097
+ "bit": 556,
1098
+ "coast": 557,
1099
+ "copy": 558,
1100
+ "phrase": 559,
1101
+ "silent": 560,
1102
+ "tall": 561,
1103
+ "sand": 562,
1104
+ "soil": 563,
1105
+ "roll": 564,
1106
+ "temperature": 565,
1107
+ "finger": 566,
1108
+ "industry": 567,
1109
+ "value": 568,
1110
+ "fight": 569,
1111
+ "lie": 570,
1112
+ "beat": 571,
1113
+ "excite": 572,
1114
+ "natural": 573,
1115
+ "view": 574,
1116
+ "sense": 575,
1117
+ "capital": 576,
1118
+ "chair": 578,
1119
+ "danger": 579,
1120
+ "fruit": 580,
1121
+ "rich": 581,
1122
+ "thick": 582,
1123
+ "soldier": 583,
1124
+ "process": 584,
1125
+ "operate": 585,
1126
+ "practice": 586,
1127
+ "separate": 587,
1128
+ "difficult": 588,
1129
+ "visit": 589,
1130
+ "spread": 590,
1131
+ "particular": 591,
1132
+ "catch": 592,
1133
+ "square": 593,
1134
+ "reason": 594,
1135
+ "length": 595,
1136
+ "represent": 596,
1137
+ "art": 597,
1138
+ "subject": 598,
1139
+ "region": 599,
1140
+ "vary": 601,
1141
+ "settle": 602,
1142
+ "general": 605,
1143
+ "ice": 606,
1144
+ "matter": 607,
1145
+ "circle": 608,
1146
+ "pair": 609,
1147
+ "include": 610,
1148
+ "divide": 611,
1149
+ "syllable": 612,
1150
+ "grand": 614,
1151
+ "wave": 617,
1152
+ "drop": 618,
1153
+ "heart": 619,
1154
+ "present": 620,
1155
+ "heavy": 621,
1156
+ "dance": 622,
1157
+ "engine": 623,
1158
+ "position": 624,
1159
+ "arm": 625,
1160
+ "wide": 626,
1161
+ "sail": 627,
1162
+ "material": 628,
1163
+ "fraction": 629,
1164
+ "forest": 630,
1165
+ "race": 632,
1166
+ "window": 633,
1167
+ "store": 634,
1168
+ "summer": 635,
1169
+ "train": 636,
1170
+ "prove": 638,
1171
+ "lone": 639,
1172
+ "leg": 640,
1173
+ "exercise": 641,
1174
+ "wall": 642,
1175
+ "mount": 644,
1176
+ "wish": 645,
1177
+ "sky": 646,
1178
+ "board": 647,
1179
+ "joy": 648,
1180
+ "winter": 649,
1181
+ "sat": 650,
1182
+ "written": 651,
1183
+ "wild": 652,
1184
+ "instrument": 653,
1185
+ "kept": 654,
1186
+ "glass": 655,
1187
+ "grass": 656,
1188
+ "cow": 657,
1189
+ "job": 658,
1190
+ "edge": 659,
1191
+ "sign": 660,
1192
+ "past": 662,
1193
+ "soft": 663,
1194
+ "fun": 664,
1195
+ "bright": 665,
1196
+ "gas": 666,
1197
+ "weather": 667,
1198
+ "month": 668,
1199
+ "million": 669,
1200
+ "bear": 670,
1201
+ "finish": 671,
1202
+ "happy": 672,
1203
+ "hope": 673,
1204
+ "flower": 674,
1205
+ "clothe": 675,
1206
+ "strange": 676,
1207
+ "gone": 677,
1208
+ "trade": 678,
1209
+ "melody": 679,
1210
+ "trip": 680,
1211
+ "receive": 682,
1212
+ "row": 683,
1213
+ "mouth": 684,
1214
+ "exact": 685,
1215
+ "symbol": 686,
1216
+ "least": 688,
1217
+ "trouble": 689,
1218
+ "shout": 690,
1219
+ "except": 691,
1220
+ "wrote": 692,
1221
+ "seed": 693,
1222
+ "tone": 694,
1223
+ "join": 695,
1224
+ "suggest": 696,
1225
+ "clean": 697,
1226
+ "lady": 699,
1227
+ "yard": 700,
1228
+ "rise": 701,
1229
+ "bad": 702,
1230
+ "blow": 703,
1231
+ "oil": 704,
1232
+ "grew": 707,
1233
+ "cent": 708,
1234
+ "mix": 709,
1235
+ "team": 710,
1236
+ "wire": 711,
1237
+ "cost": 712,
1238
+ "lost": 713,
1239
+ "brown": 714,
1240
+ "wear": 715,
1241
+ "garden": 716,
1242
+ "equal": 717,
1243
+ "sent": 718,
1244
+ "choose": 719,
1245
+ "fell": 720,
1246
+ "fit": 721,
1247
+ "flow": 722,
1248
+ "fair": 723,
1249
+ "bank": 724,
1250
+ "collect": 725,
1251
+ "save": 726,
1252
+ "decimal": 728,
1253
+ "ear": 729,
1254
+ "paragraph": 748,
1255
+ "parent": 749,
1256
+ "shore": 750,
1257
+ "division": 751,
1258
+ "sheet": 752,
1259
+ "substance": 753,
1260
+ "favor": 754,
1261
+ "connect": 755,
1262
+ "post": 756,
1263
+ "spend": 757,
1264
+ "chord": 758,
1265
+ "fat": 759,
1266
+ "glad": 760,
1267
+ "original": 761,
1268
+ "share": 762,
1269
+ "station": 763,
1270
+ "dad": 764,
1271
+ "bread": 765,
1272
+ "charge": 766,
1273
+ "proper": 767,
1274
+ "bar": 768,
1275
+ "offer": 769,
1276
+ "segment": 770,
1277
+ "slave": 771,
1278
+ "duck": 772,
1279
+ "instant": 773,
1280
+ "market": 774,
1281
+ "degree": 775,
1282
+ "populate": 776,
1283
+ "chick": 777,
1284
+ "dear": 778,
1285
+ "enemy": 779,
1286
+ "reply": 780,
1287
+ "drink": 781,
1288
+ "occur": 782,
1289
+ "support": 783,
1290
+ "speech": 784,
1291
+ "nature": 785,
1292
+ "range": 786,
1293
+ "steam": 787,
1294
+ "motion": 788,
1295
+ "path": 789,
1296
+ "liquid": 790,
1297
+ "log": 791,
1298
+ "meant": 792,
1299
+ "quotient": 793,
1300
+ "teeth": 794,
1301
+ "shell": 795,
1302
+ "neck": 796,
1303
+ "oxygen": 797,
1304
+ "sugar": 798,
1305
+ "death": 799,
1306
+ "pretty": 800,
1307
+ "skill": 801,
1308
+ "women": 802,
1309
+ "season": 803,
1310
+ "solution": 804,
1311
+ "magnet": 805,
1312
+ "silver": 806,
1313
+ "thank": 807,
1314
+ "branch": 808,
1315
+ "match": 809,
1316
+ "suffix": 810,
1317
+ "especially": 811,
1318
+ "fig": 812,
1319
+ "afraid": 813,
1320
+ "huge": 814,
1321
+ "sister": 815,
1322
+ "steel": 816,
1323
+ "discuss": 817,
1324
+ "similar": 819,
1325
+ "guide": 820,
1326
+ "experience": 821,
1327
+ "score": 822,
1328
+ "apple": 823,
1329
+ "bought": 824,
1330
+ "led": 825,
1331
+ "pitch": 826,
1332
+ "mass": 828,
1333
+ "card": 829,
1334
+ "band": 830,
1335
+ "rope": 831,
1336
+ "slip": 832,
1337
+ "win": 833,
1338
+ "dream": 834,
1339
+ "evening": 835,
1340
+ "condition": 836,
1341
+ "feed": 837,
1342
+ "tool": 838,
1343
+ "total": 839,
1344
+ "basic": 840,
1345
+ "smell": 841,
1346
+ "valley": 842,
1347
+ "nor": 843,
1348
+ "double": 844,
1349
+ "seat": 845,
1350
+ "continue": 846,
1351
+ "block": 847,
1352
+ "chart": 848,
1353
+ "hat": 849,
1354
+ "sell": 850,
1355
+ "success": 851,
1356
+ "company": 852,
1357
+ "subtract": 853,
1358
+ "event": 854,
1359
+ "deal": 856,
1360
+ "swim": 857,
1361
+ "term": 858,
1362
+ "opposite": 859,
1363
+ "wife": 860,
1364
+ "shoe": 861,
1365
+ "shoulder": 862,
1366
+ "arrange": 864,
1367
+ "camp": 865,
1368
+ "invent": 866,
1369
+ "cotton": 867,
1370
+ "born": 868,
1371
+ "determine": 869,
1372
+ "quart": 870,
1373
+ "nine": 871,
1374
+ "truck": 872,
1375
+ "noise": 873,
1376
+ "level": 874,
1377
+ "chance": 875,
1378
+ "gather": 876,
1379
+ "shop": 877,
1380
+ "stretch": 878,
1381
+ "throw": 879,
1382
+ "shine": 880,
1383
+ "property": 881,
1384
+ "column": 882,
1385
+ "molecule": 883,
1386
+ "select": 884,
1387
+ "wrong": 885,
1388
+ "gray": 886,
1389
+ "repeat": 887,
1390
+ "require": 888,
1391
+ "broad": 889,
1392
+ "prepare": 890,
1393
+ "salt": 891,
1394
+ "nose": 892,
1395
+ "plural": 893,
1396
+ "anger": 894,
1397
+ "claim": 895,
1398
+ "continent": 896,
1399
+ "mom": 897,
1400
+ "rail": 913,
1401
+ "please": 1023,
1402
+ "protect": 1024,
1403
+ "noon": 1025,
1404
+ "crop": 1026,
1405
+ "modern": 1027,
1406
+ "element": 1028,
1407
+ "hit": 1029,
1408
+ "student": 1030,
1409
+ "party": 1032,
1410
+ "supply": 1033,
1411
+ "bone": 1034,
1412
+ "tube": 1035,
1413
+ "famous": 1036,
1414
+ "dollar": 1037,
1415
+ "stream": 1038,
1416
+ "fear": 1039,
1417
+ "sight": 1040,
1418
+ "thin": 1041,
1419
+ "triangle": 1042,
1420
+ "planet": 1043,
1421
+ "hurry": 1044,
1422
+ "chief": 1045,
1423
+ "colony": 1046,
1424
+ "clock": 1047,
1425
+ "mine": 1048,
1426
+ "tie": 1049,
1427
+ "enter": 1050,
1428
+ "major": 1051,
1429
+ "fresh": 1052,
1430
+ "search": 1053,
1431
+ "send": 1054,
1432
+ "yellow": 1055,
1433
+ "gun": 1056,
1434
+ "allow": 1057,
1435
+ "print": 1058,
1436
+ "dead": 1059,
1437
+ "spot": 1060,
1438
+ "desert": 1061,
1439
+ "suit": 1062,
1440
+ "current": 1063,
1441
+ "lift": 1064,
1442
+ "rose": 1065,
1443
+ "arrive": 1066,
1444
+ "master": 1067,
1445
+ "track": 1068,
1446
+ "locate": 1069,
1447
+ "ring": 1070,
1448
+ "believe": 1071,
1449
+ "gentle": 1072,
1450
+ "woman": 1073,
1451
+ "captain": 1074,
1452
+ "guess": 1075,
1453
+ "necessary": 1076,
1454
+ "sharp": 1077,
1455
+ "wing": 1078,
1456
+ "create": 1079,
1457
+ "neighbor": 1080,
1458
+ "wash": 1081,
1459
+ "bat": 1082,
1460
+ "rather": 1083,
1461
+ "crowd": 1084,
1462
+ "corn": 1085,
1463
+ "compare": 1086,
1464
+ "poem": 1087,
1465
+ "string": 1088,
1466
+ "bell": 1089,
1467
+ "depend": 1090,
1468
+ "meat": 1091,
1469
+ "rub": 1092,
1470
+ "indicate": 1096,
1471
+ "metal": 1097,
1472
+ "whether": 1098,
1473
+ "push": 1099,
1474
+ "seven": 1100,
312
1475
  # Additional common words, ranks 1101 and up (sparse sample, not a full 1101-5000 list)
313
- "village": 1101, "meet": 1102, "root": 1103, "buy": 1104, "raise": 1105,
314
- "solve": 1106, "understand": 1107, "member": 1108, "syllable": 1109, "second": 1110,
315
- "blue": 1111, "describe": 1112, "develop": 1113, "ocean": 1114, "electric": 1115,
316
- "expect": 1116, "bone": 1117, "rail": 1118, "imagine": 1119, "provide": 1120,
317
- "agree": 1121, "thus": 1122, "capital": 1123, "chair": 1124, "danger": 1125,
318
- "fruit": 1126, "thick": 1127, "soldier": 1128, "process": 1129, "operate": 1130,
319
- "difficult": 1131, "visit": 1132, "separate": 1133, "particular": 1134, "catch": 1135,
320
- "square": 1136, "reason": 1137, "length": 1138, "represent": 1139, "art": 1140,
321
- # Continue with progressively less common words...
1476
+ "village": 1101,
1477
+ "meet": 1102,
1478
+ "root": 1103,
1479
+ "buy": 1104,
1480
+ "raise": 1105,
1481
+ "solve": 1106,
1482
+ "understand": 1107,
1483
+ "member": 1108,
1484
+ "describe": 1112,
1485
+ "ocean": 1114,
1486
+ "electric": 1115,
1487
+ "expect": 1116,
1488
+ "imagine": 1119,
1489
+ "provide": 1120,
1490
+ "agree": 1121,
1491
+ "thus": 1122,
322
1492
  # For brevity, jumping to approximate ranks for less common words
323
- "political": 1500, "social": 1501, "business": 1502, "service": 1503,
324
- "attention": 1504, "international": 1505, "various": 1506, "community": 1507,
325
- "national": 1508, "american": 1509, "president": 1510, "available": 1511,
326
- "information": 1512, "development": 1513, "question": 1514, "different": 1515,
327
- "important": 1516, "education": 1517, "director": 1518, "economic": 1519,
328
- "evidence": 1520, "management": 1521, "hospital": 1522, "personal": 1523,
329
- "understand": 1524, "director": 1525, "professional": 1526, "performance": 1527,
330
- "individual": 1528, "organization": 1529, "structure": 1530, "responsibility": 1531,
331
- "technology": 1532, "democratic": 1533, "relationship": 1534, "environmental": 1535,
332
- "significantly": 1536, "particularly": 1537, "approximately": 1538, "ultimately": 1539,
333
- "comprehensive": 1540, "substantial": 1541, "fundamental": 1542, "analysis": 1543,
334
- "investigation": 1544, "comprehensive": 1545, "demonstrate": 1546, "theoretical": 1547,
335
- "significant": 1548, "hypothesis": 1549, "empirical": 1550, "methodology": 1551,
336
- "framework": 1552, "implications": 1553, "phenomena": 1554, "parameters": 1555,
337
- "correlation": 1556, "variables": 1557, "statistical": 1558, "preliminary": 1559,
1493
+ "political": 1500,
1494
+ "social": 1501,
1495
+ "business": 1502,
1496
+ "service": 1503,
1497
+ "attention": 1504,
1498
+ "international": 1505,
1499
+ "various": 1506,
1500
+ "community": 1507,
1501
+ "national": 1508,
1502
+ "american": 1509,
1503
+ "president": 1510,
1504
+ "available": 1511,
1505
+ "information": 1512,
1506
+ "development": 1513,
1507
+ "different": 1515,
1508
+ "important": 1516,
1509
+ "education": 1517,
1510
+ "director": 1518,
1511
+ "economic": 1519,
1512
+ "evidence": 1520,
1513
+ "management": 1521,
1514
+ "hospital": 1522,
1515
+ "personal": 1523,
1516
+ "professional": 1526,
1517
+ "performance": 1527,
1518
+ "individual": 1528,
1519
+ "organization": 1529,
1520
+ "structure": 1530,
1521
+ "responsibility": 1531,
1522
+ "technology": 1532,
1523
+ "democratic": 1533,
1524
+ "relationship": 1534,
1525
+ "environmental": 1535,
1526
+ "significantly": 1536,
1527
+ "particularly": 1537,
1528
+ "approximately": 1538,
1529
+ "ultimately": 1539,
1530
+ "comprehensive": 1540,
1531
+ "substantial": 1541,
1532
+ "fundamental": 1542,
1533
+ "analysis": 1543,
1534
+ "investigation": 1544,
1535
+ "demonstrate": 1546,
1536
+ "theoretical": 1547,
1537
+ "significant": 1548,
1538
+ "hypothesis": 1549,
1539
+ "empirical": 1550,
1540
+ "methodology": 1551,
1541
+ "framework": 1552,
1542
+ "implications": 1553,
1543
+ "phenomena": 1554,
1544
+ "parameters": 1555,
1545
+ "correlation": 1556,
1546
+ "variables": 1557,
1547
+ "statistical": 1558,
1548
+ "preliminary": 1559,
338
1549
  }
339
1550
 
340
1551
 
@@ -360,11 +1571,11 @@ def _tokenize_for_frequency_analysis(text: str) -> list[str]:
360
1571
  raw_tokens = text_lower.split()
361
1572
 
362
1573
  # Comprehensive punctuation set
363
- PUNCTUATION = set(".,!?;:'\"()[]{}/-—–…*&@#$%^~`\\|<>«»„\"\"''‚'")
1574
+ punctuation_chars = set(".,!?;:'\"()[]{}/-—–…*&@#$%^~`\\|<>«»„\"\"''‚'")
364
1575
 
365
1576
  tokens = []
366
1577
  for token in raw_tokens:
367
- clean_token = token.strip("".join(PUNCTUATION))
1578
+ clean_token = token.strip("".join(punctuation_chars))
368
1579
  if clean_token:
369
1580
  tokens.append(clean_token)
370
1581
 
@@ -391,6 +1602,7 @@ def compute_word_frequency_sophistication(
391
1602
  frequency_corpus: str = "coca",
392
1603
  rare_threshold: int = 10000,
393
1604
  common_threshold: int = 1000,
1605
+ chunk_size: int = 1000,
394
1606
  ) -> WordFrequencySophisticationResult:
395
1607
  """
396
1608
  Compute word frequency sophistication metrics.
@@ -477,9 +1689,7 @@ def compute_word_frequency_sophistication(
477
1689
  """
478
1690
  # Validate corpus parameter
479
1691
  if frequency_corpus != "coca":
480
- raise ValueError(
481
- f"Only 'coca' corpus is currently supported, got '{frequency_corpus}'"
482
- )
1692
+ raise ValueError(f"Only 'coca' corpus is currently supported, got '{frequency_corpus}'")
483
1693
 
484
1694
  # Load frequency dictionary
485
1695
  frequency_dict = COCA_FREQUENCY_RANKS
@@ -534,9 +1744,7 @@ def compute_word_frequency_sophistication(
534
1744
  "rare": sum(1 for r in word_ranks if 10000 < r <= 20000),
535
1745
  "very_rare": sum(1 for r in word_ranks if r > 20000),
536
1746
  }
537
- frequency_band_distribution = {
538
- band: count / total_words for band, count in band_counts.items()
539
- }
1747
+ frequency_band_distribution = {band: count / total_words for band, count in band_counts.items()}
540
1748
 
541
1749
  # Find rarest and most common words (top 10 each, deduplicated)
542
1750
  word_rank_pairs = list(zip(tokens, word_ranks))
@@ -554,6 +1762,14 @@ def compute_word_frequency_sophistication(
554
1762
  sorted_by_common = sorted(unique_pairs.items(), key=lambda x: x[1])
555
1763
  most_common_words = [(word, float(rank)) for word, rank in sorted_by_common[:10]]
556
1764
 
1765
+ # Create single-value distributions (analysis is done on full text)
1766
+ mean_frequency_rank_dist = make_distribution([mean_rank])
1767
+ median_frequency_rank_dist = make_distribution([median_rank])
1768
+ rare_word_ratio_dist = make_distribution([rare_word_ratio])
1769
+ common_word_ratio_dist = make_distribution([common_word_ratio])
1770
+ academic_word_ratio_dist = make_distribution([academic_word_ratio])
1771
+ advanced_word_ratio_dist = make_distribution([advanced_word_ratio])
1772
+
557
1773
  # Metadata
558
1774
  metadata = {
559
1775
  "frequency_corpus": frequency_corpus,
@@ -577,5 +1793,13 @@ def compute_word_frequency_sophistication(
577
1793
  frequency_band_distribution=frequency_band_distribution,
578
1794
  rarest_words=rarest_words,
579
1795
  most_common_words=most_common_words,
1796
+ mean_frequency_rank_dist=mean_frequency_rank_dist,
1797
+ median_frequency_rank_dist=median_frequency_rank_dist,
1798
+ rare_word_ratio_dist=rare_word_ratio_dist,
1799
+ common_word_ratio_dist=common_word_ratio_dist,
1800
+ academic_word_ratio_dist=academic_word_ratio_dist,
1801
+ advanced_word_ratio_dist=advanced_word_ratio_dist,
1802
+ chunk_size=chunk_size,
1803
+ chunk_count=1, # Single pass analysis
580
1804
  metadata=metadata,
581
1805
  )