pystylometry 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. pystylometry/__init__.py +1 -2
  2. pystylometry/_normalize.py +277 -0
  3. pystylometry/_types.py +1224 -2
  4. pystylometry/_utils.py +4 -0
  5. pystylometry/authorship/__init__.py +4 -0
  6. pystylometry/authorship/additional_methods.py +100 -0
  7. pystylometry/character/__init__.py +15 -0
  8. pystylometry/character/character_metrics.py +301 -0
  9. pystylometry/lexical/__init__.py +13 -6
  10. pystylometry/lexical/advanced_diversity.py +641 -0
  11. pystylometry/lexical/function_words.py +391 -0
  12. pystylometry/lexical/hapax.py +154 -7
  13. pystylometry/lexical/mtld.py +83 -7
  14. pystylometry/lexical/ttr.py +83 -0
  15. pystylometry/lexical/word_frequency_sophistication.py +581 -0
  16. pystylometry/lexical/yule.py +34 -7
  17. pystylometry/ngrams/__init__.py +2 -0
  18. pystylometry/ngrams/extended_ngrams.py +235 -0
  19. pystylometry/prosody/__init__.py +12 -0
  20. pystylometry/prosody/rhythm_prosody.py +53 -0
  21. pystylometry/readability/__init__.py +12 -0
  22. pystylometry/readability/additional_formulas.py +985 -0
  23. pystylometry/readability/ari.py +93 -17
  24. pystylometry/readability/coleman_liau.py +102 -9
  25. pystylometry/readability/complex_words.py +531 -0
  26. pystylometry/readability/flesch.py +59 -14
  27. pystylometry/readability/gunning_fog.py +194 -25
  28. pystylometry/readability/smog.py +31 -14
  29. pystylometry/readability/syllables.py +137 -30
  30. pystylometry/stylistic/__init__.py +20 -0
  31. pystylometry/stylistic/cohesion_coherence.py +45 -0
  32. pystylometry/stylistic/genre_register.py +45 -0
  33. pystylometry/stylistic/markers.py +131 -0
  34. pystylometry/stylistic/vocabulary_overlap.py +47 -0
  35. pystylometry/syntactic/__init__.py +4 -0
  36. pystylometry/syntactic/advanced_syntactic.py +432 -0
  37. pystylometry/syntactic/pos_ratios.py +104 -13
  38. pystylometry/syntactic/sentence_stats.py +57 -13
  39. pystylometry/syntactic/sentence_types.py +470 -0
  40. {pystylometry-0.1.0.dist-info → pystylometry-1.0.0.dist-info}/METADATA +49 -12
  41. pystylometry-1.0.0.dist-info/RECORD +46 -0
  42. {pystylometry-0.1.0.dist-info → pystylometry-1.0.0.dist-info}/WHEEL +1 -1
  43. pystylometry-0.1.0.dist-info/RECORD +0 -26
@@ -0,0 +1,985 @@
+ """Additional readability formulas.
+
+ This module provides additional readability metrics beyond the core formulas
+ (Flesch, SMOG, Gunning Fog, Coleman-Liau, ARI). These formulas offer alternative
+ approaches to measuring text difficulty and are valuable for cross-validation
+ and comprehensive readability assessment.
+
+ Related GitHub Issue:
+     #16 - Additional Readability Formulas
+     https://github.com/craigtrim/pystylometry/issues/16
+
+ Formulas implemented:
+     - Dale-Chall: Based on a list of 3000 familiar words
+     - Linsear Write: Developed for technical writing assessment
+     - Fry Readability Graph: Visual graph-based assessment
+     - FORCAST: Military formula using only single-syllable words
+     - Powers-Sumner-Kearl: Recalibrated Flesch for primary grades
+
+ References:
+     Dale, E., & Chall, J. S. (1948). A formula for predicting readability.
+     Chall, J. S., & Dale, E. (1995). Readability revisited: The new Dale-Chall
+         readability formula. Brookline Books.
+     Klare, G. R. (1974-1975). Assessing readability. Reading Research Quarterly.
+     Fry, E. (1968). A readability formula that saves time. Journal of Reading.
+     Caylor, J. S., et al. (1973). Methodologies for determining reading requirements
+         of military occupational specialties. Human Resources Research Organization.
+     Powers, R. D., Sumner, W. A., & Kearl, B. E. (1958). A recalculation of four
+         adult readability formulas. Journal of Educational Psychology.
+ """
+
+ from .._normalize import normalize_for_readability
+ from .._types import (
+     DaleChallResult,
+     FORCASTResult,
+     FryResult,
+     LinsearWriteResult,
+     PowersSumnerKearlResult,
+ )
+ from .._utils import split_sentences, tokenize
+ from .syllables import count_syllables
+
+
+ # Dale-Chall List of Familiar Words (subset of ~1200 words)
+ # GitHub Issue #16: https://github.com/craigtrim/pystylometry/issues/16
+ # Full Dale-Chall list has 3000 words that 80% of 4th graders understand.
+ # This is a representative subset covering most common everyday words.
+ DALE_CHALL_FAMILIAR_WORDS = {
+     # Articles, pronouns, determiners
+     "a", "an", "the", "this", "that", "these", "those", "some", "any", "all",
+     "each", "every", "both", "few", "many", "much", "more", "most", "other",
+     "another", "such", "what", "which", "who", "whom", "whose", "whoever",
+     "i", "me", "my", "mine", "myself", "we", "us", "our", "ours", "ourselves",
+     "you", "your", "yours", "yourself", "yourselves",
+     "he", "him", "his", "himself", "she", "her", "hers", "herself",
+     "it", "its", "itself", "they", "them", "their", "theirs", "themselves",
+     "one", "ones", "someone", "somebody", "something", "anyone", "anybody", "anything",
+     "everyone", "everybody", "everything", "no", "none", "nobody", "nothing",
+
+     # Conjunctions and prepositions
+     "and", "or", "but", "if", "when", "where", "why", "how", "because", "so",
+     "for", "nor", "yet", "after", "before", "while", "since", "until", "unless",
+     "though", "although", "whether", "than", "as", "like",
+     "of", "to", "in", "on", "at", "by", "with", "from", "about", "into",
+     "through", "over", "under", "above", "below", "between", "among", "against",
+     "during", "without", "within", "along", "across", "behind", "beside", "near",
+     "off", "out", "up", "down", "around", "past", "toward", "upon",
+
+     # Common verbs (base, past, -ing, -ed forms included)
+     "be", "am", "is", "are", "was", "were", "been", "being",
+     "have", "has", "had", "having", "do", "does", "did", "doing", "done",
+     "will", "would", "shall", "should", "may", "might", "must", "can", "could",
+     "go", "goes", "went", "gone", "going", "come", "comes", "came", "coming",
+     "make", "makes", "made", "making", "get", "gets", "got", "getting", "gotten",
+     "know", "knows", "knew", "known", "knowing",
+     "think", "thinks", "thought", "thinking",
+     "see", "sees", "saw", "seen", "seeing", "look", "looks", "looked", "looking",
+     "take", "takes", "took", "taken", "taking", "give", "gives", "gave", "given", "giving",
+     "find", "finds", "found", "finding", "tell", "tells", "told", "telling",
+     "ask", "asks", "asked", "asking", "work", "works", "worked", "working",
+     "seem", "seems", "seemed", "seeming", "feel", "feels", "felt", "feeling",
+     "try", "tries", "tried", "trying", "leave", "leaves", "left", "leaving",
+     "call", "calls", "called", "calling", "use", "uses", "used", "using",
+     "want", "wants", "wanted", "wanting", "need", "needs", "needed", "needing",
+     "say", "says", "said", "saying", "talk", "talks", "talked", "talking",
+     "turn", "turns", "turned", "turning", "run", "runs", "ran", "running",
+     "move", "moves", "moved", "moving", "live", "lives", "lived", "living",
+     "believe", "believes", "believed", "believing",
+     "hold", "holds", "held", "holding", "bring", "brings", "brought", "bringing",
+     "happen", "happens", "happened", "happening",
+     "write", "writes", "wrote", "written", "writing",
+     "sit", "sits", "sat", "sitting", "stand", "stands", "stood", "standing",
+     "hear", "hears", "heard", "hearing", "let", "lets", "letting",
+     "help", "helps", "helped", "helping", "show", "shows", "showed", "shown", "showing",
+     "play", "plays", "played", "playing", "read", "reads", "reading",
+     "change", "changes", "changed", "changing", "keep", "keeps", "kept", "keeping",
+     "start", "starts", "started", "starting", "stop", "stops", "stopped", "stopping",
+     "learn", "learns", "learned", "learning", "grow", "grows", "grew", "grown", "growing",
+     "open", "opens", "opened", "opening", "close", "closes", "closed", "closing",
+     "walk", "walks", "walked", "walking", "win", "wins", "won", "winning",
+     "begin", "begins", "began", "begun", "beginning", "end", "ends", "ended", "ending",
+     "lose", "loses", "lost", "losing", "send", "sends", "sent", "sending",
+     "buy", "buys", "bought", "buying", "pay", "pays", "paid", "paying",
+     "eat", "eats", "ate", "eaten", "eating", "drink", "drinks", "drank", "drinking",
+     "sleep", "sleeps", "slept", "sleeping", "wake", "wakes", "woke", "waking",
+     "sing", "sings", "sang", "sung", "singing", "dance", "dances", "danced", "dancing",
+     "wait", "waits", "waited", "waiting", "stay", "stays", "stayed", "staying",
+     "fly", "flies", "flew", "flown", "flying", "fall", "falls", "fell", "fallen", "falling",
+     "cut", "cuts", "cutting", "break", "breaks", "broke", "broken", "breaking",
+     "watch", "watches", "watched", "watching", "listen", "listens", "listened", "listening",
+     "remember", "remembers", "remembered", "remembering",
+     "forget", "forgets", "forgot", "forgotten", "forgetting",
+     "meet", "meets", "met", "meeting", "follow", "follows", "followed", "following",
+     "carry", "carries", "carried", "carrying", "catch", "catches", "caught", "catching",
+     "draw", "draws", "drew", "drawn", "drawing", "drive", "drives", "drove", "driven", "driving",
+     "ride", "rides", "rode", "ridden", "riding", "wear", "wears", "wore", "worn", "wearing",
+     "pull", "pulls", "pulled", "pulling", "push", "pushes", "pushed", "pushing",
+     "throw", "throws", "threw", "thrown", "throwing",
+     "reach", "reaches", "reached", "reaching", "pass", "passes", "passed", "passing",
+     "shoot", "shoots", "shot", "shooting", "rise", "rises", "rose", "risen", "rising",
+     "blow", "blows", "blew", "blown", "blowing",
+     "hit", "hits", "hitting", "fight", "fights", "fought", "fighting",
+     "die", "dies", "died", "dying", "kill", "kills", "killed", "killing",
+     "speak", "speaks", "spoke", "spoken", "speaking",
+
+     # Common nouns
+     "time", "times", "year", "years", "day", "days", "week", "weeks",
+     "month", "months", "hour", "hours", "minute", "minutes", "second", "seconds",
+     "morning", "afternoon", "evening", "night", "today", "yesterday", "tomorrow",
+     "people", "person", "man", "men", "woman", "women", "child", "children",
+     "boy", "boys", "girl", "girls", "baby", "babies", "friend", "friends",
+     "family", "families", "mother", "father", "parent", "parents",
+     "brother", "brothers", "sister", "sisters", "son", "daughter",
+     "place", "places", "home", "house", "houses", "room", "rooms",
+     "school", "schools", "class", "classes", "student", "students", "teacher", "teachers",
+     "way", "ways", "thing", "things", "part", "parts", "group", "groups",
+     "number", "numbers", "side", "sides", "kind", "kinds", "head", "heads",
+     "hand", "hands", "eye", "eyes", "face", "faces", "body", "bodies",
+     "foot", "feet", "arm", "arms", "leg", "legs", "ear", "ears", "mouth",
+     "water", "food", "air", "land", "earth", "ground", "world",
+     "country", "countries", "state", "states", "city", "cities", "town", "towns",
+     "name", "names", "word", "words", "line", "lines", "page", "pages",
+     "book", "books", "story", "stories", "letter", "letters", "paper", "papers",
+     "point", "points", "end", "ends", "top", "bottom", "front", "back",
+     "life", "lives", "problem", "problems", "question", "questions", "answer", "answers",
+     "work", "works", "job", "jobs", "money", "door", "doors", "window", "windows",
+     "car", "cars", "road", "roads", "street", "streets", "tree", "trees",
+     "animal", "animals", "bird", "birds", "fish", "dog", "dogs", "cat", "cats",
+     "horse", "horses", "sea", "mountain", "mountains", "river", "rivers",
+     "sun", "moon", "star", "stars", "sky", "cloud", "clouds", "rain", "snow",
+     "wind", "fire", "light", "dark", "sound", "sounds", "color", "colors",
+     "white", "black", "red", "blue", "green", "yellow", "brown", "orange",
+     "game", "games", "ball", "music", "song", "songs", "picture", "pictures",
+     "table", "tables", "chair", "chairs", "bed", "beds", "floor", "wall", "walls",
+     "minute", "power", "war", "force", "age", "care", "order", "case",
+
+     # Common adjectives
+     "good", "better", "best", "bad", "worse", "worst",
+     "big", "bigger", "biggest", "small", "smaller", "smallest",
+     "large", "larger", "largest", "little", "less", "least",
+     "long", "longer", "longest", "short", "shorter", "shortest",
+     "high", "higher", "highest", "low", "lower", "lowest",
+     "old", "older", "oldest", "young", "younger", "youngest", "new", "newer", "newest",
+     "great", "greater", "greatest", "important", "right", "left", "own",
+     "other", "different", "same", "next", "last", "first", "second", "third",
+     "early", "earlier", "earliest", "late", "later", "latest",
+     "easy", "easier", "easiest", "hard", "harder", "hardest",
+     "hot", "hotter", "hottest", "cold", "colder", "coldest",
+     "warm", "warmer", "warmest", "cool", "cooler", "coolest",
+     "fast", "faster", "fastest", "slow", "slower", "slowest",
+     "strong", "stronger", "strongest", "weak", "weaker", "weakest",
+     "happy", "happier", "happiest", "sad", "sadder", "saddest",
+     "nice", "nicer", "nicest", "kind", "kinder", "kindest",
+     "sure", "free", "full", "whole", "ready", "simple", "clear",
+     "real", "true", "certain", "public", "able", "several",
+     "open", "closed", "deep", "wide", "bright", "dark", "heavy", "light",
+     "clean", "dirty", "wet", "dry", "soft", "hard", "quiet", "loud",
+     "quick", "slow", "rich", "poor", "sick", "well", "dead", "alive",
+     "empty", "busy", "pretty", "beautiful", "ugly",
+
+     # Common adverbs
+     "very", "too", "so", "more", "most", "less", "least",
+     "well", "better", "best", "just", "only", "even", "still",
+     "also", "now", "then", "here", "there", "where",
+     "how", "when", "why", "not", "never", "always", "often",
+     "sometimes", "usually", "ever", "again", "back", "away",
+     "together", "once", "twice", "soon", "today", "yesterday", "tomorrow",
+     "already", "almost", "enough", "quite", "rather", "really",
+     "perhaps", "maybe", "probably", "certainly", "surely",
+     "yes", "no", "please", "thank", "sorry",
+
+     # Numbers
+     "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "ten",
+     "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen", "twenty",
+     "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety",
+     "hundred", "thousand", "million",
+     "first", "second", "third", "fourth", "fifth", "sixth", "seventh", "eighth", "ninth", "tenth",
+
+     # Additional common words
+     "able", "accept", "across", "act", "add", "afraid", "against", "agree",
+     "allow", "alone", "appear", "apple", "area", "arm", "arrive", "art",
+     "aunt", "ball", "become", "believe", "belong", "boat", "build",
+     "burn", "business", "chair", "chance", "church", "clear", "climb",
+     "clothe", "clothes", "company", "contain", "continue", "control",
+     "cook", "corner", "cost", "count", "course", "cover", "create",
+     "cross", "crowd", "cry", "decide", "depend", "describe", "develop",
+     "die", "direction", "discover", "doctor", "double", "drop", "during",
+     "edge", "effect", "eight", "either", "else", "enjoy", "enough",
+     "enter", "example", "except", "excite", "expect", "explain", "express",
+     "fact", "fair", "farm", "fear", "field", "fill", "final", "fine",
+     "finger", "finish", "flower", "force", "foreign", "forest", "form",
+     "fresh", "front", "garden", "general", "glass", "god", "gold",
+     "hang", "hat", "hope", "hot", "idea", "include", "increase",
+     "instead", "interest", "island", "join", "laugh", "law", "lead",
+     "lie", "lift", "list", "lock", "love", "machine", "mark",
+     "matter", "mean", "measure", "member", "mention", "middle", "mile",
+     "mind", "miss", "moment", "nation", "natural", "nature", "necessary",
+     "neighbor", "notice", "object", "ocean", "offer", "office", "opinion",
+     "paint", "pair", "party", "pattern", "period", "pick", "plan",
+     "plant", "position", "possible", "pound", "prepare", "present", "president",
+     "press", "prince", "print", "probable", "produce", "promise", "proper",
+     "protect", "prove", "purpose", "quarter", "queen", "question", "quick",
+     "quiet", "race", "raise", "range", "rate", "reason", "receive",
+     "record", "region", "remain", "reply", "report", "represent", "require",
+     "rest", "result", "return", "roll", "rule", "sail", "salt",
+     "save", "science", "season", "seat", "seem", "sell", "sense",
+     "sentence", "separate", "serve", "set", "settle", "seven", "shape",
+     "share", "ship", "shore", "sign", "silver", "single", "sir",
+     "six", "size", "skin", "soldier", "solve", "south", "space",
+     "special", "speed", "spell", "spend", "spread", "spring", "square",
+     "step", "stone", "straight", "strange", "stream", "strength", "strike",
+     "subject", "success", "sudden", "suffer", "suggest", "suit", "summer",
+     "supply", "support", "suppose", "surface", "surprise", "sweet", "swim",
+     "system", "tail", "taste", "teach", "team", "telephone", "television",
+     "temperature", "ten", "test", "thick", "thin", "though", "thousand",
+     "three", "tire", "total", "touch", "track", "train", "travel",
+     "trip", "trouble", "type", "uncle", "understand", "unit", "universe",
+     "value", "various", "view", "village", "visit", "voice", "vote",
+     "wagon", "wander", "warm", "wash", "wave", "wealth", "weather",
+     "weight", "welcome", "west", "wheel", "wild", "wind", "winter",
+     "wish", "wonder", "wood", "yard", "yellow",
+ }
+
+
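A minimal sketch of how this lookup is used downstream, assuming the module is importable as laid out in this diff (callers lower-case each token before testing membership, so matching is case-insensitive, and the module-level set gives O(1) lookups):

from pystylometry.readability.additional_formulas import DALE_CHALL_FAMILIAR_WORDS

for token in ("The", "believe", "photosynthesis"):
    # Lower-case before the membership test, as compute_dale_chall does.
    print(token, token.lower() in DALE_CHALL_FAMILIAR_WORDS)
# The True / believe True / photosynthesis False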
+ def compute_dale_chall(text: str) -> DaleChallResult:
+     """
+     Compute Dale-Chall Readability Formula.
+
+     The Dale-Chall formula estimates reading difficulty based on the percentage
+     of words that are NOT on a list of 3000 familiar words (words understood
+     by 80% of 4th graders). It also considers average sentence length.
+
+     Related GitHub Issue:
+         #16 - Additional Readability Formulas
+         https://github.com/craigtrim/pystylometry/issues/16
+
+     Formula:
+         Raw Score = 0.1579 * (difficult_words_pct) + 0.0496 * (avg_sentence_length)
+
+         If difficult_words_pct > 5%:
+             Adjusted Score = Raw Score + 3.6365
+
+     Grade Level Correspondence:
+         4.9 or lower: Grade 4 and below
+         5.0-5.9: Grades 5-6
+         6.0-6.9: Grades 7-8
+         7.0-7.9: Grades 9-10
+         8.0-8.9: Grades 11-12
+         9.0-9.9: Grades 13-15 (College)
+         10.0+: Grade 16+ (College Graduate)
+
+     Advantages:
+         - Based on empirical word familiarity data
+         - Works well for educational materials
+         - Well-validated across grade levels
+         - Considers both vocabulary and syntax
+
+     Disadvantages:
+         - Requires maintaining the 3000-word familiar list
+         - List is dated (1948, updated 1995)
+         - May not reflect modern vocabulary
+         - Doesn't account for concept difficulty
+
+     Args:
+         text: Input text to analyze. Should contain at least one complete
+             sentence. Empty text returns NaN values.
+
+     Returns:
+         DaleChallResult containing:
+             - dale_chall_score: The Dale-Chall readability score
+             - grade_level: Grade range (e.g., "7-8", "College")
+             - difficult_word_count: Words not on familiar list
+             - difficult_word_ratio: Difficult words / total words
+             - avg_sentence_length: Average words per sentence
+             - total_words: Total word count
+             - metadata: List of difficult words, adjusted score flag, etc.
+
+     Example:
+         >>> result = compute_dale_chall("Sample educational text...")
+         >>> print(f"Dale-Chall score: {result.dale_chall_score:.2f}")
+         Dale-Chall score: 6.57
+         >>> print(f"Grade level: {result.grade_level}")
+         Grade level: 7-8
+         >>> print(f"Difficult words: {result.difficult_word_ratio * 100:.1f}%")
+         Difficult words: 12.4%
+
+     Note:
+         - Case-insensitive word matching
+         - Punctuation stripped before word lookup
+         - Proper nouns may be flagged as difficult even if well-known
+         - Technical/specialized texts score higher than general texts
+     """
+     # Tokenize and segment
+     sentences = split_sentences(text)
+     tokens = tokenize(text)
+     word_tokens = normalize_for_readability(tokens)
+
+     if len(sentences) == 0 or len(word_tokens) == 0:
+         return DaleChallResult(
+             dale_chall_score=float("nan"),
+             grade_level="Unknown",
+             difficult_word_count=0,
+             difficult_word_ratio=float("nan"),
+             avg_sentence_length=float("nan"),
+             total_words=0,
+             metadata={
+                 "sentence_count": 0,
+                 "raw_score": float("nan"),
+                 "adjusted": False,
+                 "difficult_words_sample": [],
+             },
+         )
+
+     # Count difficult words (not in familiar list)
+     difficult_words = []
+     for word in word_tokens:
+         word_lower = word.lower()
+         if word_lower not in DALE_CHALL_FAMILIAR_WORDS:
+             difficult_words.append(word)
+
+     difficult_word_count = len(difficult_words)
+     difficult_word_ratio = difficult_word_count / len(word_tokens)
+     difficult_word_pct = difficult_word_ratio * 100
+
+     # Calculate average sentence length
+     avg_sentence_length = len(word_tokens) / len(sentences)
+
+     # Calculate raw score
+     raw_score = 0.1579 * difficult_word_pct + 0.0496 * avg_sentence_length
+
+     # Apply adjustment if difficult word % > 5.0
+     adjusted = difficult_word_pct > 5.0
+     if adjusted:
+         dale_chall_score = raw_score + 3.6365
+     else:
+         dale_chall_score = raw_score
+
+     # Map score to grade level
+     if dale_chall_score < 5.0:
+         grade_level = "4 and below"
+     elif dale_chall_score < 6.0:
+         grade_level = "5-6"
+     elif dale_chall_score < 7.0:
+         grade_level = "7-8"
+     elif dale_chall_score < 8.0:
+         grade_level = "9-10"
+     elif dale_chall_score < 9.0:
+         grade_level = "11-12"
+     elif dale_chall_score < 10.0:
+         grade_level = "College"
+     else:
+         grade_level = "College Graduate"
+
+     # Sample up to 20 unique difficult words for metadata (sorted for
+     # deterministic output; avoids huge lists)
+     difficult_words_sample = sorted(set(difficult_words))[:20]
+
+     metadata = {
+         "sentence_count": len(sentences),
+         "raw_score": raw_score,
+         "adjusted": adjusted,
+         "difficult_word_pct": difficult_word_pct,
+         "difficult_words_sample": difficult_words_sample,
+         "familiar_word_list_size": len(DALE_CHALL_FAMILIAR_WORDS),
+     }
+
+     return DaleChallResult(
+         dale_chall_score=dale_chall_score,
+         grade_level=grade_level,
+         difficult_word_count=difficult_word_count,
+         difficult_word_ratio=difficult_word_ratio,
+         avg_sentence_length=avg_sentence_length,
+         total_words=len(word_tokens),
+         metadata=metadata,
+     )
+
+
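A minimal usage sketch for compute_dale_chall, assuming the 1.0.0 wheel is installed and the module path matches the file list above; the sample text and printed values are illustrative:

from pystylometry.readability.additional_formulas import compute_dale_chall

result = compute_dale_chall(
    "The cat sat on the mat. "
    "Photosynthesis converts luminous energy into chemical energy."
)
# Tokens absent from DALE_CHALL_FAMILIAR_WORDS count as difficult, and the
# 3.6365 adjustment kicks in once they exceed 5% of all words.
print(result.dale_chall_score, result.grade_level)
print(result.metadata["difficult_words_sample"])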
+ def compute_linsear_write(text: str) -> LinsearWriteResult:
+     """
+     Compute Linsear Write Readability Formula.
+
+     Developed for the U.S. Air Force to assess technical writing, the Linsear
+     Write formula classifies words as "easy" (1-2 syllables) or "hard" (3+
+     syllables) and uses sentence length to estimate grade level.
+
+     Related GitHub Issue:
+         #16 - Additional Readability Formulas
+         https://github.com/craigtrim/pystylometry/issues/16
+
+     Formula:
+         1. Count "easy" words (1-2 syllables): multiply count by 1
+         2. Count "hard" words (3+ syllables): multiply count by 3
+         3. Divide sum by number of sentences
+         4. If result > 20, divide by 2 to get grade level
+         5. If result <= 20, subtract 2, then divide by 2
+
+     The formula is optimized for technical writing and works best with
+     passages of about 100 words.
+
+     Advantages:
+         - Simple binary classification (easy/hard)
+         - Effective for technical documents
+         - Fast computation
+         - Developed specifically for instructional materials
+
+     Disadvantages:
+         - Less well-known than other formulas
+         - Binary word classification is crude
+         - May overestimate difficulty of technical terms
+         - Limited validation compared to Flesch or Dale-Chall
+
+     Args:
+         text: Input text to analyze. Works best with 100-word samples.
+             Empty text returns NaN values.
+
+     Returns:
+         LinsearWriteResult containing:
+             - linsear_score: The Linsear Write score
+             - grade_level: Corresponding U.S. grade level (integer)
+             - easy_word_count: Words with 1-2 syllables
+             - hard_word_count: Words with 3+ syllables
+             - avg_sentence_length: Average words per sentence
+             - metadata: Calculation details, sentence count, etc.
+
+     Example:
+         >>> result = compute_linsear_write("Technical manual text...")
+         >>> print(f"Linsear Write score: {result.linsear_score:.2f}")
+         Linsear Write score: 20.57
+         >>> print(f"Grade level: {result.grade_level}")
+         Grade level: 10
+         >>> print(f"Easy words: {result.easy_word_count}")
+         Easy words: 78
+         >>> print(f"Hard words: {result.hard_word_count}")
+         Hard words: 22
+
+     Note:
+         - Syllable counting required (use existing syllable module)
+         - Punctuation and numbers typically excluded
+         - Most accurate with 100-word samples
+         - Grade level is rounded to nearest integer
+     """
+     # Tokenize and segment
+     sentences = split_sentences(text)
+     tokens = tokenize(text)
+     word_tokens = normalize_for_readability(tokens)
+
+     if len(sentences) == 0 or len(word_tokens) == 0:
+         return LinsearWriteResult(
+             linsear_score=float("nan"),
+             grade_level=0,
+             easy_word_count=0,
+             hard_word_count=0,
+             avg_sentence_length=float("nan"),
+             metadata={"sentence_count": 0, "total_words": 0, "raw_score": float("nan")},
+         )
+
+     # Classify words as easy (1-2 syllables) or hard (3+ syllables)
+     easy_word_count = 0
+     hard_word_count = 0
+
+     for word in word_tokens:
+         syllable_count = count_syllables(word)
+         if syllable_count <= 2:
+             easy_word_count += 1
+         else:
+             hard_word_count += 1
+
+     # Calculate weighted sum
+     weighted_sum = (easy_word_count * 1) + (hard_word_count * 3)
+
+     # Calculate score
+     raw_score = weighted_sum / len(sentences)
+
+     # Convert to grade level
+     if raw_score > 20:
+         grade_level = round(raw_score / 2)
+     else:
+         grade_level = round((raw_score - 2) / 2)
+
+     # Ensure grade level is non-negative
+     grade_level = max(0, grade_level)
+
+     # Calculate average sentence length
+     avg_sentence_length = len(word_tokens) / len(sentences)
+
+     # Build metadata
+     metadata = {
+         "total_words": len(word_tokens),
+         "sentence_count": len(sentences),
+         "raw_score": raw_score,
+         "weighted_sum": weighted_sum,
+     }
+
+     return LinsearWriteResult(
+         linsear_score=raw_score,
+         grade_level=grade_level,
+         easy_word_count=easy_word_count,
+         hard_word_count=hard_word_count,
+         avg_sentence_length=avg_sentence_length,
+         metadata=metadata,
+     )
+
+
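To make the five scoring steps concrete, here is a standalone sketch of the Linsear Write arithmetic on made-up counts (no package imports; the numbers are illustrative and match the docstring example above):

# 100-word sample: 78 easy words (1-2 syllables), 22 hard words (3+),
# spread over 7 sentences.
easy, hard, sentences = 78, 22, 7
raw = (easy * 1 + hard * 3) / sentences         # (78 + 66) / 7 = 20.57
grade = raw / 2 if raw > 20 else (raw - 2) / 2  # raw > 20, so just halve it
print(round(raw, 2), round(grade))              # 20.57 10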
+ def compute_fry(text: str) -> FryResult:
+     """
+     Compute Fry Readability Graph metrics.
+
+     The Fry Readability Graph plots average sentence length against average
+     syllables per 100 words to determine reading difficulty. This implementation
+     provides the numerical coordinates and estimated grade level.
+
+     Related GitHub Issue:
+         #16 - Additional Readability Formulas
+         https://github.com/craigtrim/pystylometry/issues/16
+
+     Method:
+         1. Select three 100-word samples from text
+         2. Compute average sentence length across samples
+         3. Compute average syllables per 100 words across samples
+         4. Plot coordinates on Fry graph (or use numerical approximation)
+         5. Determine grade level from graph zone
+
+     The original Fry graph has zones corresponding to grade levels 1-17+.
+     This implementation uses numerical approximation to estimate grade level.
+
+     Advantages:
+         - Visual/graphical approach (intuitive)
+         - Uses two independent dimensions (length & syllables)
+         - Well-validated for educational materials
+         - Covers wide range of grade levels (1-17+)
+
+     Disadvantages:
+         - Requires exactly 100-word samples (padding/truncation needed)
+         - Graph reading can be subjective
+         - Less precise than formula-based methods
+         - Multiple samples needed for reliability
+
+     Args:
+         text: Input text to analyze. Should contain at least 100 words.
+             Shorter texts are scored on whatever words are available.
+
+     Returns:
+         FryResult containing:
+             - avg_sentence_length: Average words per sentence
+             - avg_syllables_per_100: Average syllables per 100 words
+             - grade_level: Estimated grade level (e.g., "5", "7", "College")
+             - graph_zone: Which zone of Fry graph (for validity checking)
+             - metadata: Sample details, total sentences, syllables, etc.
+
+     Example:
+         >>> result = compute_fry("Educational text for grade assessment...")
+         >>> print(f"Avg sentence length: {result.avg_sentence_length:.1f}")
+         Avg sentence length: 14.3
+         >>> print(f"Syllables/100 words: {result.avg_syllables_per_100:.1f}")
+         Syllables/100 words: 142.7
+         >>> print(f"Grade level: {result.grade_level}")
+         Grade level: 6
+
+     Note:
+         - The original method uses three 100-word samples
+         - This implementation uses a single sample (the first 100 words)
+         - Syllable counting required
+         - Grade level estimation uses zone boundaries
+         - Some texts fall outside the graph bounds (flagged via graph_zone)
+     """
+     # Tokenize and segment
+     sentences = split_sentences(text)
+     tokens = tokenize(text)
+     word_tokens = normalize_for_readability(tokens)
+
+     if len(sentences) == 0 or len(word_tokens) == 0:
+         return FryResult(
+             avg_sentence_length=float("nan"),
+             avg_syllables_per_100=float("nan"),
+             grade_level="Unknown",
+             graph_zone="invalid",
+             metadata={
+                 "total_sentences": 0,
+                 "total_syllables": 0,
+                 "total_words": 0,
+                 "sample_size": 0,
+             },
+         )
+
+     # Use first 100 words for sample (or entire text if < 100 words)
+     sample_size = min(100, len(word_tokens))
+     sample_tokens = word_tokens[:sample_size]
+
+     # Count syllables in sample
+     total_syllables = sum(count_syllables(word) for word in sample_tokens)
+
+     # Count how many sentences fall within the first sample_size words
+     word_count_so_far = 0
+     sentences_in_sample = 0
+     for sent in sentences:
+         sent_tokens = tokenize(sent)
+         sent_word_tokens = normalize_for_readability(sent_tokens)
+         if word_count_so_far + len(sent_word_tokens) <= sample_size:
+             sentences_in_sample += 1
+             word_count_so_far += len(sent_word_tokens)
+         else:
+             # Partial sentence in sample
+             if word_count_so_far < sample_size:
+                 sentences_in_sample += 1
+             break
+
+     # Ensure at least 1 sentence for division
+     sentences_in_sample = max(1, sentences_in_sample)
+
+     # Calculate avg_sentence_length (for the sample)
+     avg_sentence_length = sample_size / sentences_in_sample
+
+     # Calculate avg_syllables_per_100 (scales automatically if sample < 100)
+     avg_syllables_per_100 = (total_syllables / sample_size) * 100
+
+     # Map to grade level using a simplified numerical approximation of the
+     # Fry graph zones: more syllables per 100 words and longer sentences
+     # both push the estimate toward higher grades. (The original graph's
+     # x-axis is sentences per 100 words, the inverse of sentence length.)
+     graph_zone = "valid"
+     if avg_syllables_per_100 < 125:
+         if avg_sentence_length < 7:
+             grade_level = "1"
+         elif avg_sentence_length < 11:
+             grade_level = "2"
+         else:
+             grade_level = "3"
+     elif avg_syllables_per_100 < 135:
+         if avg_sentence_length < 8:
+             grade_level = "2"
+         elif avg_sentence_length < 12:
+             grade_level = "3"
+         else:
+             grade_level = "4"
+     elif avg_syllables_per_100 < 145:
+         if avg_sentence_length < 9:
+             grade_level = "3"
+         elif avg_sentence_length < 13:
+             grade_level = "5"
+         else:
+             grade_level = "6"
+     elif avg_syllables_per_100 < 155:
+         if avg_sentence_length < 10:
+             grade_level = "4"
+         elif avg_sentence_length < 14:
+             grade_level = "7"
+         else:
+             grade_level = "8"
+     elif avg_syllables_per_100 < 165:
+         if avg_sentence_length < 12:
+             grade_level = "6"
+         elif avg_sentence_length < 16:
+             grade_level = "9"
+         else:
+             grade_level = "10"
+     elif avg_syllables_per_100 < 175:
+         if avg_sentence_length < 14:
+             grade_level = "8"
+         elif avg_sentence_length < 18:
+             grade_level = "11"
+         else:
+             grade_level = "12"
+     else:  # avg_syllables_per_100 >= 175
+         if avg_sentence_length < 16:
+             grade_level = "10"
+         elif avg_sentence_length < 20:
+             grade_level = "College"
+         else:
+             grade_level = "College+"
+
+     # Check if outside typical graph bounds
+     if avg_syllables_per_100 > 185 or avg_sentence_length > 25:
+         graph_zone = "above_graph"
+     elif avg_syllables_per_100 < 110:
+         graph_zone = "below_graph"
+
+     # Build metadata
+     metadata = {
+         "total_sentences": len(sentences),
+         "total_syllables": sum(count_syllables(w) for w in word_tokens),
+         "total_words": len(word_tokens),
+         "sample_size": sample_size,
+         "sentences_in_sample": sentences_in_sample,
+         "syllables_in_sample": total_syllables,
+     }
+
+     return FryResult(
+         avg_sentence_length=avg_sentence_length,
+         avg_syllables_per_100=avg_syllables_per_100,
+         grade_level=grade_level,
+         graph_zone=graph_zone,
+         metadata=metadata,
+     )
+
+
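The zone lookup above reduces to two coordinates; a standalone sketch with illustrative counts shows how they are derived and where they land:

# Hypothetical 100-word sample: 138 syllables across 7 sentences.
sample_words, syllables, sentences = 100, 138, 7
avg_sentence_length = sample_words / sentences           # ~14.3 words/sentence
avg_syllables_per_100 = syllables / sample_words * 100   # 138.0
# Per the simplified zone table above (135 <= 138 < 145 and length >= 13),
# these coordinates map to grade "6".
print(round(avg_sentence_length, 1), avg_syllables_per_100)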
+ def compute_forcast(text: str) -> FORCASTResult:
+     """
+     Compute FORCAST Readability Formula.
+
+     FORCAST (FORmula for CASTing readability) was developed by the U.S. military
+     to assess readability without sentence counting. It uses only the count of
+     single-syllable words as its metric, making it fast and simple.
+
+     Related GitHub Issue:
+         #16 - Additional Readability Formulas
+         https://github.com/craigtrim/pystylometry/issues/16
+
+     Formula:
+         Grade Level = 20 - (N / 10)
+
+         Where N is the number of single-syllable words in a 150-word sample.
+
+     The formula is optimized for technical and military documents and works
+     best with standardized 150-word samples.
+
+     Advantages:
+         - Extremely simple (only counts single-syllable words)
+         - No sentence segmentation required
+         - Fast computation
+         - Developed specifically for military/technical texts
+
+     Disadvantages:
+         - Less well-known and validated than other formulas
+         - Requires exactly 150-word samples
+         - Single dimension (doesn't consider sentence length)
+         - May not generalize well beyond military context
+
+     Args:
+         text: Input text to analyze. Works best with 150-word samples.
+             Shorter texts are scaled proportionally to a 150-word basis.
+             Longer texts use the first 150 words.
+
+     Returns:
+         FORCASTResult containing:
+             - forcast_score: The FORCAST readability score
+             - grade_level: Corresponding U.S. grade level (integer)
+             - single_syllable_ratio: Single-syllable words / total words
+             - single_syllable_count: Count of single-syllable words
+             - total_words: Total word count analyzed
+             - metadata: Sample details, calculation specifics, etc.
+
+     Example:
+         >>> result = compute_forcast("Military technical document...")
+         >>> print(f"FORCAST score: {result.forcast_score:.2f}")
+         FORCAST score: 9.70
+         >>> print(f"Grade level: {result.grade_level}")
+         Grade level: 10
+         >>> print(f"Single-syllable ratio: {result.single_syllable_ratio:.3f}")
+         Single-syllable ratio: 0.687
+
+     Note:
+         - Syllable counting required (but only to identify 1-syllable words)
+         - Recommended sample size is 150 words
+         - Multiple samples can be averaged for longer texts
+         - Simpler than most readability formulas
+         - Grade levels typically range from 5-12
+     """
+     # Tokenize
+     tokens = tokenize(text)
+     word_tokens = normalize_for_readability(tokens)
+
+     if len(word_tokens) == 0:
+         return FORCASTResult(
+             forcast_score=float("nan"),
+             grade_level=0,
+             single_syllable_ratio=float("nan"),
+             single_syllable_count=0,
+             total_words=0,
+             metadata={"sample_size": 0, "scaled_n": float("nan")},
+         )
+
+     # Use first 150 words for sample (or entire text if < 150 words)
+     sample_size = min(150, len(word_tokens))
+     sample_tokens = word_tokens[:sample_size]
+
+     # Count single-syllable words in sample
+     single_syllable_count = 0
+     for word in sample_tokens:
+         if count_syllables(word) == 1:
+             single_syllable_count += 1
+
+     # Scale N to a 150-word basis if sample < 150
+     if sample_size < 150:
+         scaled_n = single_syllable_count * (150 / sample_size)
+     else:
+         scaled_n = single_syllable_count
+
+     # Calculate grade level: 20 - (N / 10)
+     forcast_score = 20 - (scaled_n / 10)
+     grade_level = round(forcast_score)
+
+     # Clamp grade level to a reasonable range (0-20)
+     grade_level = max(0, min(20, grade_level))
+
+     # Calculate single-syllable ratio (for the sample)
+     single_syllable_ratio = single_syllable_count / sample_size
+
+     # Build metadata
+     metadata = {
+         "sample_size": sample_size,
+         "scaled_n": scaled_n,
+         "total_words_in_text": len(word_tokens),
+     }
+
+     return FORCASTResult(
+         forcast_score=forcast_score,
+         grade_level=grade_level,
+         single_syllable_ratio=single_syllable_ratio,
+         single_syllable_count=single_syllable_count,
+         total_words=sample_size,
+         metadata=metadata,
+     )
+
+
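Because FORCAST needs no sentence segmentation, the whole computation fits in a few lines; a standalone sketch with illustrative counts:

# Hypothetical 150-word sample containing 103 one-syllable words.
n = 103
grade = 20 - n / 10        # 9.7 -> rounds to grade 10
# A shorter sample is first scaled to a 150-word basis,
# e.g. 62 one-syllable words in a 90-word text -> n = 62 * (150 / 90).
print(grade, round(grade))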
+ def compute_powers_sumner_kearl(text: str) -> PowersSumnerKearlResult:
+     """
+     Compute Powers-Sumner-Kearl Readability Formula.
+
+     The Powers-Sumner-Kearl (PSK) formula is a recalibration of the Flesch
+     Reading Ease formula, optimized for primary grade levels (grades 1-4).
+     It uses the same inputs (sentence length, syllable density) but with
+     different coefficients.
+
+     Related GitHub Issue:
+         #16 - Additional Readability Formulas
+         https://github.com/craigtrim/pystylometry/issues/16
+
+     Formula:
+         Grade Level = 0.0778 * avg_sentence_length
+                       + 0.0455 * syllables_per_100_words - 2.2029
+
+     The formula was derived from analysis of primary-grade texts and provides
+     more accurate grade-level estimates for beginning readers than the original
+     Flesch formula.
+
+     Advantages:
+         - Optimized for primary grades (1-4)
+         - More accurate than Flesch for young readers
+         - Uses same inputs as Flesch (easy to compare)
+         - Well-validated on educational materials
+
+     Disadvantages:
+         - Less accurate for higher grade levels
+         - Less well-known than Flesch
+         - Limited range (not suitable for college-level texts)
+         - Requires syllable counting
+
+     Args:
+         text: Input text to analyze. Optimized for children's literature
+             and primary-grade educational materials. Empty text returns
+             NaN values.
+
+     Returns:
+         PowersSumnerKearlResult containing:
+             - psk_score: The Powers-Sumner-Kearl score
+             - grade_level: Corresponding grade (decimal, e.g., 2.5 = mid-2nd grade)
+             - avg_sentence_length: Average words per sentence
+             - avg_syllables_per_word: Average syllables per word
+             - total_sentences: Total sentence count
+             - total_words: Total word count
+             - total_syllables: Total syllable count
+             - metadata: Comparison to Flesch, calculation details, etc.
+
+     Example:
+         >>> result = compute_powers_sumner_kearl("Children's book text...")
+         >>> print(f"PSK score: {result.psk_score:.2f}")
+         PSK score: 4.37
+         >>> print(f"Grade level: {result.grade_level:.1f}")
+         Grade level: 4.4
+         >>> print(f"Avg sentence length: {result.avg_sentence_length:.1f}")
+         Avg sentence length: 8.5
+
+     Note:
+         - Most accurate for grades 1-4
+         - Grade level is continuous (can be decimal)
+         - Syllable counting required (same as Flesch)
+         - Compare to Flesch results for validation
+     """
+     # Tokenize and segment
+     sentences = split_sentences(text)
+     tokens = tokenize(text)
+     word_tokens = normalize_for_readability(tokens)
+
+     if len(sentences) == 0 or len(word_tokens) == 0:
+         return PowersSumnerKearlResult(
+             psk_score=float("nan"),
+             grade_level=float("nan"),
+             avg_sentence_length=float("nan"),
+             avg_syllables_per_word=float("nan"),
+             total_sentences=0,
+             total_words=0,
+             total_syllables=0,
+             metadata={
+                 "flesch_reading_ease": float("nan"),
+                 "flesch_kincaid_grade": float("nan"),
+             },
+         )
+
+     # Count syllables
+     total_syllables = sum(count_syllables(word) for word in word_tokens)
+
+     # Calculate metrics
+     avg_sentence_length = len(word_tokens) / len(sentences)
+     avg_syllables_per_word = total_syllables / len(word_tokens)
+
+     # Apply Powers-Sumner-Kearl formula; note the second coefficient applies
+     # to syllables per 100 words, not syllables per word:
+     # Grade = 0.0778 * ASL + 0.0455 * syllables_per_100_words - 2.2029
+     syllables_per_100_words = avg_syllables_per_word * 100
+     psk_score = (
+         0.0778 * avg_sentence_length + 0.0455 * syllables_per_100_words - 2.2029
+     )
+     grade_level = round(psk_score, 1)  # Round to 1 decimal place
+
+     # Optional: Calculate Flesch scores for comparison
+     flesch_reading_ease = (
+         206.835 - 1.015 * avg_sentence_length - 84.6 * avg_syllables_per_word
+     )
+     flesch_kincaid_grade = (
+         0.39 * avg_sentence_length + 11.8 * avg_syllables_per_word - 15.59
+     )
+
+     # Build metadata
+     metadata = {
+         "flesch_reading_ease": flesch_reading_ease,
+         "flesch_kincaid_grade": flesch_kincaid_grade,
+         "difference_from_flesch": psk_score - flesch_kincaid_grade,
+         "words_per_sentence": avg_sentence_length,
+         "syllables_per_word": avg_syllables_per_word,
+     }
+
+     return PowersSumnerKearlResult(
+         psk_score=psk_score,
+         grade_level=grade_level,
+         avg_sentence_length=avg_sentence_length,
+         avg_syllables_per_word=avg_syllables_per_word,
+         total_sentences=len(sentences),
+         total_words=len(word_tokens),
+         total_syllables=total_syllables,
+         metadata=metadata,
+     )
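
Since PSK and Flesch-Kincaid share the same two inputs, the recalibration is easy to see side by side; a standalone sketch with illustrative averages (matching the docstring example above):

# Hypothetical averages: 8.5 words/sentence, 1.3 syllables/word.
asl, aspw = 8.5, 1.3
psk = 0.0778 * asl + 0.0455 * (aspw * 100) - 2.2029  # ~4.37
fk = 0.39 * asl + 11.8 * aspw - 15.59                # ~3.1
print(round(psk, 2), round(fk, 2))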