pystylometry 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. pystylometry/__init__.py +30 -5
  2. pystylometry/_normalize.py +277 -0
  3. pystylometry/_types.py +1954 -28
  4. pystylometry/_utils.py +4 -0
  5. pystylometry/authorship/__init__.py +26 -1
  6. pystylometry/authorship/additional_methods.py +75 -0
  7. pystylometry/authorship/kilgarriff.py +347 -0
  8. pystylometry/character/__init__.py +15 -0
  9. pystylometry/character/character_metrics.py +389 -0
  10. pystylometry/cli.py +427 -0
  11. pystylometry/consistency/__init__.py +57 -0
  12. pystylometry/consistency/_thresholds.py +162 -0
  13. pystylometry/consistency/drift.py +549 -0
  14. pystylometry/dialect/__init__.py +65 -0
  15. pystylometry/dialect/_data/dialect_markers.json +1134 -0
  16. pystylometry/dialect/_loader.py +360 -0
  17. pystylometry/dialect/detector.py +533 -0
  18. pystylometry/lexical/__init__.py +13 -6
  19. pystylometry/lexical/advanced_diversity.py +680 -0
  20. pystylometry/lexical/function_words.py +590 -0
  21. pystylometry/lexical/hapax.py +310 -33
  22. pystylometry/lexical/mtld.py +180 -22
  23. pystylometry/lexical/ttr.py +149 -0
  24. pystylometry/lexical/word_frequency_sophistication.py +1805 -0
  25. pystylometry/lexical/yule.py +142 -29
  26. pystylometry/ngrams/__init__.py +2 -0
  27. pystylometry/ngrams/entropy.py +150 -49
  28. pystylometry/ngrams/extended_ngrams.py +235 -0
  29. pystylometry/prosody/__init__.py +12 -0
  30. pystylometry/prosody/rhythm_prosody.py +53 -0
  31. pystylometry/readability/__init__.py +12 -0
  32. pystylometry/readability/additional_formulas.py +2110 -0
  33. pystylometry/readability/ari.py +173 -35
  34. pystylometry/readability/coleman_liau.py +150 -30
  35. pystylometry/readability/complex_words.py +531 -0
  36. pystylometry/readability/flesch.py +181 -32
  37. pystylometry/readability/gunning_fog.py +208 -35
  38. pystylometry/readability/smog.py +126 -28
  39. pystylometry/readability/syllables.py +137 -30
  40. pystylometry/stylistic/__init__.py +20 -0
  41. pystylometry/stylistic/cohesion_coherence.py +45 -0
  42. pystylometry/stylistic/genre_register.py +45 -0
  43. pystylometry/stylistic/markers.py +131 -0
  44. pystylometry/stylistic/vocabulary_overlap.py +47 -0
  45. pystylometry/syntactic/__init__.py +4 -0
  46. pystylometry/syntactic/advanced_syntactic.py +494 -0
  47. pystylometry/syntactic/pos_ratios.py +172 -17
  48. pystylometry/syntactic/sentence_stats.py +105 -18
  49. pystylometry/syntactic/sentence_types.py +526 -0
  50. pystylometry/viz/__init__.py +71 -0
  51. pystylometry/viz/drift.py +589 -0
  52. pystylometry/viz/jsx/__init__.py +31 -0
  53. pystylometry/viz/jsx/_base.py +144 -0
  54. pystylometry/viz/jsx/report.py +677 -0
  55. pystylometry/viz/jsx/timeline.py +716 -0
  56. pystylometry/viz/jsx/viewer.py +1032 -0
  57. {pystylometry-0.1.0.dist-info → pystylometry-1.1.0.dist-info}/METADATA +49 -9
  58. pystylometry-1.1.0.dist-info/RECORD +63 -0
  59. pystylometry-1.1.0.dist-info/entry_points.txt +4 -0
  60. pystylometry-0.1.0.dist-info/RECORD +0 -26
  61. {pystylometry-0.1.0.dist-info → pystylometry-1.1.0.dist-info}/WHEEL +0 -0
@@ -1,9 +1,13 @@
1
1
  """Syntactic analysis metrics (requires spaCy)."""
2
2
 
3
+ from .advanced_syntactic import compute_advanced_syntactic
3
4
  from .pos_ratios import compute_pos_ratios
4
5
  from .sentence_stats import compute_sentence_stats
6
+ from .sentence_types import compute_sentence_types
5
7
 
6
8
  __all__ = [
7
9
  "compute_pos_ratios",
8
10
  "compute_sentence_stats",
11
+ "compute_advanced_syntactic",
12
+ "compute_sentence_types",
9
13
  ]
@@ -0,0 +1,494 @@
1
+ """Advanced syntactic analysis using dependency parsing.
2
+
3
+ This module provides sophisticated syntactic metrics beyond basic POS tagging.
4
+ Using dependency parsing, it extracts features related to sentence complexity,
5
+ grammatical sophistication, and syntactic style preferences.
6
+
7
+ Related GitHub Issue:
8
+ #17 - Advanced Syntactic Analysis
9
+ https://github.com/craigtrim/pystylometry/issues/17
10
+
11
+ Features implemented:
12
+ - Parse tree depth (sentence structural complexity)
13
+ - T-units (minimal terminable units - independent clauses with modifiers)
14
+ - Clausal density (clauses per T-unit)
15
+ - Dependent clause ratio
16
+ - Passive voice ratio
17
+ - Subordination and coordination indices
18
+ - Dependency distance metrics
19
+ - Branching direction (left vs. right)
20
+
21
+ References:
22
+ Hunt, K. W. (1965). Grammatical structures written at three grade levels.
23
+ NCTE Research Report No. 3.
24
+ Biber, D. (1988). Variation across speech and writing. Cambridge University Press.
25
+ Lu, X. (2010). Automatic analysis of syntactic complexity in second language
26
+ writing. International Journal of Corpus Linguistics, 15(4), 474-496.
27
+ Gibson, E. (2000). The dependency locality theory: A distance-based theory
28
+ of linguistic complexity. In Image, language, brain (pp. 95-126).
29
+ """
30
+
31
+ from typing import Any
32
+
33
+ from .._types import AdvancedSyntacticResult, Distribution, make_distribution
34
+ from .._utils import check_optional_dependency
35
+
36
+ # Type aliases for spaCy objects (loaded dynamically)
37
+ _SpaCyToken = Any
38
+ _SpaCyDoc = Any
39
+ _SpaCySpan = Any
40
+
41
+
42
def compute_advanced_syntactic(
    text: str,
    model: str = "en_core_web_sm",
    chunk_size: int = 1000,
) -> AdvancedSyntacticResult:
    """
    Compute advanced syntactic complexity metrics using dependency parsing.

    Uses spaCy's dependency parser to extract features beyond simple POS
    tagging: sentence complexity, grammatical sophistication, and stylistic
    preferences in syntactic structure. Such features correlate with writing
    proficiency, distinguish genres (academic vs. conversational), and
    capture authorial style.

    Related GitHub Issue:
        #17 - Advanced Syntactic Analysis
        https://github.com/craigtrim/pystylometry/issues/17

    Metrics computed:
        - Parse tree depth: mean/max depth of dependency parse trees; deeper
          trees indicate more embedding and subordination.
        - T-units (Hunt 1965): one independent clause plus all dependent
          clauses attached to it; mean T-unit length is a standard
          complexity measure, more reliable than raw sentence length.
        - Clausal density: clauses per T-unit (typically higher in academic
          writing).
        - Dependent clause ratio: dependent clauses / total clauses.
        - Passive voice ratio: passive sentences / total sentences
          (academic/formal writing tends to use more passive voice).
        - Subordination and coordination indices: subordinate resp.
          coordinate clauses relative to total clauses.
        - Dependency distance: mean head-dependent distance, a proxy for
          working-memory load (Gibson 2000).
        - Branching direction: proportion of left- vs. right-branching
          attachments (English tends toward right-branching).
        - Sentence complexity score: weighted composite of the above.

    Args:
        text: Input text to analyze. Should contain multiple sentences for
            reliable metrics; very short texts may have unstable values.
        model: spaCy model name with dependency parser. Default is
            "en_core_web_sm". Larger models (en_core_web_md, en_core_web_lg)
            may provide better parsing accuracy but are slower.
        chunk_size: Nominal chunk size recorded in the result for parity
            with other metrics in the package; the analysis itself is a
            single pass over the full text (chunk_count is always 1 for
            non-empty input).

    Returns:
        AdvancedSyntacticResult containing the scalar metrics listed above,
        a single-value Distribution for each metric, chunk bookkeeping, and
        a metadata dict (per-sentence parse depths, clause counts, T-unit
        lengths, branching counts, a sample of dependency distances, and
        the model name).

    Raises:
        ImportError: If spaCy is not installed.
        OSError: If the requested spaCy model has not been downloaded.

    Example:
        >>> result = compute_advanced_syntactic("Complex multi-clause text...")
        >>> print(f"Parse tree depth: {result.mean_parse_tree_depth:.1f}")
        Parse tree depth: 5.3
        >>> print(f"Clausal density: {result.clausal_density:.2f}")
        Clausal density: 2.4

    Note:
        - Requires spaCy with a dependency parser (small model minimum).
        - Parse accuracy affects metrics (larger models are better).
        - Very long sentences may have parsing errors.
        - Passive voice detection uses dependency patterns.
        - T-unit segmentation follows Hunt (1965) criteria.
        - Empty or very short texts return NaN for ratios.
    """
    check_optional_dependency("spacy", "syntactic")

    try:
        import spacy  # type: ignore
    except ImportError as e:
        raise ImportError(
            "spaCy is required for advanced syntactic analysis. "
            "Install with: pip install spacy && python -m spacy download en_core_web_sm"
        ) from e

    # Load spaCy model
    try:
        nlp = spacy.load(model)
    except OSError as e:
        raise OSError(
            f"spaCy model '{model}' not found. Download with: python -m spacy download {model}"
        ) from e

    def _is_nan(value: float) -> bool:
        # NaN is the only value that compares unequal to itself. This
        # replaces the previous convoluted isinstance/self-comparison
        # guards with one named, readable check.
        return value != value

    # Parse text
    doc = nlp(text)
    sentences = list(doc.sents)

    # Handle empty text: every ratio is NaN, every distribution empty.
    if len(sentences) == 0 or len(doc) == 0:
        empty_dist = Distribution(
            values=[],
            mean=float("nan"),
            median=float("nan"),
            std=0.0,
            range=0.0,
            iqr=0.0,
        )
        return AdvancedSyntacticResult(
            mean_parse_tree_depth=float("nan"),
            max_parse_tree_depth=0,
            t_unit_count=0,
            mean_t_unit_length=float("nan"),
            clausal_density=float("nan"),
            dependent_clause_ratio=float("nan"),
            passive_voice_ratio=float("nan"),
            subordination_index=float("nan"),
            coordination_index=float("nan"),
            sentence_complexity_score=float("nan"),
            dependency_distance=float("nan"),
            left_branching_ratio=float("nan"),
            right_branching_ratio=float("nan"),
            mean_parse_tree_depth_dist=empty_dist,
            max_parse_tree_depth_dist=empty_dist,
            mean_t_unit_length_dist=empty_dist,
            clausal_density_dist=empty_dist,
            dependent_clause_ratio_dist=empty_dist,
            passive_voice_ratio_dist=empty_dist,
            subordination_index_dist=empty_dist,
            coordination_index_dist=empty_dist,
            sentence_complexity_score_dist=empty_dist,
            dependency_distance_dist=empty_dist,
            left_branching_ratio_dist=empty_dist,
            right_branching_ratio_dist=empty_dist,
            chunk_size=chunk_size,
            chunk_count=0,
            metadata={
                "sentence_count": 0,
                "word_count": 0,
                "total_clauses": 0,
                "warning": "Empty text or no sentences found",
            },
        )

    # 1. Parse tree depth per sentence
    parse_depths = [_calculate_max_tree_depth(sent.root) for sent in sentences]
    mean_parse_tree_depth = sum(parse_depths) / len(parse_depths)
    max_parse_tree_depth = max(parse_depths)

    # 2. Mean dependency distance (the root is its own head and is excluded)
    dependency_distances = [
        abs(token.i - token.head.i) for token in doc if token != token.head
    ]
    if dependency_distances:
        mean_dependency_distance = sum(dependency_distances) / len(dependency_distances)
    else:
        mean_dependency_distance = 0.0

    # 3. Identify T-units and calculate mean T-unit length
    t_units = _identify_t_units(doc)
    t_unit_count = len(t_units)
    t_unit_lengths = [len(t_unit) for t_unit in t_units]

    if t_unit_count > 0:
        mean_t_unit_length = sum(t_unit_lengths) / t_unit_count
    else:
        mean_t_unit_length = float("nan")

    # 4. Count clauses (total, dependent, subordinate, coordinate)
    total_clauses = 0
    dependent_clause_count = 0
    subordinate_clause_count = 0
    coordinate_clause_count = 0

    for sent in sentences:
        sent_total, sent_dependent, sent_subordinate, sent_coordinate = _count_clauses(sent)
        total_clauses += sent_total
        dependent_clause_count += sent_dependent
        subordinate_clause_count += sent_subordinate
        coordinate_clause_count += sent_coordinate

    # Calculate clause ratios (NaN when there are no clauses at all)
    if total_clauses > 0:
        dependent_clause_ratio = dependent_clause_count / total_clauses
        subordination_index = subordinate_clause_count / total_clauses
        coordination_index = coordinate_clause_count / total_clauses
    else:
        dependent_clause_ratio = float("nan")
        subordination_index = float("nan")
        coordination_index = float("nan")

    if t_unit_count > 0:
        clausal_density = total_clauses / t_unit_count
    else:
        clausal_density = float("nan")

    # 5. Detect passive voice
    passive_sentence_count = sum(1 for sent in sentences if _is_passive_voice(sent))
    passive_voice_ratio = passive_sentence_count / len(sentences)

    # 6. Branching direction (dependent before its head = left-branching)
    left_branching = 0
    right_branching = 0

    for token in doc:
        if token != token.head:  # Exclude root
            if token.i < token.head.i:
                left_branching += 1
            else:
                right_branching += 1

    total_branching = left_branching + right_branching
    if total_branching > 0:
        left_branching_ratio = left_branching / total_branching
        right_branching_ratio = right_branching / total_branching
    else:
        left_branching_ratio = float("nan")
        right_branching_ratio = float("nan")

    # 7. Composite complexity score. Each component is squashed into [0, 1]
    # (NaN components contribute 0.0) before the weighted sum; the divisors
    # are rough practical maxima (depth 10, density 3, T-unit length 25,
    # dependency distance 5).
    normalized_parse_depth = min(mean_parse_tree_depth / 10, 1.0)
    normalized_clausal_density = (
        0.0 if _is_nan(clausal_density) else min(clausal_density / 3, 1.0)
    )
    normalized_t_unit_length = (
        0.0 if _is_nan(mean_t_unit_length) else min(mean_t_unit_length / 25, 1.0)
    )
    normalized_dependency_distance = min(mean_dependency_distance / 5, 1.0)
    normalized_subordination = (
        0.0 if _is_nan(subordination_index) else subordination_index
    )

    # Weighted combination
    sentence_complexity_score = (
        0.3 * normalized_parse_depth
        + 0.3 * normalized_clausal_density
        + 0.2 * normalized_t_unit_length
        + 0.1 * normalized_subordination
        + 0.1 * normalized_dependency_distance
    )

    # Create single-value distributions (analysis is done on full text)
    mean_parse_tree_depth_dist = make_distribution([mean_parse_tree_depth])
    max_parse_tree_depth_dist = make_distribution([float(max_parse_tree_depth)])
    mean_t_unit_length_dist = make_distribution([mean_t_unit_length])
    clausal_density_dist = make_distribution([clausal_density])
    dependent_clause_ratio_dist = make_distribution([dependent_clause_ratio])
    passive_voice_ratio_dist = make_distribution([passive_voice_ratio])
    subordination_index_dist = make_distribution([subordination_index])
    coordination_index_dist = make_distribution([coordination_index])
    sentence_complexity_score_dist = make_distribution([sentence_complexity_score])
    dependency_distance_dist = make_distribution([mean_dependency_distance])
    left_branching_ratio_dist = make_distribution([left_branching_ratio])
    right_branching_ratio_dist = make_distribution([right_branching_ratio])

    # Collect metadata
    metadata = {
        "sentence_count": len(sentences),
        "word_count": len(doc),
        "total_clauses": total_clauses,
        "independent_clause_count": total_clauses - dependent_clause_count,
        "dependent_clause_count": dependent_clause_count,
        "subordinate_clause_count": subordinate_clause_count,
        "coordinate_clause_count": coordinate_clause_count,
        "passive_sentence_count": passive_sentence_count,
        "parse_depths_per_sentence": parse_depths,
        "t_unit_lengths": t_unit_lengths,
        "t_unit_count": t_unit_count,
        "dependency_distances": dependency_distances[:100],  # Sample for brevity
        "left_branching_count": left_branching,
        "right_branching_count": right_branching,
        "model_used": model,
    }

    return AdvancedSyntacticResult(
        mean_parse_tree_depth=mean_parse_tree_depth,
        max_parse_tree_depth=max_parse_tree_depth,
        t_unit_count=t_unit_count,
        mean_t_unit_length=mean_t_unit_length,
        clausal_density=clausal_density,
        dependent_clause_ratio=dependent_clause_ratio,
        passive_voice_ratio=passive_voice_ratio,
        subordination_index=subordination_index,
        coordination_index=coordination_index,
        sentence_complexity_score=sentence_complexity_score,
        dependency_distance=mean_dependency_distance,
        left_branching_ratio=left_branching_ratio,
        right_branching_ratio=right_branching_ratio,
        mean_parse_tree_depth_dist=mean_parse_tree_depth_dist,
        max_parse_tree_depth_dist=max_parse_tree_depth_dist,
        mean_t_unit_length_dist=mean_t_unit_length_dist,
        clausal_density_dist=clausal_density_dist,
        dependent_clause_ratio_dist=dependent_clause_ratio_dist,
        passive_voice_ratio_dist=passive_voice_ratio_dist,
        subordination_index_dist=subordination_index_dist,
        coordination_index_dist=coordination_index_dist,
        sentence_complexity_score_dist=sentence_complexity_score_dist,
        dependency_distance_dist=dependency_distance_dist,
        left_branching_ratio_dist=left_branching_ratio_dist,
        right_branching_ratio_dist=right_branching_ratio_dist,
        chunk_size=chunk_size,
        chunk_count=1,  # Single pass analysis
        metadata=metadata,
    )
398
+
399
+
400
def _calculate_max_tree_depth(token: _SpaCyToken) -> int:
    """
    Return the maximum depth of the dependency subtree rooted at *token*.

    Depth is counted in edges: a leaf (a token with no children) has
    depth 0, and each level of children below the root adds one.

    Args:
        token: spaCy Token to start from (typically the sentence root)

    Returns:
        Maximum depth of tree (root = 0, children = parent + 1)
    """
    children = list(token.children)
    if not children:
        return 0
    return 1 + max(_calculate_max_tree_depth(child) for child in children)
415
+
416
+
417
def _identify_t_units(doc: _SpaCyDoc) -> list[_SpaCySpan]:
    """
    Segment *doc* into T-units (minimal terminable units).

    Hunt (1965) defines a T-unit as one main clause together with every
    subordinate clause attached to it. As a practical approximation this
    implementation treats each sentence as a single T-unit; splitting
    compound sentences into separate T-units would require full
    coordination analysis and is deliberately out of scope here.

    Args:
        doc: spaCy Doc object

    Returns:
        List of spaCy Span objects, each representing a T-unit
    """
    return [*doc.sents]
434
+
435
+
436
def _count_clauses(sent: _SpaCySpan) -> tuple[int, int, int, int]:
    """
    Count clause types in a sentence using dependency labels.

    The main clause is always counted once. Tokens whose dependency label
    marks a clausal attachment each add a dependent clause (and possibly a
    subordinate one); a verbal conjunct counts as a coordinated main clause.

    Args:
        sent: spaCy Span representing a sentence

    Returns:
        Tuple of (total_clauses, dependent_clauses, subordinate_clauses, coordinate_clauses)
    """
    # Dependency labels that introduce a clause
    dependent_labels = {"csubj", "ccomp", "xcomp", "advcl", "acl", "relcl"}
    subordinate_labels = {"advcl", "acl", "relcl"}

    total = 1  # the main clause itself
    dependent = 0
    subordinate = 0
    coordinate = 0

    for token in sent:
        label = token.dep_
        if label in dependent_labels:
            total += 1
            dependent += 1
            if label in subordinate_labels:
                subordinate += 1
        elif label == "conj" and token.pos_ == "VERB":
            # A conjunct headed by a verb is a coordinated main clause.
            total += 1
            coordinate += 1

    return total, dependent, subordinate, coordinate
469
+
470
+
471
def _is_passive_voice(sent: _SpaCySpan) -> bool:
    """
    Detect whether *sent* contains a passive voice construction.

    Detection relies on passive-specific dependency labels. Both the
    classic spaCy labels ("nsubjpass", "auxpass", "csubjpass") and the
    UD-style labels used by newer models ("nsubj:pass", "aux:pass")
    contain the substring "pass", so a single substring test covers every
    variant. The previous explicit equality checks for "nsubjpass" and
    "auxpass" were subsumed by the substring test and have been removed;
    behavior is unchanged.

    Args:
        sent: spaCy Span representing a sentence

    Returns:
        True if passive voice detected, False otherwise
    """
    return any("pass" in token.dep_ for token in sent)