pystylometry 0.1.0__py3-none-any.whl → 1.1.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. pystylometry/__init__.py +30 -5
  2. pystylometry/_normalize.py +277 -0
  3. pystylometry/_types.py +1954 -28
  4. pystylometry/_utils.py +4 -0
  5. pystylometry/authorship/__init__.py +26 -1
  6. pystylometry/authorship/additional_methods.py +75 -0
  7. pystylometry/authorship/kilgarriff.py +347 -0
  8. pystylometry/character/__init__.py +15 -0
  9. pystylometry/character/character_metrics.py +389 -0
  10. pystylometry/cli.py +427 -0
  11. pystylometry/consistency/__init__.py +57 -0
  12. pystylometry/consistency/_thresholds.py +162 -0
  13. pystylometry/consistency/drift.py +549 -0
  14. pystylometry/dialect/__init__.py +65 -0
  15. pystylometry/dialect/_data/dialect_markers.json +1134 -0
  16. pystylometry/dialect/_loader.py +360 -0
  17. pystylometry/dialect/detector.py +533 -0
  18. pystylometry/lexical/__init__.py +13 -6
  19. pystylometry/lexical/advanced_diversity.py +680 -0
  20. pystylometry/lexical/function_words.py +590 -0
  21. pystylometry/lexical/hapax.py +310 -33
  22. pystylometry/lexical/mtld.py +180 -22
  23. pystylometry/lexical/ttr.py +149 -0
  24. pystylometry/lexical/word_frequency_sophistication.py +1805 -0
  25. pystylometry/lexical/yule.py +142 -29
  26. pystylometry/ngrams/__init__.py +2 -0
  27. pystylometry/ngrams/entropy.py +150 -49
  28. pystylometry/ngrams/extended_ngrams.py +235 -0
  29. pystylometry/prosody/__init__.py +12 -0
  30. pystylometry/prosody/rhythm_prosody.py +53 -0
  31. pystylometry/readability/__init__.py +12 -0
  32. pystylometry/readability/additional_formulas.py +2110 -0
  33. pystylometry/readability/ari.py +173 -35
  34. pystylometry/readability/coleman_liau.py +150 -30
  35. pystylometry/readability/complex_words.py +531 -0
  36. pystylometry/readability/flesch.py +181 -32
  37. pystylometry/readability/gunning_fog.py +208 -35
  38. pystylometry/readability/smog.py +126 -28
  39. pystylometry/readability/syllables.py +137 -30
  40. pystylometry/stylistic/__init__.py +20 -0
  41. pystylometry/stylistic/cohesion_coherence.py +45 -0
  42. pystylometry/stylistic/genre_register.py +45 -0
  43. pystylometry/stylistic/markers.py +131 -0
  44. pystylometry/stylistic/vocabulary_overlap.py +47 -0
  45. pystylometry/syntactic/__init__.py +4 -0
  46. pystylometry/syntactic/advanced_syntactic.py +494 -0
  47. pystylometry/syntactic/pos_ratios.py +172 -17
  48. pystylometry/syntactic/sentence_stats.py +105 -18
  49. pystylometry/syntactic/sentence_types.py +526 -0
  50. pystylometry/viz/__init__.py +71 -0
  51. pystylometry/viz/drift.py +589 -0
  52. pystylometry/viz/jsx/__init__.py +31 -0
  53. pystylometry/viz/jsx/_base.py +144 -0
  54. pystylometry/viz/jsx/report.py +677 -0
  55. pystylometry/viz/jsx/timeline.py +716 -0
  56. pystylometry/viz/jsx/viewer.py +1032 -0
  57. {pystylometry-0.1.0.dist-info → pystylometry-1.1.0.dist-info}/METADATA +49 -9
  58. pystylometry-1.1.0.dist-info/RECORD +63 -0
  59. pystylometry-1.1.0.dist-info/entry_points.txt +4 -0
  60. pystylometry-0.1.0.dist-info/RECORD +0 -26
  61. {pystylometry-0.1.0.dist-info → pystylometry-1.1.0.dist-info}/WHEEL +0 -0
@@ -0,0 +1,1134 @@
1
+ {
2
+ "metadata": {
3
+ "version": "1.1.0",
4
+ "last_updated": "2026-01-25",
5
+ "sources": [
6
+ "Oxford English Dictionary",
7
+ "Cambridge Dictionary",
8
+ "Merriam-Webster Dictionary",
9
+ "British National Corpus",
10
+ "Corpus of Contemporary American English",
11
+ "Lingua 2022 - Dialectometrical evaluation",
12
+ "Encyclopedia.com - Slang, Dialect, and Marked Language"
13
+ ],
14
+ "notes": "Extensible dialect marker database for stylometric analysis with feature weighting based on linguistic research"
15
+ },
16
+ "feature_levels": {
17
+ "description": "Categorize markers by linguistic level for fine-grained analysis",
18
+ "phonological": {
19
+ "description": "Spelling differences reflecting pronunciation",
20
+ "examples": ["grey/gray", "programme/program", "colour/color"]
21
+ },
22
+ "morphological": {
23
+ "description": "Word formation differences",
24
+ "examples": ["travelled/traveled", "-ise/-ize", "-our/-or"]
25
+ },
26
+ "lexical": {
27
+ "description": "Different words for same concept",
28
+ "examples": ["flat/apartment", "lorry/truck", "lift/elevator"]
29
+ },
30
+ "syntactic": {
31
+ "description": "Grammar and sentence structure differences",
32
+ "examples": ["have got/have", "collective noun agreement", "shall/will"]
33
+ }
34
+ },
35
+ "eye_dialect": {
36
+ "description": "Spellings that look nonstandard but reflect standard pronunciation; they indicate informal register, not regional dialect",
37
+ "informal_contractions": [
38
+ "gonna", "wanna", "gotta", "kinda", "sorta", "outta", "shoulda", "coulda", "woulda",
39
+ "lemme", "gimme", "dunno", "innit", "ain't", "y'all", "dontcha", "gotcha", "betcha",
40
+ "whatcha", "cuz", "cos", "cause", "coz", "tryna", "finna"
41
+ ],
42
+ "phonetic_spellings": [
43
+ "wuz", "sez", "cuz", "ur", "u", "r", "b4", "2", "4", "thru", "tho", "bro",
44
+ "nite", "lite", "rite", "luv", "ya", "yea", "yeah", "nah", "huh", "mhm", "uh-huh"
45
+ ],
46
+ "note": "High eye_dialect count suggests informal register or speech transcription, not true dialect"
47
+ },
48
+ "pragmatic_markers": {
49
+ "description": "Discourse and politeness markers that vary by dialect",
50
+ "british_hedging": {
51
+ "words": ["quite", "rather", "terribly", "awfully", "frightfully", "jolly", "fairly", "somewhat"],
52
+ "note": "British English uses more hedging and softening"
53
+ },
54
+ "british_discourse": {
55
+ "words": ["right then", "brilliant", "lovely", "cheers", "mind you", "I reckon", "as it were", "at any rate", "in a sense"],
56
+ "note": "Common British discourse markers"
57
+ },
58
+ "american_discourse": {
59
+ "words": ["awesome", "cool", "you guys", "totally", "like", "basically", "literally", "actually", "I guess", "for sure", "no way"],
60
+ "note": "Common American discourse markers"
61
+ },
62
+ "british_politeness": {
63
+ "patterns": ["would you mind", "I was wondering if", "might I", "could I possibly", "I don't suppose"],
64
+ "note": "More indirect request forms"
65
+ },
66
+ "american_politeness": {
67
+ "patterns": ["can you", "could you", "would you", "I want", "I need"],
68
+ "note": "More direct request forms"
69
+ }
70
+ },
71
+ "vocabulary": {
72
+ "pairs": [
73
+ {"american": "airplane", "british": "aeroplane", "category": "transportation"},
74
+ {"american": "aluminum", "british": "aluminium", "category": "materials"},
75
+ {"american": "antenna", "british": "aerial", "category": "technology"},
76
+ {"american": "apartment", "british": "flat", "category": "housing"},
77
+ {"american": "appetizer", "british": "starter", "category": "food"},
78
+ {"american": "ATM", "british": "cash point", "category": "finance"},
79
+ {"american": "attorney", "british": "solicitor", "category": "legal"},
80
+ {"american": "baby carriage", "british": "pram", "category": "household"},
81
+ {"american": "backyard", "british": "garden", "category": "housing"},
82
+ {"american": "band-aid", "british": "plaster", "category": "medical"},
83
+ {"american": "bangs", "british": "fringe", "category": "appearance"},
84
+ {"american": "bathroom", "british": "loo", "category": "housing"},
85
+ {"american": "bathrobe", "british": "dressing gown", "category": "clothing"},
86
+ {"american": "beet", "british": "beetroot", "category": "food"},
87
+ {"american": "bill", "british": "note", "category": "finance"},
88
+ {"american": "billion", "british": "milliard", "category": "numbers", "notes": "historical British usage"},
89
+ {"american": "biscuit", "british": "scone", "category": "food"},
90
+ {"american": "bleachers", "british": "terraces", "category": "sports"},
91
+ {"american": "blinders", "british": "blinkers", "category": "animals"},
92
+ {"american": "broil", "british": "grill", "category": "cooking"},
93
+ {"american": "busy signal", "british": "engaged tone", "category": "technology"},
94
+ {"american": "cab", "british": "taxi", "category": "transportation"},
95
+ {"american": "call collect", "british": "reverse charges", "category": "technology"},
96
+ {"american": "can", "british": "tin", "category": "food"},
97
+ {"american": "candy", "british": "sweets", "category": "food"},
98
+ {"american": "cart", "british": "trolley", "category": "shopping"},
99
+ {"american": "catalog", "british": "catalogue", "category": "general"},
100
+ {"american": "cell phone", "british": "mobile phone", "category": "technology"},
101
+ {"american": "check", "british": "cheque", "category": "finance"},
102
+ {"american": "checkers", "british": "draughts", "category": "games"},
103
+ {"american": "chips", "british": "crisps", "category": "food"},
104
+ {"american": "cilantro", "british": "coriander", "category": "food"},
105
+ {"american": "closet", "british": "wardrobe", "category": "housing"},
106
+ {"american": "comforter", "british": "duvet", "category": "household"},
107
+ {"american": "cookie", "british": "biscuit", "category": "food"},
108
+ {"american": "corn", "british": "maize", "category": "food"},
109
+ {"american": "cotton candy", "british": "candyfloss", "category": "food"},
110
+ {"american": "counterclockwise", "british": "anticlockwise", "category": "direction"},
111
+ {"american": "crib", "british": "cot", "category": "household"},
112
+ {"american": "crosswalk", "british": "zebra crossing", "category": "transportation"},
113
+ {"american": "curb", "british": "kerb", "category": "infrastructure"},
114
+ {"american": "diaper", "british": "nappy", "category": "household"},
115
+ {"american": "dish towel", "british": "tea towel", "category": "household"},
116
+ {"american": "divided highway", "british": "dual carriageway", "category": "transportation"},
117
+ {"american": "downtown", "british": "city centre", "category": "location"},
118
+ {"american": "drapes", "british": "curtains", "category": "household"},
119
+ {"american": "driver's license", "british": "driving licence", "category": "legal"},
120
+ {"american": "drugstore", "british": "chemist", "category": "shopping"},
121
+ {"american": "dumpster", "british": "skip", "category": "infrastructure"},
122
+ {"american": "eggplant", "british": "aubergine", "category": "food"},
123
+ {"american": "elementary school", "british": "primary school", "category": "education"},
124
+ {"american": "elevator", "british": "lift", "category": "building"},
125
+ {"american": "eraser", "british": "rubber", "category": "stationery"},
126
+ {"american": "expressway", "british": "motorway", "category": "transportation"},
127
+ {"american": "fall", "british": "autumn", "category": "time"},
128
+ {"american": "faucet", "british": "tap", "category": "household"},
129
+ {"american": "fender", "british": "wing", "category": "automotive"},
130
+ {"american": "first floor", "british": "ground floor", "category": "building"},
131
+ {"american": "flashlight", "british": "torch", "category": "household"},
132
+ {"american": "french fries", "british": "chips", "category": "food"},
133
+ {"american": "freeway", "british": "motorway", "category": "transportation"},
134
+ {"american": "garbage", "british": "rubbish", "category": "general"},
135
+ {"american": "garbage can", "british": "dustbin", "category": "household"},
136
+ {"american": "gas", "british": "petrol", "category": "automotive"},
137
+ {"american": "gas pedal", "british": "accelerator", "category": "automotive"},
138
+ {"american": "gear shift", "british": "gear lever", "category": "automotive"},
139
+ {"american": "generator", "british": "dynamo", "category": "technology"},
140
+ {"american": "gotten", "british": "got", "category": "grammar"},
141
+ {"american": "grade", "british": "year", "category": "education"},
142
+ {"american": "gram", "british": "gramme", "category": "measurement"},
143
+ {"american": "ground beef", "british": "mince", "category": "food"},
144
+ {"american": "highway", "british": "main road", "category": "transportation"},
145
+ {"american": "hood", "british": "bonnet", "category": "automotive"},
146
+ {"american": "intersection", "british": "junction", "category": "transportation"},
147
+ {"american": "jello", "british": "jelly", "category": "food"},
148
+ {"american": "jelly", "british": "jam", "category": "food"},
149
+ {"american": "kerosene", "british": "paraffin", "category": "materials"},
150
+ {"american": "ladybug", "british": "ladybird", "category": "animals"},
151
+ {"american": "lawyer", "british": "barrister", "category": "legal"},
152
+ {"american": "license plate", "british": "number plate", "category": "automotive"},
153
+ {"american": "line", "british": "queue", "category": "general"},
154
+ {"american": "liquor store", "british": "off-licence", "category": "shopping"},
155
+ {"american": "living room", "british": "lounge", "category": "housing"},
156
+ {"american": "mail", "british": "post", "category": "communication"},
157
+ {"american": "mailbox", "british": "postbox", "category": "communication"},
158
+ {"american": "mailman", "british": "postman", "category": "occupation"},
159
+ {"american": "math", "british": "maths", "category": "education"},
160
+ {"american": "median strip", "british": "central reservation", "category": "transportation"},
161
+ {"american": "mom", "british": "mum", "category": "family"},
162
+ {"american": "movie", "british": "film", "category": "entertainment"},
163
+ {"american": "movie theater", "british": "cinema", "category": "entertainment"},
164
+ {"american": "muffler", "british": "silencer", "category": "automotive"},
165
+ {"american": "napkin", "british": "serviette", "category": "household"},
166
+ {"american": "nightstand", "british": "bedside table", "category": "household"},
167
+ {"american": "oatmeal", "british": "porridge", "category": "food"},
168
+ {"american": "overpass", "british": "flyover", "category": "transportation"},
169
+ {"american": "pacifier", "british": "dummy", "category": "household"},
170
+ {"american": "pajamas", "british": "pyjamas", "category": "clothing"},
171
+ {"american": "pants", "british": "trousers", "category": "clothing"},
172
+ {"american": "pantyhose", "british": "tights", "category": "clothing"},
173
+ {"american": "parking garage", "british": "car park", "category": "transportation"},
174
+ {"american": "parking lot", "british": "car park", "category": "transportation"},
175
+ {"american": "pavement", "british": "road surface", "category": "infrastructure"},
176
+ {"american": "period", "british": "full stop", "category": "punctuation"},
177
+ {"american": "pharmacist", "british": "chemist", "category": "occupation"},
178
+ {"american": "pitcher", "british": "jug", "category": "household"},
179
+ {"american": "potato chips", "british": "crisps", "category": "food"},
180
+ {"american": "private school", "british": "public school", "category": "education"},
181
+ {"american": "purse", "british": "handbag", "category": "accessories"},
182
+ {"american": "race car", "british": "racing car", "category": "automotive"},
183
+ {"american": "railroad", "british": "railway", "category": "transportation"},
184
+ {"american": "raise", "british": "rise", "category": "finance"},
185
+ {"american": "realtor", "british": "estate agent", "category": "occupation"},
186
+ {"american": "recess", "british": "break", "category": "education"},
187
+ {"american": "rent", "british": "hire", "category": "finance"},
188
+ {"american": "restroom", "british": "toilet", "category": "building"},
189
+ {"american": "resume", "british": "CV", "category": "employment"},
190
+ {"american": "rooster", "british": "cockerel", "category": "animals"},
191
+ {"american": "rotary", "british": "roundabout", "category": "transportation"},
192
+ {"american": "row house", "british": "terraced house", "category": "housing"},
193
+ {"american": "run", "british": "ladder", "category": "clothing", "notes": "in stockings"},
194
+ {"american": "rutabaga", "british": "swede", "category": "food"},
195
+ {"american": "sailboat", "british": "sailing boat", "category": "transportation"},
196
+ {"american": "sanitarium", "british": "sanatorium", "category": "medical"},
197
+ {"american": "schedule", "british": "timetable", "category": "general"},
198
+ {"american": "sedan", "british": "saloon", "category": "automotive"},
199
+ {"american": "semester", "british": "term", "category": "education"},
200
+ {"american": "shrimp", "british": "prawn", "category": "food"},
201
+ {"american": "sidewalk", "british": "pavement", "category": "infrastructure"},
202
+ {"american": "silverware", "british": "cutlery", "category": "household"},
203
+ {"american": "sneakers", "british": "trainers", "category": "clothing"},
204
+ {"american": "soccer", "british": "football", "category": "sports"},
205
+ {"american": "soda", "british": "fizzy drink", "category": "food"},
206
+ {"american": "softball", "british": "rounders", "category": "sports"},
207
+ {"american": "spelling bee", "british": "spelling competition", "category": "education"},
208
+ {"american": "squash", "british": "marrow", "category": "food"},
209
+ {"american": "station wagon", "british": "estate car", "category": "automotive"},
210
+ {"american": "stove", "british": "cooker", "category": "household"},
211
+ {"american": "streetcar", "british": "tram", "category": "transportation"},
212
+ {"american": "stroller", "british": "pushchair", "category": "household"},
213
+ {"american": "subway", "british": "underground", "category": "transportation"},
214
+ {"american": "suspenders", "british": "braces", "category": "clothing"},
215
+ {"american": "sweater", "british": "jumper", "category": "clothing"},
216
+ {"american": "takeout", "british": "takeaway", "category": "food"},
217
+ {"american": "thumbtack", "british": "drawing pin", "category": "stationery"},
218
+ {"american": "tic-tac-toe", "british": "noughts and crosses", "category": "games"},
219
+ {"american": "tire", "british": "tyre", "category": "automotive"},
220
+ {"american": "trailer", "british": "caravan", "category": "transportation"},
221
+ {"american": "trash", "british": "rubbish", "category": "general"},
222
+ {"american": "trash can", "british": "bin", "category": "household"},
223
+ {"american": "truck", "british": "lorry", "category": "transportation"},
224
+ {"american": "trunk", "british": "boot", "category": "automotive"},
225
+ {"american": "underpants", "british": "pants", "category": "clothing"},
226
+ {"american": "undershirt", "british": "vest", "category": "clothing"},
227
+ {"american": "vacation", "british": "holiday", "category": "time"},
228
+ {"american": "van", "british": "people carrier", "category": "transportation"},
229
+ {"american": "vest", "british": "waistcoat", "category": "clothing"},
230
+ {"american": "windshield", "british": "windscreen", "category": "automotive"},
231
+ {"american": "wrench", "british": "spanner", "category": "tools"},
232
+ {"american": "yard", "british": "garden", "category": "housing"},
233
+ {"american": "zee", "british": "zed", "category": "alphabet"},
234
+ {"american": "zero", "british": "nought", "category": "numbers"},
235
+ {"american": "zipper", "british": "zip", "category": "clothing"},
236
+ {"american": "zucchini", "british": "courgette", "category": "food"}
237
+ ],
238
+ "exclusive": {
239
+ "australian": [
240
+ "arvo",
241
+ "barbie",
242
+ "bathers",
243
+ "bikkie",
244
+ "bloke",
245
+ "bogan",
246
+ "brekkie",
247
+ "chook",
248
+ "dunny",
249
+ "esky",
250
+ "footy",
251
+ "g'day",
252
+ "heaps",
253
+ "lollies",
254
+ "macca's",
255
+ "mate",
256
+ "mozzie",
257
+ "outback",
258
+ "reckon",
259
+ "ripper",
260
+ "servo",
261
+ "sheila",
262
+ "sickie",
263
+ "snag",
264
+ "strewth",
265
+ "sunnies",
266
+ "ta",
267
+ "thongs",
268
+ "ute",
269
+ "yakka"
270
+ ],
271
+ "british": [
272
+ "blimey",
273
+ "bloody",
274
+ "bollocks",
275
+ "brilliant",
276
+ "cheers",
277
+ "chuffed",
278
+ "codswallop",
279
+ "crikey",
280
+ "dodgy",
281
+ "fortnight",
282
+ "gobsmacked",
283
+ "gutted",
284
+ "knackered",
285
+ "lovely",
286
+ "mental",
287
+ "naff",
288
+ "posh",
289
+ "proper",
290
+ "quid",
291
+ "rubbish",
292
+ "sacked",
293
+ "shattered",
294
+ "snog",
295
+ "sorted",
296
+ "spot-on",
297
+ "stonking",
298
+ "whilst",
299
+ "wicked"
300
+ ],
301
+ "american": [
302
+ "awesome",
303
+ "buck",
304
+ "bummer",
305
+ "cop",
306
+ "dude",
307
+ "fanny pack",
308
+ "gotten",
309
+ "guy",
310
+ "jock",
311
+ "kinda",
312
+ "lame",
313
+ "nerd",
314
+ "nuts",
315
+ "oftentimes",
316
+ "rookie",
317
+ "schlep",
318
+ "schmuck",
319
+ "swell",
320
+ "trash",
321
+ "wanna",
322
+ "yep"
323
+ ],
324
+ "canadian": [
325
+ "chesterfield",
326
+ "chinook",
327
+ "donair",
328
+ "eh",
329
+ "garburator",
330
+ "homo milk",
331
+ "keener",
332
+ "loonie",
333
+ "mickey",
334
+ "parkade",
335
+ "poutine",
336
+ "serviette",
337
+ "toque",
338
+ "toonie",
339
+ "two-four",
340
+ "washroom"
341
+ ],
342
+ "south_african": [
343
+ "braai",
344
+ "bru",
345
+ "eish",
346
+ "howzit",
347
+ "ja",
348
+ "jol",
349
+ "lekker",
350
+ "now-now",
351
+ "robot",
352
+ "shame",
353
+ "yebo"
354
+ ],
355
+ "irish": [
356
+ "craic",
357
+ "eejit",
358
+ "feck",
359
+ "fierce",
360
+ "grand",
361
+ "gurrier",
362
+ "knacker",
363
+ "langers",
364
+ "manky",
365
+ "mot",
366
+ "savage",
367
+ "shift",
368
+ "slagging",
369
+ "sound",
370
+ "yoke"
371
+ ],
372
+ "scottish": [
373
+ "bairn",
374
+ "bonnie",
375
+ "braw",
376
+ "dinnae",
377
+ "dreich",
378
+ "frae",
379
+ "ken",
380
+ "lad",
381
+ "lass",
382
+ "wee"
383
+ ]
384
+ }
385
+ },
386
+ "spelling_patterns": {
387
+ "british_american": [
388
+ {
389
+ "name": "ise_ize",
390
+ "description": "British '-ise' vs American '-ize' suffix",
391
+ "british_suffix": "ise",
392
+ "american_suffix": "ize",
393
+ "pattern_british": "\\b\\w+ise\\b",
394
+ "pattern_american": "\\b\\w+ize\\b",
395
+ "weight": 0.7,
396
+ "feature_level": "morphological",
397
+ "note": "Lower weight - '-ize' is also accepted in British English (Oxford spelling), so only '-ise' is strongly diagnostic",
398
+ "examples": [
399
+ "organise/organize",
400
+ "realise/realize",
401
+ "recognise/recognize",
402
+ "apologise/apologize",
403
+ "authorise/authorize",
404
+ "categorise/categorize",
405
+ "centralise/centralize",
406
+ "characterise/characterize",
407
+ "civilise/civilize",
408
+ "colonise/colonize",
409
+ "criticise/criticize",
410
+ "customise/customize",
411
+ "emphasise/emphasize",
412
+ "finalise/finalize",
413
+ "generalise/generalize",
414
+ "hospitalise/hospitalize",
415
+ "hypnotise/hypnotize",
416
+ "legalise/legalize",
417
+ "localise/localize",
418
+ "memorise/memorize",
419
+ "minimise/minimize",
420
+ "modernise/modernize",
421
+ "nationalise/nationalize",
422
+ "normalise/normalize",
423
+ "optimise/optimize",
424
+ "paralyse/paralyze",
425
+ "patronise/patronize",
426
+ "penalise/penalize",
427
+ "personalise/personalize",
428
+ "plagiarise/plagiarize",
429
+ "polarise/polarize",
430
+ "prioritise/prioritize",
431
+ "privatise/privatize",
432
+ "publicise/publicize",
433
+ "rationalise/rationalize",
434
+ "revolutionise/revolutionize",
435
+ "scrutinise/scrutinize",
436
+ "specialise/specialize",
437
+ "stabilise/stabilize",
438
+ "standardise/standardize",
439
+ "summarise/summarize",
440
+ "symbolise/symbolize",
441
+ "sympathise/sympathize",
442
+ "synchronise/synchronize",
443
+ "terrorise/terrorize",
444
+ "tranquillise/tranquilize",
445
+ "trivialise/trivialize",
446
+ "urbanise/urbanize",
447
+ "utilise/utilize",
448
+ "visualise/visualize"
449
+ ],
450
+ "exceptions": ["advertise", "advise", "arise", "comprise", "compromise", "despise", "devise", "disguise", "enterprise", "excise", "exercise", "franchise", "improvise", "incise", "merchandise", "premise", "revise", "rise", "supervise", "surmise", "surprise", "televise"]
451
+ },
452
+ {
453
+ "name": "isation_ization",
454
+ "description": "British '-isation' vs American '-ization' suffix",
455
+ "british_suffix": "isation",
456
+ "american_suffix": "ization",
457
+ "pattern_british": "\\b\\w+isation\\b",
458
+ "pattern_american": "\\b\\w+ization\\b",
459
+ "weight": 0.7,
460
+ "feature_level": "morphological",
461
+ "examples": [
462
+ "organisation/organization",
463
+ "realisation/realization",
464
+ "civilisation/civilization",
465
+ "colonisation/colonization",
466
+ "globalisation/globalization",
467
+ "hospitalisation/hospitalization",
468
+ "nationalisation/nationalization",
469
+ "normalisation/normalization",
470
+ "optimisation/optimization",
471
+ "privatisation/privatization",
472
+ "specialisation/specialization",
473
+ "standardisation/standardization",
474
+ "synchronisation/synchronization",
475
+ "urbanisation/urbanization"
476
+ ]
477
+ },
478
+ {
479
+ "name": "our_or",
480
+ "description": "British '-our' vs American '-or' suffix",
481
+ "british_suffix": "our",
482
+ "american_suffix": "or",
483
+ "pattern_british": "\\b(colour|favour|honour|labour|neighbour|behaviour|endeavour|flavour|harbour|humour|odour|parlour|rumour|savour|splendour|tumour|vapour|vigour|clamour|candour|fervour|rancour|succour|armour|demeanour|valour|saviour|glamour)s?\\b",
484
+ "pattern_american": "\\b(color|favor|honor|labor|neighbor|behavior|endeavor|flavor|harbor|humor|odor|parlor|rumor|savor|splendor|tumor|vapor|vigor|clamor|candor|fervor|rancor|succor|armor|demeanor|valor|savior)s?\\b",
485
+ "weight": 0.95,
486
+ "feature_level": "phonological",
487
+ "note": "High diagnostic value - rarely confused",
488
+ "examples": [
489
+ "colour/color",
490
+ "favour/favor",
491
+ "honour/honor",
492
+ "labour/labor",
493
+ "neighbour/neighbor",
494
+ "behaviour/behavior",
495
+ "endeavour/endeavor",
496
+ "flavour/flavor",
497
+ "harbour/harbor",
498
+ "humour/humor",
499
+ "odour/odor",
500
+ "parlour/parlor",
501
+ "rumour/rumor",
502
+ "savour/savor",
503
+ "splendour/splendor",
504
+ "tumour/tumor",
505
+ "vapour/vapor",
506
+ "vigour/vigor"
507
+ ],
508
+ "exceptions": ["contour", "velour", "paramour", "troubadour"]
509
+ },
510
+ {
511
+ "name": "re_er",
512
+ "description": "British '-re' vs American '-er' suffix",
513
+ "british_suffix": "re",
514
+ "american_suffix": "er",
515
+ "pattern_british": "\\b(centre|theatre|metre|litre|fibre|calibre|goitre|kilometre|louvre|lustre|meagre|mitre|nitre|ochre|reconnoitre|sabre|saltpetre|sepulchre|sombre|spectre|titre|centimetre|millimetre)s?\\b",
516
+ "pattern_american": "\\b(center|theater|meter|liter|fiber|caliber|goiter|kilometer|louver|luster|meager|miter|niter|ocher|reconnoiter|saber|saltpeter|sepulcher|somber|specter|titer|centimeter|millimeter)s?\\b",
517
+ "weight": 0.9,
518
+ "feature_level": "phonological",
519
+ "examples": [
520
+ "centre/center",
521
+ "theatre/theater",
522
+ "metre/meter",
523
+ "litre/liter",
524
+ "fibre/fiber",
525
+ "calibre/caliber",
526
+ "goitre/goiter",
527
+ "kilometre/kilometer",
528
+ "louvre/louver",
529
+ "lustre/luster",
530
+ "manoeuvre/maneuver",
531
+ "meagre/meager",
532
+ "mitre/miter",
533
+ "nitre/niter",
534
+ "ochre/ocher",
535
+ "reconnoitre/reconnoiter",
536
+ "sabre/saber",
537
+ "saltpetre/saltpeter",
538
+ "sepulchre/sepulcher",
539
+ "sombre/somber",
540
+ "spectre/specter",
541
+ "titre/titer"
542
+ ],
543
+ "exceptions": ["acre", "lucre", "massacre", "mediocre", "ogre"]
544
+ },
545
+ {
546
+ "name": "ce_se",
547
+ "description": "British '-ce' vs American '-se' in nouns (licence/license, defence/defense)",
548
+ "british_suffix": "ce",
549
+ "american_suffix": "se",
550
+ "pattern_british": "\\b(defence|offence|licence|pretence)\\b",
551
+ "pattern_american": "\\b(defense|offense|license|pretense)\\b",
552
+ "weight": 0.95,
553
+ "feature_level": "phonological",
554
+ "examples": [
555
+ "defence/defense",
556
+ "offence/offense",
557
+ "licence/license",
558
+ "pretence/pretense"
559
+ ],
560
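+ "notes": "British English spells these nouns with '-ce'; for 'licence' the corresponding British verb is 'license'. American English uses '-se' for these words as both noun and verb, so 'license' alone is a weaker American signal."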
561
+ },
562
+ {
563
+ "name": "ogue_og",
564
+ "description": "British '-ogue' vs American '-og' suffix",
565
+ "british_suffix": "ogue",
566
+ "american_suffix": "og",
567
+ "pattern_british": "\\b(catalogue|dialogue|analogue|epilogue|monologue|prologue|homologue|travelogue|demagogue|pedagogue|synagogue)s?\\b",
568
+ "pattern_american": "\\b(catalog|dialog|analog|epilog|monolog|prolog|homolog|travelog|demagog|pedagog|synagog)s?\\b",
569
+ "weight": 0.8,
570
+ "feature_level": "morphological",
571
+ "examples": [
572
+ "catalogue/catalog",
573
+ "dialogue/dialog",
574
+ "analogue/analog",
575
+ "epilogue/epilog",
576
+ "monologue/monolog",
577
+ "prologue/prolog",
578
+ "homologue/homolog",
579
+ "travelogue/travelog"
580
+ ],
581
+ "notes": "Both spellings are used in American English, but the shorter form is more common in computing contexts"
582
+ },
583
+ {
584
+ "name": "ae_e",
585
+ "description": "British '-ae-' vs American '-e-' (Latin/Greek origins)",
586
+ "british_infix": "ae",
587
+ "american_infix": "e",
588
+ "pattern_british": "\\b(anaemia|anaesthesia|anaesthetic|anaesthetist|archaeology|encyclopaedia|faeces|gynaecology|haematology|haemoglobin|haemophilia|haemorrhage|leukaemia|orthopaedic|paediatric|paediatrician|aeon|mediaeval|caesarean)s?\\b",
589
+ "pattern_american": "\\b(anemia|anesthesia|anesthetic|anesthetist|archeology|encyclopedia|feces|gynecology|hematology|hemoglobin|hemophilia|hemorrhage|leukemia|orthopedic|pediatric|pediatrician|eon|medieval|cesarean)s?\\b",
590
+ "weight": 0.9,
591
+ "feature_level": "phonological",
592
+ "examples": [
593
+ "anaemia/anemia",
594
+ "anaesthesia/anesthesia",
595
+ "anaesthetic/anesthetic",
596
+ "archaeology/archeology",
597
+ "encyclopaedia/encyclopedia",
598
+ "faeces/feces",
599
+ "gynaecology/gynecology",
600
+ "haematology/hematology",
601
+ "haemoglobin/hemoglobin",
602
+ "haemophilia/hemophilia",
603
+ "haemorrhage/hemorrhage",
604
+ "leukaemia/leukemia",
605
+ "orthopaedic/orthopedic",
606
+ "paediatric/pediatric"
607
+ ]
608
+ },
609
+ {
610
+ "name": "oe_e",
611
+ "description": "British '-oe-' vs American '-e-' (Latin/Greek origins)",
612
+ "british_infix": "oe",
613
+ "american_infix": "e",
614
+ "pattern_british": "\\b(foetus|oestrogen|oesophagus|diarrhoea|gonorrhoea|amoeba|manoeuvre|coeliac|homoeopathy|phoenician)s?\\b",
615
+ "pattern_american": "\\b(fetus|estrogen|esophagus|diarrhea|gonorrhea|ameba|maneuver|celiac|homeopathy|phoenician)s?\\b",
616
+ "weight": 0.9,
617
+ "feature_level": "phonological",
618
+ "examples": [
619
+ "foetus/fetus",
620
+ "oestrogen/estrogen",
621
+ "oesophagus/esophagus",
622
+ "diarrhoea/diarrhea",
623
+ "gonorrhoea/gonorrhea",
624
+ "amoeba/ameba",
625
+ "manoeuvre/maneuver"
626
+ ]
627
+ },
628
+ {
629
+ "name": "doubled_l",
630
+ "description": "British doubles 'l' before suffixes, American does not",
631
+ "pattern_british": "\\b(cancelled|cancelling|counselled|counselling|counsellor|dialled|dialling|equalled|equalling|fuelled|fuelling|gruelling|jewelled|jeweller|labelled|labelling|levelled|levelling|libelled|libelling|marvelled|marvelling|marvellous|modelled|modelling|panelled|panelling|pedalled|pedalling|pencilled|pencilling|quarrelled|quarrelling|ravelled|ravelling|revelled|revelling|rivalled|rivalling|signalled|signalling|snorkelled|snorkelling|spiralled|spiralling|stencilled|stencilling|swivelled|swivelling|totalled|totalling|towelled|towelling|travelled|travelling|traveller|trialled|trialling|tunnelled|tunnelling|yodelled|yodelling)\\b",
632
+ "pattern_american": "\\b(canceled|canceling|counseled|counseling|counselor|dialed|dialing|equaled|equaling|fueled|fueling|grueling|jeweled|jeweler|labeled|labeling|leveled|leveling|libeled|libeling|marveled|marveling|marvelous|modeled|modeling|paneled|paneling|pedaled|pedaling|penciled|penciling|quarreled|quarreling|raveled|raveling|reveled|reveling|rivaled|rivaling|signaled|signaling|snorkeled|snorkeling|spiraled|spiraling|stenciled|stenciling|swiveled|swiveling|totaled|totaling|toweled|toweling|traveled|traveling|traveler|trialed|trialing|tunneled|tunneling|yodeled|yodeling)\\b",
633
+ "weight": 0.85,
634
+ "feature_level": "morphological",
635
+ "examples": [
636
+ "cancelled/canceled",
637
+ "cancelling/canceling",
638
+ "counselled/counseled",
639
+ "counselling/counseling",
640
+ "counsellor/counselor",
641
+ "dialled/dialed",
642
+ "dialling/dialing",
643
+ "equalled/equaled",
644
+ "equalling/equaling",
645
+ "fuelled/fueled",
646
+ "fuelling/fueling",
647
+ "gruelling/grueling",
648
+ "jewelled/jeweled",
649
+ "jeweller/jeweler",
650
+ "labelled/labeled",
651
+ "labelling/labeling",
652
+ "levelled/leveled",
653
+ "levelling/leveling",
654
+ "libelled/libeled",
655
+ "libelling/libeling",
656
+ "marvelled/marveled",
657
+ "marvelling/marveling",
658
+ "marvellous/marvelous",
659
+ "modelled/modeled",
660
+ "modelling/modeling",
661
+ "panelled/paneled",
662
+ "panelling/paneling",
663
+ "pedalled/pedaled",
664
+ "pedalling/pedaling",
665
+ "pencilled/penciled",
666
+ "pencilling/penciling",
667
+ "quarrelled/quarreled",
668
+ "quarrelling/quarreling",
669
+ "ravelled/raveled",
670
+ "ravelling/raveling",
671
+ "revelled/reveled",
672
+ "revelling/reveling",
673
+ "rivalled/rivaled",
674
+ "rivalling/rivaling",
675
+ "signalled/signaled",
676
+ "signalling/signaling",
677
+ "snorkelled/snorkeled",
678
+ "snorkelling/snorkeling",
679
+ "spiralled/spiraled",
680
+ "spiralling/spiraling",
681
+ "stencilled/stenciled",
682
+ "stencilling/stenciling",
683
+ "swivelled/swiveled",
684
+ "swivelling/swiveling",
685
+ "totalled/totaled",
686
+ "totalling/totaling",
687
+ "towelled/toweled",
688
+ "towelling/toweling",
689
+ "travelled/traveled",
690
+ "travelling/traveling",
691
+ "traveller/traveler",
692
+ "trialled/trialed",
693
+ "trialling/trialing",
694
+ "tunnelled/tunneled",
695
+ "tunnelling/tunneling",
696
+ "yodelled/yodeled",
697
+ "yodelling/yodeling"
698
+ ]
699
+ },
700
+ {
701
+ "name": "ence_ense",
702
+ "description": "British '-ence' vs American '-ense' in some words",
703
+ "pattern_british": "\\b(defence|offence|pretence)\\b",
704
+ "pattern_american": "\\b(defense|offense|pretense)\\b",
705
+ "examples": [
706
+ "defence/defense",
707
+ "offence/offense",
708
+ "pretence/pretense"
709
+ ]
710
+ },
711
+ {
712
+ "name": "eable_able",
713
+ "description": "British keeps 'e' before '-able', American drops it",
714
+ "pattern_british": "\\b(sizeable|likeable|moveable|rateable|saleable|shakeable|unshakeable|nameable|loveable|liveable|blameable|tameable)\\b",
715
+ "pattern_american": "\\b(sizable|likable|movable|ratable|salable|shakable|unshakable|namable|lovable|livable|blamable|tamable)\\b",
716
+ "weight": 0.8,
717
+ "feature_level": "morphological",
718
+ "examples": [
719
+ "sizeable/sizable",
720
+ "likeable/likable",
721
+ "moveable/movable",
722
+ "rateable/ratable",
723
+ "saleable/salable",
724
+ "shakeable/shakable",
725
+ "unshakeable/unshakable"
726
+ ]
727
+ },
728
+ {
729
+ "name": "xion_ction",
730
+ "description": "British '-xion' vs American '-ction' (archaic in British)",
731
+ "pattern_british": "\\b(connexion|inflexion|reflexion)s?\\b",
732
+ "pattern_american": null,
733
+ "weight": 0.7,
734
+ "feature_level": "morphological",
735
+ "examples": [
736
+ "connexion/connection",
737
+ "inflexion/inflection",
738
+ "reflexion/reflection"
739
+ ],
740
+ "notes": "The '-xion' forms are largely archaic in modern British English. American pattern disabled as -ction is too common."
741
+ },
742
+ {
743
+ "name": "yse_yze",
744
+ "description": "British '-yse' vs American '-yze'",
745
+ "british_suffix": "yse",
746
+ "american_suffix": "yze",
747
+ "pattern_british": "\\b\\w+yse\\b",
748
+ "pattern_american": "\\b\\w+yze\\b",
749
+ "weight": 0.95,
750
+ "feature_level": "morphological",
751
+ "examples": [
752
+ "analyse/analyze",
753
+ "breathalyse/breathalyze",
754
+ "catalyse/catalyze",
755
+ "dialyse/dialyze",
756
+ "electrolyse/electrolyze",
757
+ "hydrolyse/hydrolyze",
758
+ "paralyse/paralyze",
759
+ "psychoanalyse/psychoanalyze"
760
+ ]
761
+ },
762
+ {
763
+ "name": "gramme_gram",
764
+ "description": "British '-gramme' vs American '-gram' (metric units and 'programme')",
765
+ "pattern_british": "\\b(gramme|kilogramme|programme|centigramme|decigramme|milligramme|programmes)\\b",
766
+ "pattern_american": "\\b(gram|kilogram|program|centigram|decigram|milligram|programs)\\b",
767
+ "weight": 0.85,
768
+ "feature_level": "morphological",
769
+ "examples": [
770
+ "gramme/gram",
771
+ "kilogramme/kilogram",
772
+ "programme/program"
773
+ ],
774
+ "notes": "British English uses 'program' for computer software but 'programme' for TV/radio/events"
775
+ },
776
+ {
777
+ "name": "wards_ward",
778
+ "description": "British '-wards' vs American '-ward'",
779
+ "british_suffix": "wards",
780
+ "american_suffix": "ward",
781
+ "pattern_british": "\\b(afterwards|backwards|downwards|forwards|inwards|onwards|outwards|towards|upwards|homewards|northwards|southwards|eastwards|westwards)\\b",
782
+ "pattern_american": "\\b(afterward|backward|downward|forward|inward|onward|outward|toward|upward|homeward|northward|southward|eastward|westward)\\b",
783
+ "weight": 0.6,
784
+ "feature_level": "morphological",
785
+ "examples": [
786
+ "afterwards/afterward",
787
+ "backwards/backward",
788
+ "downwards/downward",
789
+ "forwards/forward",
790
+ "inwards/inward",
791
+ "onwards/onward",
792
+ "outwards/outward",
793
+ "towards/toward",
794
+ "upwards/upward"
795
+ ],
796
+ "notes": "Both forms are used in both dialects, but the preferences differ"
797
+ },
798
+ {
799
+ "name": "ement_ment",
800
+ "description": "British keeps 'e' in judgement/acknowledgement",
801
+ "pattern_british": "\\b(judgement|acknowledgement|abridgement)\\b",
802
+ "pattern_american": "\\b(judgment|acknowledgment|abridgment)\\b",
803
+ "examples": [
804
+ "judgement/judgment",
805
+ "acknowledgement/acknowledgment",
806
+ "abridgement/abridgment"
807
+ ],
808
+ "notes": "British legal writing often uses 'judgment'"
809
+ }
810
+ ],
811
+ "standalone": [
812
+ {"british": "aeroplane", "american": "airplane"},
813
+ {"british": "ageing", "american": "aging"},
814
+ {"british": "aluminium", "american": "aluminum"},
815
+ {"british": "annexe", "american": "annex"},
816
+ {"british": "artefact", "american": "artifact"},
817
+ {"british": "axe", "american": "ax"},
818
+ {"british": "behove", "american": "behoove"},
819
+ {"british": "burnt", "american": "burned"},
820
+ {"british": "busses", "american": "buses"},
821
+ {"british": "cancelled", "american": "canceled"},
822
+ {"british": "catalogue", "american": "catalog"},
823
+ {"british": "cheque", "american": "check"},
824
+ {"british": "chilli", "american": "chili"},
825
+ {"british": "cosy", "american": "cozy"},
826
+ {"british": "defence", "american": "defense"},
827
+ {"british": "dialogue", "american": "dialog"},
828
+ {"british": "disc", "american": "disk"},
829
+ {"british": "doughnut", "american": "donut"},
830
+ {"british": "draught", "american": "draft"},
831
+ {"british": "dreamt", "american": "dreamed"},
832
+ {"british": "encyclopaedia", "american": "encyclopedia"},
833
+ {"british": "enquiry", "american": "inquiry"},
834
+ {"british": "enrol", "american": "enroll"},
835
+ {"british": "enrolment", "american": "enrollment"},
836
+ {"british": "favourite", "american": "favorite"},
837
+ {"british": "fibre", "american": "fiber"},
838
+ {"british": "flavour", "american": "flavor"},
839
+ {"british": "focussed", "american": "focused"},
840
+ {"british": "fulfil", "american": "fulfill"},
841
+ {"british": "gaol", "american": "jail"},
842
+ {"british": "grey", "american": "gray"},
843
+ {"british": "harbour", "american": "harbor"},
844
+ {"british": "honour", "american": "honor"},
845
+ {"british": "humour", "american": "humor"},
846
+ {"british": "jewellery", "american": "jewelry"},
847
+ {"british": "kerb", "american": "curb"},
848
+ {"british": "labour", "american": "labor"},
849
+ {"british": "learnt", "american": "learned"},
850
+ {"british": "licence", "american": "license"},
851
+ {"british": "litre", "american": "liter"},
852
+ {"british": "manoeuvre", "american": "maneuver"},
853
+ {"british": "maths", "american": "math"},
854
+ {"british": "metre", "american": "meter"},
855
+ {"british": "mould", "american": "mold"},
856
+ {"british": "moustache", "american": "mustache"},
857
+ {"british": "neighbour", "american": "neighbor"},
858
+ {"british": "offence", "american": "offense"},
859
+ {"british": "omelette", "american": "omelet"},
860
+ {"british": "organisation", "american": "organization"},
861
+ {"british": "paediatric", "american": "pediatric"},
862
+ {"british": "plough", "american": "plow"},
863
+ {"british": "practise", "american": "practice"},
864
+ {"british": "programme", "american": "program"},
865
+ {"british": "pyjamas", "american": "pajamas"},
866
+ {"british": "realise", "american": "realize"},
867
+ {"british": "recognise", "american": "recognize"},
868
+ {"british": "rumour", "american": "rumor"},
869
+ {"british": "sceptic", "american": "skeptic"},
870
+ {"british": "skilful", "american": "skillful"},
871
+ {"british": "smelt", "american": "smelled"},
872
+ {"british": "speciality", "american": "specialty"},
873
+ {"british": "spelt", "american": "spelled"},
874
+ {"british": "spoilt", "american": "spoiled"},
875
+ {"british": "storey", "american": "story"},
876
+ {"british": "sulphur", "american": "sulfur"},
877
+ {"british": "theatre", "american": "theater"},
878
+ {"british": "tonne", "american": "ton"},
879
+ {"british": "travelled", "american": "traveled"},
880
+ {"british": "travelling", "american": "traveling"},
881
+ {"british": "traveller", "american": "traveler"},
882
+ {"british": "tyre", "american": "tire"},
883
+ {"british": "valour", "american": "valor"},
884
+ {"british": "vigour", "american": "vigor"},
885
+ {"british": "waggon", "american": "wagon"},
886
+ {"british": "whilst", "american": "while"},
887
+ {"british": "whisky", "american": "whiskey"},
888
+ {"british": "wilful", "american": "willful"},
889
+ {"british": "woollen", "american": "woolen"},
890
+ {"british": "yoghurt", "american": "yogurt"}
891
+ ]
892
+ },
893
+ "grammar_patterns": {
894
+ "have_got": {
895
+ "description": "British prefers 'have got', American prefers 'have'",
896
+ "british_pattern": "\\bhave got\\b|\\bhas got\\b|\\b've got\\b|\\b's got\\b",
897
+ "american_pattern": "\\bhave a\\b|\\bhas a\\b",
898
+ "examples": {
899
+ "british": ["I've got a car", "She's got three children", "Have you got any milk?"],
900
+ "american": ["I have a car", "She has three children", "Do you have any milk?"]
901
+ }
902
+ },
903
+ "gotten": {
904
+ "description": "American uses 'gotten' as past participle of 'get'",
905
+ "american_pattern": "\\bgotten\\b",
906
+ "british_equivalent": "got",
907
+ "examples": {
908
+ "american": ["I've gotten better", "Things have gotten worse", "She's gotten a promotion"],
909
+ "british": ["I've got better", "Things have got worse", "She's got a promotion"]
910
+ }
911
+ },
912
+ "collective_nouns": {
913
+ "description": "British treats collective nouns as plural, American as singular",
914
+ "examples": {
915
+ "british": ["The team are playing well", "The government have decided", "The staff are on strike", "The committee have voted"],
916
+ "american": ["The team is playing well", "The government has decided", "The staff is on strike", "The committee has voted"]
917
+ }
918
+ },
919
+ "present_perfect_vs_past": {
920
+ "description": "British uses present perfect more often for recent events",
921
+ "examples": {
922
+ "british": ["I've just eaten", "Have you seen the news?", "She's already left", "I've lost my keys"],
923
+ "american": ["I just ate", "Did you see the news?", "She already left", "I lost my keys"]
924
+ }
925
+ },
926
+ "preposition_usage": {
927
+ "description": "Different preposition preferences",
928
+ "patterns": [
929
+ {
930
+ "context": "weekend",
931
+ "british": "at the weekend",
932
+ "american": "on the weekend"
933
+ },
934
+ {
935
+ "context": "time",
936
+ "british": "quarter past/to",
937
+ "american": "quarter after/of"
938
+ },
939
+ {
940
+ "context": "location",
941
+ "british": "in hospital",
942
+ "american": "in the hospital"
943
+ },
944
+ {
945
+ "context": "correspondence",
946
+ "british": "write to",
947
+ "american": "write"
948
+ },
949
+ {
950
+ "context": "streets",
951
+ "british": "in the street",
952
+ "american": "on the street"
953
+ },
954
+ {
955
+ "context": "teams",
956
+ "british": "in the team",
957
+ "american": "on the team"
958
+ },
959
+ {
960
+ "context": "protest",
961
+ "british": "protest against",
962
+ "american": "protest"
963
+ },
964
+ {
965
+ "context": "different",
966
+ "british": "different to/from",
967
+ "american": "different from/than"
968
+ },
969
+ {
970
+ "context": "fill",
971
+ "british": "fill in a form",
972
+ "american": "fill out a form"
973
+ },
974
+ {
975
+ "context": "home",
976
+ "british": "at home",
977
+ "american": "home"
978
+ }
979
+ ]
980
+ },
981
+ "verb_forms": {
982
+ "description": "Different past tense and participle forms",
983
+ "patterns": [
984
+ {"verb": "dive", "british_past": "dived", "american_past": "dove"},
985
+ {"verb": "fit", "british_past": "fitted", "american_past": "fit"},
986
+ {"verb": "get", "british_participle": "got", "american_participle": "gotten"},
987
+ {"verb": "plead", "british_past": "pleaded", "american_past": "pled"},
988
+ {"verb": "prove", "british_participle": "proved", "american_participle": "proven"},
989
+ {"verb": "quit", "british_past": "quitted", "american_past": "quit"},
990
+ {"verb": "saw", "british_past": "sawed", "american_past": "sawed", "british_participle": "sawn", "american_participle": "sawed"},
991
+ {"verb": "sew", "british_participle": "sewn", "american_participle": "sewed"},
992
+ {"verb": "shine", "british_past": "shone", "american_past": "shined"},
993
+ {"verb": "show", "british_participle": "shown", "american_participle": "showed"},
994
+ {"verb": "shrink", "british_past": "shrank", "american_past": "shrunk"},
995
+ {"verb": "sink", "british_past": "sank", "american_past": "sunk"},
996
+ {"verb": "smell", "british_past": "smelt", "american_past": "smelled"},
997
+ {"verb": "sneak", "british_past": "sneaked", "american_past": "snuck"},
998
+ {"verb": "spell", "british_past": "spelt", "american_past": "spelled"},
999
+ {"verb": "spill", "british_past": "spilt", "american_past": "spilled"},
1000
+ {"verb": "spit", "british_past": "spat", "american_past": "spit"},
1001
+ {"verb": "spring", "british_past": "sprang", "american_past": "sprung"},
1002
+ {"verb": "stink", "british_past": "stank", "american_past": "stunk"},
1003
+ {"verb": "strive", "british_past": "strove", "american_past": "strived"},
1004
+ {"verb": "swear", "british_participle": "sworn", "american_participle": "swore"},
1005
+ {"verb": "wake", "british_past": "woke", "american_past": "waked"},
1006
+ {"verb": "wet", "british_past": "wet", "american_past": "wetted"}
1007
+ ]
1008
+ },
1009
+ "shall_will": {
1010
+ "description": "British uses 'shall' for first person future, American prefers 'will'",
1011
+ "british_pattern": "\\b(I|we) shall\\b",
1012
+ "american_pattern": "\\b(I|we) will\\b",
1013
+ "examples": {
1014
+ "british": ["I shall go tomorrow", "Shall we dance?", "We shall see"],
1015
+ "american": ["I will go tomorrow", "Will we dance?", "We will see"]
1016
+ }
1017
+ },
1018
+ "do_auxiliary": {
1019
+ "description": "British can use possessive 'have' without 'do'-support in negatives and questions; American uses 'do' forms",
1020
+ "examples": {
1021
+ "british": ["I haven't a clue", "Have you any idea?", "I hadn't time"],
1022
+ "american": ["I don't have a clue", "Do you have any idea?", "I didn't have time"]
1023
+ }
1024
+ },
1025
+ "neednt_dont_need": {
1026
+ "description": "British uses 'needn't', American uses 'don't need to'",
1027
+ "british_pattern": "\\bneedn't\\b",
1028
+ "american_pattern": "\\bdon't need to\\b|\\bdoesn't need to\\b",
1029
+ "examples": {
1030
+ "british": ["You needn't worry", "She needn't come"],
1031
+ "american": ["You don't need to worry", "She doesn't need to come"]
1032
+ }
1033
+ }
1034
+ },
1035
+ "punctuation_patterns": {
1036
+ "quotation_marks": {
1037
+ "description": "British prefers single quotes, American prefers double",
1038
+ "british_primary": "'",
1039
+ "american_primary": "\"",
1040
+ "examples": {
1041
+ "british": ["He said, 'Hello.'", "'That's odd,' she replied."],
1042
+ "american": ["He said, \"Hello.\"", "\"That's odd,\" she replied."]
1043
+ }
1044
+ },
1045
+ "period_placement": {
1046
+ "description": "American puts periods inside quotes, British outside (for non-sentence quotes)",
1047
+ "examples": {
1048
+ "british": ["The sign said 'No Entry'.", "She called it 'rubbish'."],
1049
+ "american": ["The sign said \"No Entry.\"", "She called it \"rubbish.\""]
1050
+ }
1051
+ },
1052
+ "titles_abbreviations": {
1053
+ "description": "British omits periods after titles, American includes them",
1054
+ "british_pattern": "\\b(Mr|Mrs|Ms|Dr|Prof|Jr|Sr)\\b(?!\\.)",
1055
+ "american_pattern": "\\b(Mr|Mrs|Ms|Dr|Prof|Jr|Sr)\\.",
1056
+ "examples": {
1057
+ "british": ["Mr Smith", "Dr Jones", "Mrs Brown"],
1058
+ "american": ["Mr. Smith", "Dr. Jones", "Mrs. Brown"]
1059
+ }
1060
+ },
1061
+ "date_formats": {
1062
+ "description": "British uses DD/MM/YYYY, American uses MM/DD/YYYY",
1063
+ "british_pattern": "\\b(0?[1-9]|[12][0-9]|3[01])/(0?[1-9]|1[0-2])/\\d{2,4}\\b",
1064
+ "american_pattern": "\\b(0?[1-9]|1[0-2])/(0?[1-9]|[12][0-9]|3[01])/\\d{2,4}\\b",
1065
+ "examples": {
1066
+ "british": ["25/01/2026", "1st January 2026", "1 January 2026"],
1067
+ "american": ["01/25/2026", "January 1st, 2026", "January 1, 2026"]
1068
+ }
1069
+ },
1070
+ "time_formats": {
1071
+ "description": "British prefers 24-hour clock, American prefers 12-hour with AM/PM",
1072
+ "examples": {
1073
+ "british": ["14:30", "09:00", "half past two"],
1074
+ "american": ["2:30 PM", "9:00 AM", "two thirty"]
1075
+ }
1076
+ },
1077
+ "oxford_comma": {
1078
+ "description": "American typically uses Oxford comma, British often omits",
1079
+ "examples": {
1080
+ "british": ["red, white and blue", "apples, oranges and bananas"],
1081
+ "american": ["red, white, and blue", "apples, oranges, and bananas"]
1082
+ },
1083
+ "notes": "This is a tendency rather than a strict rule; both forms are used in both dialects"
1084
+ }
1085
+ },
1086
+ "idiomatic_expressions": {
1087
+ "british": [
1088
+ {"expression": "at the end of the day", "meaning": "ultimately"},
1089
+ {"expression": "bob's your uncle", "meaning": "there you have it"},
1090
+ {"expression": "brilliant", "meaning": "excellent (as exclamation)"},
1091
+ {"expression": "can't be arsed", "meaning": "can't be bothered"},
1092
+ {"expression": "couldn't care less", "meaning": "don't care at all"},
1093
+ {"expression": "fancy", "meaning": "want/like (as verb)"},
1094
+ {"expression": "give someone a bell", "meaning": "call someone"},
1095
+ {"expression": "go pear-shaped", "meaning": "go wrong"},
1096
+ {"expression": "have a go", "meaning": "try"},
1097
+ {"expression": "it's not my cup of tea", "meaning": "not my preference"},
1098
+ {"expression": "knackered", "meaning": "exhausted"},
1099
+ {"expression": "mind the gap", "meaning": "watch out for the space"},
1100
+ {"expression": "not my cup of tea", "meaning": "not to my liking"},
1101
+ {"expression": "pissed", "meaning": "drunk (not angry)"},
1102
+ {"expression": "quite", "meaning": "somewhat (understating)"},
1103
+ {"expression": "rather", "meaning": "somewhat/quite"},
1104
+ {"expression": "spot on", "meaning": "exactly right"},
1105
+ {"expression": "straightaway", "meaning": "immediately"},
1106
+ {"expression": "take the piss", "meaning": "mock/tease"},
1107
+ {"expression": "the bee's knees", "meaning": "excellent"},
1108
+ {"expression": "throw a spanner in the works", "meaning": "cause problems"}
1109
+ ],
1110
+ "american": [
1111
+ {"expression": "a dime a dozen", "meaning": "very common"},
1112
+ {"expression": "ballpark figure", "meaning": "rough estimate"},
1113
+ {"expression": "beat around the bush", "meaning": "avoid the main topic"},
1114
+ {"expression": "blow off steam", "meaning": "release stress"},
1115
+ {"expression": "could care less", "meaning": "don't care (informal)"},
1116
+ {"expression": "figure out", "meaning": "understand/solve"},
1117
+ {"expression": "get the hang of", "meaning": "learn how to do"},
1118
+ {"expression": "go figure", "meaning": "who would have thought"},
1119
+ {"expression": "go with the flow", "meaning": "be flexible"},
1120
+ {"expression": "hang out", "meaning": "spend time casually"},
1121
+ {"expression": "heads up", "meaning": "warning/advance notice"},
1122
+ {"expression": "hit the sack", "meaning": "go to bed"},
1123
+ {"expression": "how come", "meaning": "why"},
1124
+ {"expression": "monday morning quarterback", "meaning": "critic after the fact"},
1125
+ {"expression": "on the same page", "meaning": "in agreement"},
1126
+ {"expression": "piece of cake", "meaning": "easy"},
1127
+ {"expression": "pissed", "meaning": "angry (not drunk)"},
1128
+ {"expression": "take a rain check", "meaning": "postpone"},
1129
+ {"expression": "throw a wrench in", "meaning": "cause problems"},
1130
+ {"expression": "touch base", "meaning": "make contact"},
1131
+ {"expression": "you bet", "meaning": "certainly"}
1132
+ ]
1133
+ }
1134
+ }