strings-inflection 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,715 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Strings
4
+ module Inflection
5
+ module Nouns
6
# Remove a suffix from every word in a list
#
# A word is shortened only when it really ends with the suffix. An empty
# suffix or a word with a different ending yields an unchanged copy of
# the word instead of a blindly truncated (or empty) string.
#
# @param [Array<String>] list
#   the words to strip
# @param [String] suffix
#   the ending to remove from each word
#
# @return [Array<String>]
#   a list of all stems without suffixes
#
# @api private
def self.to_stem(list, suffix)
  list.map do |word|
    if word.end_with?(suffix)
      # length-based slice: stays correct when suffix is empty,
      # whereas word[0...-0] would collapse to ""
      word[0, word.size - suffix.size]
    else
      word.dup
    end
  end
end
15
+
16
# Nouns ending in -o that are matched by the "-os" rules, i.e. their
# plural is formed with a plain -s (and the singular strips only that -s).
@category_o_os = %w[
  albino alto archipelago armadillo auto
  basso bongo banjo bolero burrito burro bronco
  canto cargo casino cello commando congo contralto crescendo
  ditto dynamo
  embryo
  fiasco fugato
  generalissimo ghetto guano
  halo
  jalapeno inferno jumbo
  kimono
  lingo libido limo logo lumbago
  macro maestro magneto mambo manifesto medico metro memo
  octavo oregano
  photo piano piccolo pinto placebo pro polo pomelo poncho
  quarto
  rhino
  stylo tempo solo soprano silo sombrero stiletto stucco
  taco taro tobacco tomatillo tornado torso tuxedo
  yoyo
  zero
]
94
+
95
# Stems of -ex nouns whose plural ends in -ices (codex -> codices).
@category_ex_ices = to_stem(%w[codex index murex silex vortex], "ex")

# Stems of -on nouns whose plural ends in -a (criterion -> criteria).
@category_on_a = to_stem(%w[criterion phenomenon], "on")

# Nouns ending in -a whose plural ends in -ae (alga -> algae).
@category_a_ae = %w[alumna alga antenna vertebra]

# Stems of -um nouns whose plural ends in -a (datum -> data).
@category_um_a = to_stem(
  %w[
    agendum bacterium candelabrum datum desideratum erratum
    extremum formicarium fusarium ovum stratum
  ],
  "um"
)
128
+
129
# Nouns ending in -ie; the singular rules strip only the final -s
# from their plurals instead of turning -ies into -y.
@category_ie_ies = %w[
  calorie cookie
  beastie beanie bowtie bourgeoisie brownie
  doggie
  floozie
  goodie
  lie
  movie
  necktie
  oldie
  pie pinkie prairie
  ramie
  tie
  zombie
]

# Nouns ending in -us that are listed explicitly for the "-uses" plural.
@category_us_uses = %w[
  apparatus
  cantus coitus
  focus fungus
  genius
  hiatus
  impetus incubus
  nexus nimbus nucleolus
  radius
  plexus prospectus
  sinus status stylus succubus
  torus
  umbilicus uterus
]

# Nouns ending in -man that take -s in the plural rather than -men.
@category_man_mans = %w[
  ataman
  caiman cayman ceriman
  desman dolman
  farman
  harman hetman human
  leman
  ottoman
  shaman
  talisman
]
193
+
194
# Stems of -us nouns whose plural ends in -i (cactus -> cacti).
@category_us_i = to_stem(
  %w[alumnus cactus fungus nucleus leptocephalus stimulus],
  "us"
)
202
+
203
# Words whose plural only adds -s, so the singular rules strip just that
# trailing -s. Mostly -se endings, plus a few others (creche, doe, hoe,
# initialize, pickaxe, resolve) that need the same treatment.
@category_se_ses = %w[
  apse abuse anise
  bookcase
  case cheese close creche
  database dose doe disease
  enterprise excuse expose
  glimpse glucose grease
  high-rise hoe hose
  impulse initialise initialize inverse
  merchandise mousse
  noise nose nucleotidase
  paradise phase phrase pickaxe porpoise pose prose pulse purchase purpose
  refuse resolve reverse rise rose
  staircase suitcase sunrise surprise
  tortoise
  universe use
  vase verse viscose vise
]

# Nouns ending in -ze whose plural only adds -s.
@category_ze_zes = %w[haze kamikaze maize prize size]
269
+
270
# Nouns treated as uncountable.
#
# Each word is listed exactly once; the original relative order is kept.
@uncountable = %w[
  acoustics adrenalin aircraft aluminium aluminum analytics
  anesthesiology anime applewood arthur athletics
  bakeware barium barracks barbeque bellows beyond bifocals bison
  bloodflow bowling bream breeches britches
  cappuccino carp cash catsup cattle celeriac celsius chemotaxis cod
  coevolution collagen colorlessness commonsense
  cooking cooperation coordination copying corporatism corps
  counterterrorism crystallography crayfish
  clothes chassis chaos chem chess chino cilantro contretemps clippers
  decrease decryption deer debris diabetes diagnosis dining disco djinn
  downstairs dynamics
  earnings eaves east economics ego electronics elk eland emery ethics
  equipment eyestrain
  fahrenheit fish flounder fedelini fibrosis flesh footwear fun
  grass gallows genetics garbage graffiti gigantism goldfish golf
  gravitas gymnastics
  hovercraft headquarters hydraulics herpes high-jinks homework
  innings info information internet
  jackanapes jazz jeans jellyfish judo
  karate kendo kingfish
  laryngitis lieu linguistics logistics lox lycra
  mackerel mankind mathematics measles melatonin mercury methane
  molasses money most moose multimedia mews mosquito mumps
  netball news
  oasis osmosis oxygen ozone
  paintwork pantyhose pathogenesis pants physics pincers pince-nez
  police postgres pliers proceedings politics press pyridine
  quinoa
  rain rabies reindeer rice rhinoceros robotics rubbish rugby
  salmon series sheep shears spacecraft species swine scissors sea-bass
  seafood semantics soccer software soot soy spaghetti spandex stamina
  subconscious sushi suspenders sustenance sweets swordfish
  tech tectonics tennis teriyaki thanks tights thermals trousers
  titanium today tonight tongs tomography trash trout trigonometry
  tuna twins
  underclothes underpants upstairs
  vascular veal venison vertigo veto vitro vivo volcano voting
  warfare wasabi watercress wealth whiting wildebeest wheat west whey
  woolens willingness
  yang yoga young
  zebrafish
]
511
+
512
# Irregular singularization rules as [pattern, replacement] pairs.
# They are placed ahead of the regular rules when @singulars is built.
#
# The interpolated word lists are joined into alternations up front.
# Note that where a pattern reads "(.*<alternation>)", the leading ".*"
# belongs to the first alternative only.
ie_alt   = @category_ie_ies.join("|")
ze_alt   = @category_ze_zes.join("|")
se_alt   = @category_se_ses.join("|")
man_alt  = @category_man_mans.join("|")
us_i_alt = @category_us_i.join("|")

@singular_irregular = [
  [/\A(.*[mlftw]ax)es$/i,                    '\1'],
  [/\A(.*[k]axe)s$/i,                        '\1'],
  [/\A(.*ax)es$/i,                           '\1is'],
  [/\A(.*iri)[ds]es$/,                       '\1s'],
  [/\A(.*ox)en$/i,                           '\1'],
  [/\A(.*child)ren$/i,                       '\1'],
  [/\A(.*pe)ople$/i,                         '\1rson'],
  [/\A(.*g)eese$/i,                          '\1oose'],
  [/\A(.*(mong|cab)oose)s$/i,                '\1'],
  [/\A(.*t)eeth$/i,                          '\1ooth'],
  [/\A(.*f)eet$/i,                           '\1oot'],
  [/\A(.*geni)(es|i)$/i,                     '\1e'],
  [/\A(.*(ga|canva))sses$/i,                 '\1s'],
  [/\A(.*di)ce$/i,                           '\1e'],
  [/\A(.*pen)ce$/i,                          '\1ny'],
  [/\A(.*(append|matr))ices$/i,              '\1ix'],
  [/\A(.*loo)ies$/i,                         '\1ey'],
  [/\A(.*(l|m))ice$/i,                       '\1ouse'],
  [/\A(.*lanche)s$/,                         '\1'],
  [/\A(.*canoe)s$/,                          '\1'],
  [/\A(.*[aum]ise)s$/,                       '\1'],
  [/\A(.*[u]rse)s$/,                         '\1'],
  [/\A(.*(trell|it)is)es$/i,                 '\1'],
  [/\A(.*chrysalis)es$/i,                    '\1'],
  [/\A(.*cirr)i$/i,                          '\1us'],
  [/\A(.*nase)s$/i,                          '\1'],
  [/\A(.*(heart|head|moust|must)ache)s$/i,   '\1'],
  [/\A(.*(galoshe|quiche|niche))s$/i,        '\1'],
  [/\A(.*oboe)s$/i,                          '\1'],
  [/\A(.*olive)s$/i,                         '\1'],
  [/\A(toe)s$/i,                             '\1'],
  # ends in -ies
  [/\A(.*#{ie_alt})s$/,                      '\1'],
  [/\A(.*yrie)s$/,                           '\1'],
  # ends in -zes
  [/\A(.*#{ze_alt})s$/,                      '\1'],
  # ends in -ses
  [/\A(.*#{se_alt})s$/,                      '\1'],
  [/\A(.*pancreas)es$/,                      '\1'],
  [/\A(.*ease)s$/,                           '\1'],
  [/\A(.*ipse)s$/,                           '\1'],
  [/\A(.*hubris)es$/i,                       '\1'],
  [/\A(.*meris)es$/,                         '\1'],
  [/\A(.*lens)es$/,                          '\1'],
  [/\A(.*nis)es$/i,                          '\1'],
  # ends in -man
  [/\A(#{man_alt})s$/,                       '\1'],
  [/\A(.*m)en$/i,                            '\1an'],
  # ends in -i
  [/\A(#{us_i_alt})i$/,                      '\1us'],
  # ends in -f
  [/\A(.*(tur|thie|loa|hoo|bee))ves$/i,      '\1f'],
  [/\A(.*(roof|belief|chef|chief))s$/i,      '\1']
]
567
+
568
# Regular singularization rules as [pattern, replacement] pairs.
# Order matters: more specific patterns come before the generic
# fallbacks at the bottom of the table.
@singular_rules = [
  [/\A(.+[aeiou])os$/, "\\1o"],
  # ends in -uses
  [/\A(.+[i]as)es$/i, "\\1"],
  [/\A(.+(aus|ous))es$/i, "\\1e"],
  [/\A(.+us)es$/i, "\\1"],
  # ends in -ices, -a, -ae
  [/\A(#{@category_ex_ices.join("|")})ices$/i, "\\1ex"],
  [/\A(#{@category_um_a.join("|")})a$/i, "\\1um"],
  [/\A(#{@category_a_ae.join("|")})e$/i, "\\1"],
  # Only the known -on/-a stems (criteria -> criterion). A catch-all
  # "(.*)a" here would turn every word ending in "a" into "-on".
  [/\A(#{@category_on_a.join("|")})a$/i, "\\1on"],
  # ends in -ves
  [/\A(.*[aeo]l)ves$/, "\\1f"],
  [/\A(.*[^d]ea)ves$/, "\\1f"],
  [/\A(.*[nlw]i)ves$/i, "\\1fe"],
  [/\A(.+ar)ves$/i, "\\1f"],
  # ends in -ys
  [/\A(.+[aeiou]y)s$/i, "\\1"],
  # ends in -ches, -shes, -sses, -zes, -xes
  [/\A(.+uzz)es$/i, "\\1"],
  [/\A(.+)zzes$/i, "\\1z"],
  [/\A(.+[en]ze)s$/i, "\\1"],
  [/\A(.+orse)s$/i, "\\1"],
  [/\A(.*r[ct]ise)s$/, "\\1"],
  [/\A(.+[n]s)es$/i, "\\1e"],
  [/\A(.+[^ns]s)es$/i, "\\1is"],
  [/\A(.+(ss|sh|ch|x|z))es$/i, "\\1"],
  # ends in -ies
  [/\A(.+)ies$/i, "\\1y"],
  # ends in -os, -oes
  [/\A(#{@category_o_os.join("|")})s$/i, "\\1"],
  [/\A(.+[aeiou]o)s$/i, "\\1"],
  [/\A(.+ho)es$/i, "\\1e"],
  [/\A(.+o)es$/i, "\\1"],
  [/\A(.+)ses$/i, "\\1s"],
  [/\A(.+)s$/i, "\\1"]
]

# Irregular rules first, then the regular ones.
@singulars = @singular_irregular + @singular_rules
607
+
608
# Irregular pluralization rules as [pattern, replacement] pairs.
# They are placed ahead of the regular rules when @plurals is built.
man_words  = @category_man_mans.join("|")
us_i_stems = @category_us_i.join("|")

@plural_irregular = [
  [/\A(.*avocado)$/i,                                  '\1s'],
  [/\A(.*child)$/i,                                    '\1ren'],
  [/\A(.*pe)rson$/i,                                   '\1ople'],
  [/\A(.*mongoose)$/i,                                 '\1s'],
  [/\A(.*g)oose$/i,                                    '\1eese'],
  [/\A(.*t)ooth$/i,                                    '\1eeth'],
  [/\A(.*f)oot$/i,                                     '\1eet'],
  [/\A(ox)$/i,                                         '\1en'],
  [/\A(.*qu)y$/i,                                      '\1ies'],
  [/\A(di)e$/i,                                        '\1ce'],
  [/\A(.*pen)ny$/i,                                    '\1ce'],
  [/\A(.*(append|matr))ix$/i,                          '\1ices'],
  [/\A(.*trilby)$/i,                                   '\1s'],
  [/\A(.*loo)ey$/i,                                    '\1ies'],
  [/\A(.*(chrysa|trel)lis)$/i,                         '\1es'],
  [/\A(.*cirr)us$/i,                                   '\1i'],
  [/\A(.*blouse)$/i,                                   '\1s'],
  [/\A(.*canvas)$/i,                                   '\1ses'],
  [/\A(.*(l|m))ouse$/i,                                '\1ice'],
  [/\A(.*gyro)$/i,                                     '\1s'],
  [/\A(.*helo)$/i,                                     '\1s'],
  [/\A(.*lens)$/i,                                     '\1es'],
  [/\A(.*patriarch)$/i,                                '\1s'],
  [/\A(.*nis)$/i,                                      '\1es'],
  [/\A(.*pancreas)$/i,                                 '\1es'],
  # ends in -man
  [/\A(#{man_words})$/,                                '\1s'],
  [/\A(.*m)an$/i,                                      '\1en'],
  # ends in -us (this pattern is not anchored at the start)
  [/(#{us_i_stems})us$/,                               '\1i'],
  # ends in -ch, -sh, -ss, -z, -x
  [/\A(.*(stomach|epoch))$/i,                          '\1s'],
  [/\A(.*gas)$/i,                                      '\1ses'],
  # ends in -f
  [/\A(.*(tur|thie|loa|hoo))f$/i,                      '\1ves'],
  [/\A(.*(behalf|beef|roof|belief|chef|chief))$/i,     '\1s'],
  # ends in -s
  [/\A(.+[a]s)$/,                                      '\1es']
]
648
+
649
# Regular pluralization rules as [pattern, replacement] pairs.
# Order matters: the last entry matches any non-empty word and appends -s.
us_uses_words = @category_us_uses.join("|")
ex_stems      = @category_ex_ices.join("|")
um_stems      = @category_um_a.join("|")
on_stems      = @category_on_a.join("|")
a_ae_words    = @category_a_ae.join("|")
o_os_words    = @category_o_os.join("|")

@plural_rules = [
  # ends in -is
  [/([csx])is$/i,                  '\1es'],
  [/(ri)s$/i,                      '\1ses'],
  # ends in -us
  [/\A(#{us_uses_words})$/i,       '\1es'],
  [/\A(.+us)$/i,                   '\1es'],
  # ends in -ex, -um, -on, -a
  [/\A(#{ex_stems})ex$/i,          '\1ices'],
  [/\A(#{um_stems})um$/i,          '\1a'],
  [/\A(#{on_stems})on$/i,          '\1a'],
  [/(#{a_ae_words})$/i,            '\1e'],
  # ends in -ch, -sh, -ss, -z, -x
  [/\A(.+[ie]z)$/i,                '\1zes'],
  [/\A(.+(ss|sh|ch|x|z))$/i,       '\1es'],
  # ends in -f/fe
  [/\A(.*[aeo]l)f$/,               '\1ves'],
  [/\A(.*[^d]ea)f$/,               '\1ves'],
  [/\A(.*[nlw]i)fe$/i,             '\1ves'],
  [/\A(.+ar)f$/i,                  '\1ves'],
  # ends in -y
  [/\A((.+)[aeiou]y)$/i,           '\1s'],
  [/\A(.+)y$/i,                    '\1ies'],
  # ends in -o
  [/\A(#{o_os_words})$/i,          '\1s'],
  [/\A((.+)[aeiou]o)$/i,           '\1s'],
  [/\A(.+o)$/i,                    '\1es'],
  # default
  [/\A(.+itis)$/i,                 '\1es'],
  [/\A(.+)s$/i,                    '\1s'],
  [/\A(.+)$/i,                     '\1s']
]

# Irregular rules first, then the regular ones.
@plurals = @plural_irregular + @plural_rules
683
+
684
# The list of uncountable nouns
#
# @return [Array<String>]
#
# @api private
module_function def uncountable
  @uncountable
end
693
+
694
# The singularization rules, irregular ones first
#
# @return [Array<Array(Regexp, String)>]
#   pairs of a matching pattern and its replacement string
#
# @api private
module_function def singulars
  @singulars
end
703
+
704
# The pluralization rules, irregular ones first
#
# @return [Array<Array(Regexp, String)>]
#   pairs of a matching pattern and its replacement string
#
# @api private
module_function def plurals
  @plurals
end
713
+ end # Nouns
714
+ end # Inflection
715
+ end # Strings