srx-languagetool 0.11.0 → 0.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +1 -1
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +34 -31
- data/lib/srx/languagetool/version.rb +1 -1
- data/lib/srx/segment.srx +102 -16
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 037ab00a25a87ba79d5bb760cc4d292aad1289d792edea55d8b05034a2ac0f5f
|
4
|
+
data.tar.gz: 760147c7ba571a06943244728a709a02bedcb320244342ba38694a1148091a7b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 15184b2b378ab3c2f0667ac6168894583959833403b4181c223eb5cd51a43ab8286c2f9110c1769fd5fb9898aa386cff525e6c170e48bc6aa2247d2cf171126c
|
7
|
+
data.tar.gz: e30f82724c3bcb264bde6cf2a99f3ea09eb6815f43a9609b71b2b4bbb1fd2e662cbb1abee2bdfd52035130fee9ed1118fe545f48efb61b103c27acaf29773e2e
|
data/.github/workflows/main.yml
CHANGED
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
srx-languagetool (0.
|
4
|
+
srx-languagetool (0.13.0)
|
5
5
|
srx (< 1.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -9,69 +9,72 @@ GEM
|
|
9
9
|
specs:
|
10
10
|
ast (2.4.2)
|
11
11
|
backport (1.2.0)
|
12
|
-
benchmark (0.
|
12
|
+
benchmark (0.3.0)
|
13
13
|
byebug (11.1.3)
|
14
|
-
diff-lcs (1.5.
|
14
|
+
diff-lcs (1.5.1)
|
15
15
|
e2mmap (0.1.0)
|
16
|
-
jaro_winkler (1.5.
|
17
|
-
json (2.
|
16
|
+
jaro_winkler (1.5.6)
|
17
|
+
json (2.7.1)
|
18
18
|
kramdown (2.4.0)
|
19
19
|
rexml
|
20
20
|
kramdown-parser-gfm (1.1.0)
|
21
21
|
kramdown (~> 2.0)
|
22
|
-
|
23
|
-
|
22
|
+
language_server-protocol (3.17.0.3)
|
23
|
+
minitest (5.22.3)
|
24
|
+
nokogiri (1.15.6-x86_64-darwin)
|
24
25
|
racc (~> 1.4)
|
25
|
-
parallel (1.
|
26
|
-
parser (3.
|
26
|
+
parallel (1.24.0)
|
27
|
+
parser (3.3.0.5)
|
27
28
|
ast (~> 2.4.1)
|
28
|
-
|
29
|
+
racc
|
30
|
+
racc (1.7.3)
|
29
31
|
rainbow (3.1.1)
|
30
|
-
rake (13.0
|
31
|
-
|
32
|
+
rake (13.1.0)
|
33
|
+
rbs (2.8.4)
|
34
|
+
regexp_parser (2.9.0)
|
32
35
|
reverse_markdown (2.1.1)
|
33
36
|
nokogiri
|
34
|
-
rexml (3.2.
|
35
|
-
rspec-expectations (3.
|
37
|
+
rexml (3.2.6)
|
38
|
+
rspec-expectations (3.13.0)
|
36
39
|
diff-lcs (>= 1.2.0, < 2.0)
|
37
|
-
rspec-support (~> 3.
|
38
|
-
rspec-support (3.
|
39
|
-
rubocop (1.
|
40
|
+
rspec-support (~> 3.13.0)
|
41
|
+
rspec-support (3.13.1)
|
42
|
+
rubocop (1.62.1)
|
40
43
|
json (~> 2.3)
|
44
|
+
language_server-protocol (>= 3.17.0)
|
41
45
|
parallel (~> 1.10)
|
42
|
-
parser (>= 3.
|
46
|
+
parser (>= 3.3.0.2)
|
43
47
|
rainbow (>= 2.2.2, < 4.0)
|
44
48
|
regexp_parser (>= 1.8, < 3.0)
|
45
49
|
rexml (>= 3.2.5, < 4.0)
|
46
|
-
rubocop-ast (>= 1.
|
50
|
+
rubocop-ast (>= 1.31.1, < 2.0)
|
47
51
|
ruby-progressbar (~> 1.7)
|
48
52
|
unicode-display_width (>= 2.4.0, < 3.0)
|
49
|
-
rubocop-ast (1.
|
50
|
-
parser (>= 3.
|
53
|
+
rubocop-ast (1.31.2)
|
54
|
+
parser (>= 3.3.0.4)
|
51
55
|
ruby-progressbar (1.13.0)
|
52
|
-
solargraph (0.
|
56
|
+
solargraph (0.50.0)
|
53
57
|
backport (~> 1.2)
|
54
58
|
benchmark
|
55
|
-
bundler (
|
59
|
+
bundler (~> 2.0)
|
56
60
|
diff-lcs (~> 1.4)
|
57
61
|
e2mmap
|
58
62
|
jaro_winkler (~> 1.5)
|
59
63
|
kramdown (~> 2.3)
|
60
64
|
kramdown-parser-gfm (~> 1.1)
|
61
65
|
parser (~> 3.0)
|
62
|
-
|
63
|
-
|
66
|
+
rbs (~> 2.0)
|
67
|
+
reverse_markdown (~> 2.0)
|
68
|
+
rubocop (~> 1.38)
|
64
69
|
thor (~> 1.0)
|
65
70
|
tilt (~> 2.0)
|
66
71
|
yard (~> 0.9, >= 0.9.24)
|
67
72
|
srx (0.6.0)
|
68
73
|
nokogiri (~> 1.11)
|
69
|
-
thor (1.
|
70
|
-
tilt (2.
|
71
|
-
unicode-display_width (2.
|
72
|
-
|
73
|
-
yard (0.9.28)
|
74
|
-
webrick (~> 1.7.0)
|
74
|
+
thor (1.3.1)
|
75
|
+
tilt (2.3.0)
|
76
|
+
unicode-display_width (2.5.0)
|
77
|
+
yard (0.9.36)
|
75
78
|
|
76
79
|
PLATFORMS
|
77
80
|
x86_64-darwin-20
|
data/lib/srx/segment.srx
CHANGED
@@ -5,7 +5,7 @@
|
|
5
5
|
<formathandle type="end" include="yes"></formathandle>
|
6
6
|
<formathandle type="isolated" include="no"></formathandle>
|
7
7
|
<okpsrx:options oneSegmentIncludesAll="no" trimLeadingWhitespaces="no" trimTrailingWhitespaces="no" useJavaRegex="yes" useIcu4JBreakRules="no" treatIsolatedCodesAsWhitespace="no"></okpsrx:options>
|
8
|
-
<okpsrx:sample language="nl" useMappedRules="yes">
|
8
|
+
<okpsrx:sample language="nl" useMappedRules="yes"> ON! is een omroep.</okpsrx:sample>
|
9
9
|
<okpsrx:rangeRule></okpsrx:rangeRule>
|
10
10
|
</header>
|
11
11
|
<body>
|
@@ -1164,7 +1164,7 @@
|
|
1164
1164
|
<afterbreak>D\.?</afterbreak>
|
1165
1165
|
</rule>
|
1166
1166
|
<rule break="no">
|
1167
|
-
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1167
|
+
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Aa]cq|[Ii]ncl?|[Ee]xcl|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1168
1168
|
<afterbreak>[^\p{Lu}]|I</afterbreak>
|
1169
1169
|
</rule>
|
1170
1170
|
<rule break="no">
|
@@ -1273,7 +1273,7 @@
|
|
1273
1273
|
<afterbreak></afterbreak>
|
1274
1274
|
</rule>
|
1275
1275
|
<rule break="no">
|
1276
|
-
<beforebreak>\bLL\.[\s\u00A0]?[
|
1276
|
+
<beforebreak>\bLL\.[\s\u00A0]?[BMD]\.[\s\u00A0]</beforebreak>
|
1277
1277
|
<afterbreak></afterbreak>
|
1278
1278
|
</rule>
|
1279
1279
|
<rule break="no">
|
@@ -1282,7 +1282,7 @@
|
|
1282
1282
|
</rule>
|
1283
1283
|
<rule break="no">
|
1284
1284
|
<beforebreak>\bLL\.[\s\u00A0]?</beforebreak>
|
1285
|
-
<afterbreak>[
|
1285
|
+
<afterbreak>[BMD]\.?</afterbreak>
|
1286
1286
|
</rule>
|
1287
1287
|
<rule break="no">
|
1288
1288
|
<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
|
@@ -1329,7 +1329,11 @@
|
|
1329
1329
|
<afterbreak></afterbreak>
|
1330
1330
|
</rule>
|
1331
1331
|
<rule break="no">
|
1332
|
-
<beforebreak>\
|
1332
|
+
<beforebreak>\b[cC]orp\.[\s\u00A0]</beforebreak>
|
1333
|
+
<afterbreak></afterbreak>
|
1334
|
+
</rule>
|
1335
|
+
<rule break="no">
|
1336
|
+
<beforebreak>\b[Rr]eg\.[\s\u00A0]</beforebreak>
|
1333
1337
|
<afterbreak></afterbreak>
|
1334
1338
|
</rule>
|
1335
1339
|
<rule break="no">
|
@@ -1540,6 +1544,19 @@
|
|
1540
1544
|
</rule>
|
1541
1545
|
</languagerule>
|
1542
1546
|
<languagerule languagerulename="Dutch">
|
1547
|
+
<rule break="no">
|
1548
|
+
<beforebreak>\sart\.\s</beforebreak>
|
1549
|
+
<afterbreak>[IVX]+[ .]</afterbreak>
|
1550
|
+
</rule>
|
1551
|
+
<!--Do not break after abbreviation of type a.b.c.-->
|
1552
|
+
<rule break="no">
|
1553
|
+
<beforebreak>\s([a-z]\.){2,10}\s</beforebreak>
|
1554
|
+
<afterbreak></afterbreak>
|
1555
|
+
</rule>
|
1556
|
+
<rule break="yes">
|
1557
|
+
<beforebreak>[ ]is[.][ ]</beforebreak>
|
1558
|
+
<afterbreak>[0-9]\.($|[ ])</afterbreak>
|
1559
|
+
</rule>
|
1543
1560
|
<rule break="yes">
|
1544
1561
|
<beforebreak>(^| )O\.\s</beforebreak>
|
1545
1562
|
<afterbreak>([A-Z][a-z]{1,3}[ ,:;.!?]|Indië|Wanneer|Kunnen|Sorry)</afterbreak>
|
@@ -1578,7 +1595,7 @@
|
|
1578
1595
|
<afterbreak>\p{Ll}</afterbreak>
|
1579
1596
|
</rule>
|
1580
1597
|
<rule break="yes">
|
1581
|
-
<beforebreak>\s(la|do|del)\sMar\.\s</beforebreak>
|
1598
|
+
<beforebreak>\s(la|do|del?)\sMar\.\s</beforebreak>
|
1582
1599
|
<afterbreak></afterbreak>
|
1583
1600
|
</rule>
|
1584
1601
|
<rule break="no">
|
@@ -1637,6 +1654,14 @@
|
|
1637
1654
|
<beforebreak>\b(geb|[Gg]em|get|gld|id|[Ii]ncl|ind|inf|ing|intern|[Ss]ec|inz|ir|jhr|jkvr)\.\s</beforebreak>
|
1638
1655
|
<afterbreak></afterbreak>
|
1639
1656
|
</rule>
|
1657
|
+
<rule break="yes">
|
1658
|
+
<beforebreak>\s(tel|red|min)\.\s</beforebreak>
|
1659
|
+
<afterbreak>[A-Z]</afterbreak>
|
1660
|
+
</rule>
|
1661
|
+
<rule break="yes">
|
1662
|
+
<beforebreak>\.(nl|be|com)\.\s</beforebreak>
|
1663
|
+
<afterbreak></afterbreak>
|
1664
|
+
</rule>
|
1640
1665
|
<rule break="no">
|
1641
1666
|
<beforebreak>\b(jl|jr|kr|kt|lic|ll|lt|lw|max|[Mm]evr|mi|[Mm]in|mld)\.\s</beforebreak>
|
1642
1667
|
<afterbreak></afterbreak>
|
@@ -1658,9 +1683,17 @@
|
|
1658
1683
|
<afterbreak>[A-Z]</afterbreak>
|
1659
1684
|
</rule>
|
1660
1685
|
<rule break="yes">
|
1686
|
+
<beforebreak>\svitamine [A-Z]\.\s</beforebreak>
|
1687
|
+
<afterbreak>[A-Z]</afterbreak>
|
1688
|
+
</rule>
|
1689
|
+
<rule break="yes">
|
1661
1690
|
<beforebreak>°C\.\s</beforebreak>
|
1662
1691
|
<afterbreak>[A-Z][a-z]</afterbreak>
|
1663
1692
|
</rule>
|
1693
|
+
<rule break="yes">
|
1694
|
+
<beforebreak>[A-Z]&[A-Z]\.\s</beforebreak>
|
1695
|
+
<afterbreak>[A-Z][a-z]</afterbreak>
|
1696
|
+
</rule>
|
1664
1697
|
<rule break="no">
|
1665
1698
|
<beforebreak>\b([A-Z]|Adr|Chr|Fr|Fred|IJ|Jac|Joh|Ph|St|Th|Tj|v|v\.(\s)?d)\.(\s)?</beforebreak>
|
1666
1699
|
<afterbreak>\p{Lu}</afterbreak>
|
@@ -1706,6 +1739,34 @@
|
|
1706
1739
|
<afterbreak>\p{L}\.\s</afterbreak>
|
1707
1740
|
</rule>
|
1708
1741
|
<rule break="no">
|
1742
|
+
<beforebreak>\set al\.\s</beforebreak>
|
1743
|
+
<afterbreak></afterbreak>
|
1744
|
+
</rule>
|
1745
|
+
<!--pa. as (wrong) abbrev for pag.-->
|
1746
|
+
<rule break="no">
|
1747
|
+
<beforebreak>\spa\.\s</beforebreak>
|
1748
|
+
<afterbreak>[0-9]</afterbreak>
|
1749
|
+
</rule>
|
1750
|
+
<!--op. as abbrev for opus-->
|
1751
|
+
<rule break="no">
|
1752
|
+
<beforebreak>\sop\.\s</beforebreak>
|
1753
|
+
<afterbreak>[0-9]|cit\.</afterbreak>
|
1754
|
+
</rule>
|
1755
|
+
<rule break="no">
|
1756
|
+
<beforebreak>\soa\.\s</beforebreak>
|
1757
|
+
<afterbreak>[a-z]</afterbreak>
|
1758
|
+
</rule>
|
1759
|
+
<!--al. as abbrev for alinea-->
|
1760
|
+
<rule break="no">
|
1761
|
+
<beforebreak>\sal\.\s</beforebreak>
|
1762
|
+
<afterbreak>[0-9]</afterbreak>
|
1763
|
+
</rule>
|
1764
|
+
<!--Break also when the next sentence has no capital-->
|
1765
|
+
<rule break="yes">
|
1766
|
+
<beforebreak>\s((is|op|in|af|ik|ze|om|me|je|na|nu|al|ja|VS|EU|er|we|tv|he|ga|hè|hé|TV|as|ei|SP|pc|wc|PC|IS|NS|ok|AD|OK|at|OM|cd|VN|it|EK|In|pa|AZ|up|IT|FM|VI|ui|la|CD|CV|pr|ie|cv|WW|GB|Jo|Aa|UK|HD|oa|VU))\.\s</beforebreak>
|
1767
|
+
<afterbreak></afterbreak>
|
1768
|
+
</rule>
|
1769
|
+
<rule break="no">
|
1709
1770
|
<beforebreak>\b\p{L}\.</beforebreak>
|
1710
1771
|
<afterbreak>\p{L}\.</afterbreak>
|
1711
1772
|
</rule>
|
@@ -1729,6 +1790,10 @@
|
|
1729
1790
|
<beforebreak>\b\p{Lu}\.\p{Lu}\.\s</beforebreak>
|
1730
1791
|
<afterbreak></afterbreak>
|
1731
1792
|
</rule>
|
1793
|
+
<rule break="yes">
|
1794
|
+
<beforebreak>\s(op)\sX\.\s</beforebreak>
|
1795
|
+
<afterbreak></afterbreak>
|
1796
|
+
</rule>
|
1732
1797
|
<rule break="no">
|
1733
1798
|
<beforebreak>[^\.]\s[A-Z]\.\s</beforebreak>
|
1734
1799
|
<afterbreak></afterbreak>
|
@@ -1763,10 +1828,18 @@
|
|
1763
1828
|
<afterbreak></afterbreak>
|
1764
1829
|
</rule>
|
1765
1830
|
<rule break="no">
|
1766
|
-
<beforebreak
|
1831
|
+
<beforebreak>(^|\s)[A-Z].+!\s</beforebreak>
|
1767
1832
|
<afterbreak>[a-z]</afterbreak>
|
1768
1833
|
</rule>
|
1769
1834
|
<rule break="no">
|
1835
|
+
<beforebreak>\s[A-Z].+z\.\s</beforebreak>
|
1836
|
+
<afterbreak>[a-z]</afterbreak>
|
1837
|
+
</rule>
|
1838
|
+
<rule break="no">
|
1839
|
+
<beforebreak>\sart\.\s</beforebreak>
|
1840
|
+
<afterbreak>[0-9]</afterbreak>
|
1841
|
+
</rule>
|
1842
|
+
<rule break="no">
|
1770
1843
|
<beforebreak>\b(jan|mrt|mar|jun|jul|aug|sept|okt|sep|spt|nov|dec|.*opp)\.\s</beforebreak>
|
1771
1844
|
<afterbreak>[a-z]</afterbreak>
|
1772
1845
|
</rule>
|
@@ -4704,7 +4777,7 @@
|
|
4704
4777
|
<afterbreak>[XIV\d]+\b</afterbreak>
|
4705
4778
|
</rule>
|
4706
4779
|
<rule break="no">
|
4707
|
-
<beforebreak>\b([Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|
|
4780
|
+
<beforebreak>\b(Ltd|[Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|grs?|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
4708
4781
|
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
|
4709
4782
|
</rule>
|
4710
4783
|
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
|
@@ -4854,7 +4927,7 @@
|
|
4854
4927
|
</rule>
|
4855
4928
|
<!-- Abbreviations that can finish sentences -->
|
4856
4929
|
<rule break="no">
|
4857
|
-
<beforebreak>\b([Ee]ds?|[Cc]oords
|
4930
|
+
<beforebreak>\b(Ltd|[Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|grs?|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
4858
4931
|
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
|
4859
4932
|
</rule>
|
4860
4933
|
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
|
@@ -5058,11 +5131,11 @@
|
|
5058
5131
|
</rule>
|
5059
5132
|
<!-- German abbreviations -->
|
5060
5133
|
<rule break="no">
|
5061
|
-
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|
|
5134
|
+
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Ee]ig|[Aa]bzü?gl|\d+-tlg|tlg|[Gg]gfls|[Ff]achspr|[Ll]tda|[Ee]inschl|[Vv]mtl|[Ss]tellv|Ev|[Bb]ezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|wsl|vsl|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
|
5062
5135
|
<afterbreak></afterbreak>
|
5063
5136
|
</rule>
|
5064
5137
|
<rule break="no">
|
5065
|
-
<beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|
|
5138
|
+
<beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|[Dd]t|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
|
5066
5139
|
<afterbreak></afterbreak>
|
5067
5140
|
</rule>
|
5068
5141
|
<rule break="no">
|
@@ -5086,7 +5159,7 @@
|
|
5086
5159
|
<afterbreak></afterbreak>
|
5087
5160
|
</rule>
|
5088
5161
|
<rule break="no">
|
5089
|
-
<beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|
|
5162
|
+
<beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|[Ss]td?|stacc|Str|stud|Subst|sva|svw|sZ)\.[\u00A0\s]{1,2}</beforebreak>
|
5090
5163
|
<afterbreak></afterbreak>
|
5091
5164
|
</rule>
|
5092
5165
|
<rule break="no">
|
@@ -5219,7 +5292,7 @@
|
|
5219
5292
|
</rule>
|
5220
5293
|
<rule break="no">
|
5221
5294
|
<beforebreak>\bLL\.\s?</beforebreak>
|
5222
|
-
<afterbreak>[
|
5295
|
+
<afterbreak>[BMD]\.?</afterbreak>
|
5223
5296
|
</rule>
|
5224
5297
|
<rule break="no">
|
5225
5298
|
<beforebreak>\b[BM]\.\s?</beforebreak>
|
@@ -5518,7 +5591,7 @@
|
|
5518
5591
|
<afterbreak></afterbreak>
|
5519
5592
|
</rule>
|
5520
5593
|
<rule break="no">
|
5521
|
-
<beforebreak>\bLL\.[\s\u00A0]?[
|
5594
|
+
<beforebreak>\bLL\.[\s\u00A0]?[BMD]\.[\s\u00A0]</beforebreak>
|
5522
5595
|
<afterbreak></afterbreak>
|
5523
5596
|
</rule>
|
5524
5597
|
<rule break="no">
|
@@ -5527,7 +5600,7 @@
|
|
5527
5600
|
</rule>
|
5528
5601
|
<rule break="no">
|
5529
5602
|
<beforebreak>\bLL\.[\s\u00A0]?</beforebreak>
|
5530
|
-
<afterbreak>[
|
5603
|
+
<afterbreak>[BMD]\.?</afterbreak>
|
5531
5604
|
</rule>
|
5532
5605
|
<rule break="no">
|
5533
5606
|
<beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
|
@@ -5583,6 +5656,14 @@
|
|
5583
5656
|
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
|
5584
5657
|
</rule>
|
5585
5658
|
</languagerule>
|
5659
|
+
|
5660
|
+
<languagerule languagerulename="Crimean Tatar">
|
5661
|
+
<rule break="no">
|
5662
|
+
<beforebreak>\b[0-9]+(\.|:)[0-9][0-9][\s\u00A0\u202F]</beforebreak>
|
5663
|
+
<afterbreak></afterbreak>
|
5664
|
+
</rule>
|
5665
|
+
</languagerule>
|
5666
|
+
|
5586
5667
|
<languagerule languagerulename="Ukrainian">
|
5587
5668
|
<!-- when sentence starts with ellipsis: ...Мазій і Юхим теж. -->
|
5588
5669
|
<rule break="no">
|
@@ -5783,7 +5864,11 @@
|
|
5783
5864
|
<!-- статус правових держав. — Авт.). -->
|
5784
5865
|
<rule break="no">
|
5785
5866
|
<beforebreak></beforebreak>
|
5786
|
-
<afterbreak>[\h\v]*[‐-―-][\h\v]*([Рр]ед|[Аа]вт)[\h\v]
|
5867
|
+
<afterbreak>[\h\v]*[‐-―-][\h\v]*([Рр]ед|[Аа]вт)\.[\h\v]*[\)\]]</afterbreak>
|
5868
|
+
</rule>
|
5869
|
+
<rule break="no">
|
5870
|
+
<beforebreak>\b([Рр]ед)\.[\h\v]*</beforebreak>
|
5871
|
+
<afterbreak>[А-ЯІЇЄҐ]</afterbreak>
|
5787
5872
|
</rule>
|
5788
5873
|
<!-- Цензор.НЕТ -->
|
5789
5874
|
<rule break="no">
|
@@ -6829,6 +6914,7 @@
|
|
6829
6914
|
<languagemap languagepattern="(ML|ml).*" languagerulename="Generic"></languagemap>
|
6830
6915
|
<languagemap languagepattern="(TL|tl).*" languagerulename="Generic"></languagemap>
|
6831
6916
|
<languagemap languagepattern="(AST|ast).*" languagerulename="Generic"></languagemap>
|
6917
|
+
<languagemap languagepattern="(CRH|crh).*" languagerulename="Generic"></languagemap>
|
6832
6918
|
<languagemap languagepattern=".*" languagerulename="Default"></languagemap>
|
6833
6919
|
</maprules>
|
6834
6920
|
</body>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srx-languagetool
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.13.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Madlon-Kay
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: srx
|