srx-languagetool 0.12.0 → 0.14.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b095dc51541d60d446b063cdff16940d7840e7d51891e1f5201117d42089b0da
4
- data.tar.gz: 5376dce7bda3c17a142477a3b19f629e65527c4fb96d540bfa1a853f0ed394f8
3
+ metadata.gz: a71886af9137758228b7fdbb92b1e0c9e8f64c1b3456597660186b43379bf098
4
+ data.tar.gz: 621f91e2be6bc34a564259f61953b56e02cb57aeaa7c68fdea2770f39ee2631a
5
5
  SHA512:
6
- metadata.gz: eb9f0aac7af1e2345842bdaf21917556429d8c887d356430c4c707b18b82d0a54cc46a5b6344247de27113f3ad0730f0d3fa37012f4a7c5c7cf4987dcedcf271
7
- data.tar.gz: 3370f8761982aa574c862f32d9a2702451cb992fa35dd0f842a4116224d448c7a2dd247be0ac2c61cace69f72364963c9072bd72e3fc2c46a3bc948996c8fc61
6
+ metadata.gz: 2dd8b533adbd82f274f492b1243330554a35a8e98d88af8a7790d2aaef6b2ebb85c0a3f0fc4bb87964b9a0135a5efd65554dbdae0ab8569e0838dbb2e5f504ef
7
+ data.tar.gz: d5270d27a3dba9622b84ebfb1a6874c5eaeb655cdc933e61001ae9827fbfd0158bd7ebe7da4491145d8acc1273501aa7aee2c687438f957026a3c16c5273c2a3
@@ -6,14 +6,14 @@ jobs:
6
6
  build:
7
7
  runs-on: ubuntu-latest
8
8
  steps:
9
- - uses: actions/checkout@v2
9
+ - uses: actions/checkout@v4
10
10
  - name: Set up Ruby
11
11
  uses: ruby/setup-ruby@v1
12
12
  with:
13
- ruby-version: 2.7.7
13
+ ruby-version: 3.2.3
14
14
  - name: Install
15
15
  run: |
16
- gem install bundler -v 2.4.10
16
+ gem install bundler -v 2.5.7
17
17
  bundle install
18
18
  - name: Type check
19
19
  run: bundle exec solargraph typecheck --level typed
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 2.7.7
1
+ 3.2.3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.14.0] - 2024-09-27
4
+
5
+ - Update rules to LanguageTool 6.5
6
+
7
+ ## [0.13.0] - 2024-03-29
8
+
9
+ - Update rules to LanguageTool 6.4
10
+
3
11
  ## [0.12.0] - 2023-10-07
4
12
 
5
13
  - Update rules to LanguageTool 6.3
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- srx-languagetool (0.12.0)
4
+ srx-languagetool (0.14.0)
5
5
  srx (< 1.0)
6
6
 
7
7
  GEM
@@ -9,74 +9,77 @@ GEM
9
9
  specs:
10
10
  ast (2.4.2)
11
11
  backport (1.2.0)
12
- benchmark (0.2.1)
12
+ benchmark (0.3.0)
13
13
  byebug (11.1.3)
14
- diff-lcs (1.5.0)
14
+ diff-lcs (1.5.1)
15
15
  e2mmap (0.1.0)
16
- jaro_winkler (1.5.4)
17
- json (2.6.3)
16
+ jaro_winkler (1.6.0)
17
+ json (2.7.2)
18
18
  kramdown (2.4.0)
19
19
  rexml
20
20
  kramdown-parser-gfm (1.1.0)
21
21
  kramdown (~> 2.0)
22
- minitest (5.18.0)
23
- nokogiri (1.14.2-x86_64-darwin)
22
+ language_server-protocol (3.17.0.3)
23
+ minitest (5.25.1)
24
+ nokogiri (1.16.7-x86_64-darwin)
24
25
  racc (~> 1.4)
25
- parallel (1.22.1)
26
- parser (3.2.1.1)
26
+ parallel (1.26.3)
27
+ parser (3.3.5.0)
27
28
  ast (~> 2.4.1)
28
- racc (1.6.2)
29
+ racc
30
+ racc (1.8.1)
29
31
  rainbow (3.1.1)
30
- rake (13.0.6)
31
- regexp_parser (2.7.0)
32
+ rake (13.2.1)
33
+ rbs (2.8.4)
34
+ regexp_parser (2.9.2)
32
35
  reverse_markdown (2.1.1)
33
36
  nokogiri
34
- rexml (3.2.5)
35
- rspec-expectations (3.12.2)
37
+ rexml (3.3.7)
38
+ rspec-expectations (3.13.3)
36
39
  diff-lcs (>= 1.2.0, < 2.0)
37
- rspec-support (~> 3.12.0)
38
- rspec-support (3.12.0)
39
- rubocop (1.48.1)
40
+ rspec-support (~> 3.13.0)
41
+ rspec-support (3.13.1)
42
+ rubocop (1.66.1)
40
43
  json (~> 2.3)
44
+ language_server-protocol (>= 3.17.0)
41
45
  parallel (~> 1.10)
42
- parser (>= 3.2.0.0)
46
+ parser (>= 3.3.0.2)
43
47
  rainbow (>= 2.2.2, < 4.0)
44
- regexp_parser (>= 1.8, < 3.0)
45
- rexml (>= 3.2.5, < 4.0)
46
- rubocop-ast (>= 1.26.0, < 2.0)
48
+ regexp_parser (>= 2.4, < 3.0)
49
+ rubocop-ast (>= 1.32.2, < 2.0)
47
50
  ruby-progressbar (~> 1.7)
48
51
  unicode-display_width (>= 2.4.0, < 3.0)
49
- rubocop-ast (1.28.0)
50
- parser (>= 3.2.1.0)
52
+ rubocop-ast (1.32.3)
53
+ parser (>= 3.3.1.0)
51
54
  ruby-progressbar (1.13.0)
52
- solargraph (0.48.0)
55
+ solargraph (0.50.0)
53
56
  backport (~> 1.2)
54
57
  benchmark
55
- bundler (>= 1.17.2)
58
+ bundler (~> 2.0)
56
59
  diff-lcs (~> 1.4)
57
60
  e2mmap
58
61
  jaro_winkler (~> 1.5)
59
62
  kramdown (~> 2.3)
60
63
  kramdown-parser-gfm (~> 1.1)
61
64
  parser (~> 3.0)
62
- reverse_markdown (>= 1.0.5, < 3)
63
- rubocop (>= 0.52)
65
+ rbs (~> 2.0)
66
+ reverse_markdown (~> 2.0)
67
+ rubocop (~> 1.38)
64
68
  thor (~> 1.0)
65
69
  tilt (~> 2.0)
66
70
  yard (~> 0.9, >= 0.9.24)
67
71
  srx (0.6.0)
68
72
  nokogiri (~> 1.11)
69
- thor (1.2.1)
70
- tilt (2.1.0)
71
- unicode-display_width (2.4.2)
72
- webrick (1.7.0)
73
- yard (0.9.28)
74
- webrick (~> 1.7.0)
73
+ thor (1.3.2)
74
+ tilt (2.4.0)
75
+ unicode-display_width (2.6.0)
76
+ yard (0.9.37)
75
77
 
76
78
  PLATFORMS
77
79
  x86_64-darwin-20
78
80
  x86_64-darwin-21
79
81
  x86_64-darwin-22
82
+ x86_64-darwin-23
80
83
 
81
84
  DEPENDENCIES
82
85
  byebug
@@ -88,4 +91,4 @@ DEPENDENCIES
88
91
  srx-languagetool!
89
92
 
90
93
  BUNDLED WITH
91
- 2.4.10
94
+ 2.5.7
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Srx
4
4
  module Languagetool
5
- VERSION = '0.12.0'
5
+ VERSION = '0.14.0'
6
6
  end
7
7
  end
data/lib/srx/segment.srx CHANGED
@@ -5,7 +5,7 @@
5
5
  <formathandle type="end" include="yes"></formathandle>
6
6
  <formathandle type="isolated" include="no"></formathandle>
7
7
  <okpsrx:options oneSegmentIncludesAll="no" trimLeadingWhitespaces="no" trimTrailingWhitespaces="no" useJavaRegex="yes" useIcu4JBreakRules="no" treatIsolatedCodesAsWhitespace="no"></okpsrx:options>
8
- <okpsrx:sample language="nl" useMappedRules="yes">Wat God buiten Christus is. 2.</okpsrx:sample>
8
+ <okpsrx:sample language="nl" useMappedRules="yes"> ON! is een omroep.</okpsrx:sample>
9
9
  <okpsrx:rangeRule></okpsrx:rangeRule>
10
10
  </header>
11
11
  <body>
@@ -1107,6 +1107,14 @@
1107
1107
  </rule>
1108
1108
  </languagerule>
1109
1109
  <languagerule languagerulename="English">
1110
+ <rule break="no"><!-- https://www.seven.one/ -->
1111
+ <beforebreak>\b[Se]even\.</beforebreak>
1112
+ <afterbreak>[Oo]ne\b</afterbreak>
1113
+ </rule>
1114
+ <rule break="no">
1115
+ <beforebreak>\b[1-9]\.[\s\u00A0]</beforebreak>
1116
+ <afterbreak>[a-z]</afterbreak>
1117
+ </rule>
1110
1118
  <rule break="no">
1111
1119
  <beforebreak>[\u00A0\s]</beforebreak>
1112
1120
  <afterbreak>\n</afterbreak>
@@ -1164,7 +1172,7 @@
1164
1172
  <afterbreak>D\.?</afterbreak>
1165
1173
  </rule>
1166
1174
  <rule break="no">
1167
- <beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Aa]cq|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
1175
+ <beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ee]xt|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Aa]cq|[Ii]ncl?|[Ee]xcl|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
1168
1176
  <afterbreak>[^\p{Lu}]|I</afterbreak>
1169
1177
  </rule>
1170
1178
  <rule break="no">
@@ -1273,7 +1281,7 @@
1273
1281
  <afterbreak></afterbreak>
1274
1282
  </rule>
1275
1283
  <rule break="no">
1276
- <beforebreak>\bLL\.[\s\u00A0]?[BM]\.[\s\u00A0]</beforebreak>
1284
+ <beforebreak>\bLL\.[\s\u00A0]?[BMD]\.[\s\u00A0]</beforebreak>
1277
1285
  <afterbreak></afterbreak>
1278
1286
  </rule>
1279
1287
  <rule break="no">
@@ -1282,7 +1290,7 @@
1282
1290
  </rule>
1283
1291
  <rule break="no">
1284
1292
  <beforebreak>\bLL\.[\s\u00A0]?</beforebreak>
1285
- <afterbreak>[BM]\.?</afterbreak>
1293
+ <afterbreak>[BMD]\.?</afterbreak>
1286
1294
  </rule>
1287
1295
  <rule break="no">
1288
1296
  <beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
@@ -1329,7 +1337,11 @@
1329
1337
  <afterbreak></afterbreak>
1330
1338
  </rule>
1331
1339
  <rule break="no">
1332
- <beforebreak>\bCorp\.[\s\u00A0]</beforebreak>
1340
+ <beforebreak>\b[cC]orp\.[\s\u00A0]</beforebreak>
1341
+ <afterbreak></afterbreak>
1342
+ </rule>
1343
+ <rule break="no">
1344
+ <beforebreak>\b[Rr]eg\.[\s\u00A0]</beforebreak>
1333
1345
  <afterbreak></afterbreak>
1334
1346
  </rule>
1335
1347
  <rule break="no">
@@ -1540,6 +1552,15 @@
1540
1552
  </rule>
1541
1553
  </languagerule>
1542
1554
  <languagerule languagerulename="Dutch">
1555
+ <rule break="no">
1556
+ <beforebreak>\sart\.\s</beforebreak>
1557
+ <afterbreak>[IVX]+[ .]</afterbreak>
1558
+ </rule>
1559
+ <!--Do not break after abbreviation of type a.b.c.-->
1560
+ <rule break="no">
1561
+ <beforebreak>\s([a-z]\.){2,10}\s</beforebreak>
1562
+ <afterbreak></afterbreak>
1563
+ </rule>
1543
1564
  <rule break="yes">
1544
1565
  <beforebreak>[ ]is[.][ ]</beforebreak>
1545
1566
  <afterbreak>[0-9]\.($|[ ])</afterbreak>
@@ -1582,7 +1603,7 @@
1582
1603
  <afterbreak>\p{Ll}</afterbreak>
1583
1604
  </rule>
1584
1605
  <rule break="yes">
1585
- <beforebreak>\s(la|do|del)\sMar\.\s</beforebreak>
1606
+ <beforebreak>\s(la|do|del?)\sMar\.\s</beforebreak>
1586
1607
  <afterbreak></afterbreak>
1587
1608
  </rule>
1588
1609
  <rule break="no">
@@ -1641,6 +1662,14 @@
1641
1662
  <beforebreak>\b(geb|[Gg]em|get|gld|id|[Ii]ncl|ind|inf|ing|intern|[Ss]ec|inz|ir|jhr|jkvr)\.\s</beforebreak>
1642
1663
  <afterbreak></afterbreak>
1643
1664
  </rule>
1665
+ <rule break="yes">
1666
+ <beforebreak>\s(tel|red|min)\.\s</beforebreak>
1667
+ <afterbreak>[A-Z]</afterbreak>
1668
+ </rule>
1669
+ <rule break="yes">
1670
+ <beforebreak>\.(nl|be|com)\.\s</beforebreak>
1671
+ <afterbreak></afterbreak>
1672
+ </rule>
1644
1673
  <rule break="no">
1645
1674
  <beforebreak>\b(jl|jr|kr|kt|lic|ll|lt|lw|max|[Mm]evr|mi|[Mm]in|mld)\.\s</beforebreak>
1646
1675
  <afterbreak></afterbreak>
@@ -1662,6 +1691,10 @@
1662
1691
  <afterbreak>[A-Z]</afterbreak>
1663
1692
  </rule>
1664
1693
  <rule break="yes">
1694
+ <beforebreak>\svitamine [A-Z]\.\s</beforebreak>
1695
+ <afterbreak>[A-Z]</afterbreak>
1696
+ </rule>
1697
+ <rule break="yes">
1665
1698
  <beforebreak>°C\.\s</beforebreak>
1666
1699
  <afterbreak>[A-Z][a-z]</afterbreak>
1667
1700
  </rule>
@@ -1714,6 +1747,34 @@
1714
1747
  <afterbreak>\p{L}\.\s</afterbreak>
1715
1748
  </rule>
1716
1749
  <rule break="no">
1750
+ <beforebreak>\set al\.\s</beforebreak>
1751
+ <afterbreak></afterbreak>
1752
+ </rule>
1753
+ <!--pa. as (wrong) abbrev for pag.-->
1754
+ <rule break="no">
1755
+ <beforebreak>\spa\.\s</beforebreak>
1756
+ <afterbreak>[0-9]</afterbreak>
1757
+ </rule>
1758
+ <!--op. as abbrev for opus-->
1759
+ <rule break="no">
1760
+ <beforebreak>\sop\.\s</beforebreak>
1761
+ <afterbreak>[0-9]|cit\.</afterbreak>
1762
+ </rule>
1763
+ <rule break="no">
1764
+ <beforebreak>\soa\.\s</beforebreak>
1765
+ <afterbreak>[a-z]</afterbreak>
1766
+ </rule>
1767
+ <!--al. as abbrev for alinea-->
1768
+ <rule break="no">
1769
+ <beforebreak>\sal\.\s</beforebreak>
1770
+ <afterbreak>[0-9]</afterbreak>
1771
+ </rule>
1772
+ <!--Break also when the next sentence has no capital-->
1773
+ <rule break="yes">
1774
+ <beforebreak>\s((is|op|in|af|ik|ze|om|me|je|na|nu|al|ja|VS|EU|er|we|tv|he|ga|hè|hé|TV|as|ei|SP|pc|wc|PC|IS|NS|ok|AD|OK|at|OM|cd|VN|it|EK|In|pa|AZ|up|IT|FM|VI|ui|la|CD|CV|pr|ie|cv|WW|GB|Jo|Aa|UK|HD|oa|VU))\.\s</beforebreak>
1775
+ <afterbreak></afterbreak>
1776
+ </rule>
1777
+ <rule break="no">
1717
1778
  <beforebreak>\b\p{L}\.</beforebreak>
1718
1779
  <afterbreak>\p{L}\.</afterbreak>
1719
1780
  </rule>
@@ -1737,6 +1798,10 @@
1737
1798
  <beforebreak>\b\p{Lu}\.\p{Lu}\.\s</beforebreak>
1738
1799
  <afterbreak></afterbreak>
1739
1800
  </rule>
1801
+ <rule break="yes">
1802
+ <beforebreak>\s(op)\sX\.\s</beforebreak>
1803
+ <afterbreak></afterbreak>
1804
+ </rule>
1740
1805
  <rule break="no">
1741
1806
  <beforebreak>[^\.]\s[A-Z]\.\s</beforebreak>
1742
1807
  <afterbreak></afterbreak>
@@ -1771,10 +1836,18 @@
1771
1836
  <afterbreak></afterbreak>
1772
1837
  </rule>
1773
1838
  <rule break="no">
1774
- <beforebreak>\s[A-Z].+!\s</beforebreak>
1839
+ <beforebreak>(^|\s)[A-Z].+!\s</beforebreak>
1775
1840
  <afterbreak>[a-z]</afterbreak>
1776
1841
  </rule>
1777
1842
  <rule break="no">
1843
+ <beforebreak>\s[A-Z].+z\.\s</beforebreak>
1844
+ <afterbreak>[a-z]</afterbreak>
1845
+ </rule>
1846
+ <rule break="no">
1847
+ <beforebreak>\sart\.\s</beforebreak>
1848
+ <afterbreak>[0-9]</afterbreak>
1849
+ </rule>
1850
+ <rule break="no">
1778
1851
  <beforebreak>\b(jan|mrt|mar|jun|jul|aug|sept|okt|sep|spt|nov|dec|.*opp)\.\s</beforebreak>
1779
1852
  <afterbreak>[a-z]</afterbreak>
1780
1853
  </rule>
@@ -4688,6 +4761,19 @@
4688
4761
  <beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
4689
4762
  <afterbreak></afterbreak>
4690
4763
  </rule>
4764
+ <!-- unknown abbreviations inside parentheses -->
4765
+ <rule break="no">
4766
+ <beforebreak>\([^\)]*\.[\s\u00A0]</beforebreak>
4767
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4768
+ </rule>
4769
+ <rule break="no">
4770
+ <beforebreak>\[[^\]]*\.[\s\u00A0]</beforebreak>
4771
+ <afterbreak>[^\]\r\n]*\]</afterbreak>
4772
+ </rule>
4773
+ <rule break="no">
4774
+ <beforebreak>\{[^\}]*\.[\s\u00A0]</beforebreak>
4775
+ <afterbreak>[^\}\r\n]*\}</afterbreak>
4776
+ </rule>
4691
4777
  <!-- initials: A. C. Jones. Problem: [...] d'Alfons I. Ell era [...] -->
4692
4778
  <rule break="no">
4693
4779
  <beforebreak>\b[A-ZÀÉÈÍÓÒÚ]\.[\s\u00A0]</beforebreak>
@@ -4695,7 +4781,7 @@
4695
4781
  </rule>
4696
4782
  <!-- Abbreviations that cannot finish sentences-->
4697
4783
  <rule break="no">
4698
- <beforebreak>\b(dc|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
4784
+ <beforebreak>\b(dc|inst|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
4699
4785
  <afterbreak></afterbreak>
4700
4786
  </rule>
4701
4787
  <!-- Abbreviations that can finish sentences -->
@@ -4750,12 +4836,12 @@
4750
4836
  </rule>
4751
4837
  <!-- Ellipsis: ... lowercase -->
4752
4838
  <rule break="no">
4753
- <beforebreak>[^\s\u00A0](\Q...\E|…)[\s\u00A0]</beforebreak>
4839
+ <beforebreak>[^\s\u00A0](\.\.\.|…)[\s\u00A0]</beforebreak>
4754
4840
  <afterbreak>\p{Ll}</afterbreak>
4755
4841
  </rule>
4756
4842
  <!-- (enum...) -->
4757
4843
  <rule break="no">
4758
- <beforebreak>\b(\Q...\E|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
4844
+ <beforebreak>\b(\.\.\.|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
4759
4845
  <afterbreak>\p{Ll}</afterbreak>
4760
4846
  </rule>
4761
4847
  <!-- pero ¡ah! no estaba
@@ -4779,6 +4865,19 @@
4779
4865
  </rule>
4780
4866
  </languagerule>
4781
4867
  <languagerule languagerulename="Spanish">
4868
+ <!-- unknown abbreviations inside parentheses -->
4869
+ <rule break="no">
4870
+ <beforebreak>\([^\)]*\.[\s\u00A0]</beforebreak>
4871
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4872
+ </rule>
4873
+ <rule break="no">
4874
+ <beforebreak>\[[^\]]*\.[\s\u00A0]</beforebreak>
4875
+ <afterbreak>[^\]\r\n]*\]</afterbreak>
4876
+ </rule>
4877
+ <rule break="no">
4878
+ <beforebreak>\{[^\}]*\.[\s\u00A0]</beforebreak>
4879
+ <afterbreak>[^\}\r\n]*\}</afterbreak>
4880
+ </rule>
4782
4881
  <rule break="no">
4783
4882
  <beforebreak>¿[^?]+:[\s\u00A0]</beforebreak>
4784
4883
  <afterbreak>.</afterbreak>
@@ -4802,12 +4901,12 @@
4802
4901
  </rule>
4803
4902
  <!-- Ellipsis: ... lowercase -->
4804
4903
  <rule break="no">
4805
- <beforebreak>[^\s\u00A0](\Q...\E|…)[\s\u00A0]</beforebreak>
4904
+ <beforebreak>[^\s\u00A0](\.\.\.|…)[\s\u00A0]</beforebreak>
4806
4905
  <afterbreak>\p{Ll}</afterbreak>
4807
4906
  </rule>
4808
4907
  <!-- (enum...) -->
4809
4908
  <rule break="no">
4810
- <beforebreak>\b(\Q...\E|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
4909
+ <beforebreak>\b(\.\.\.|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
4811
4910
  <afterbreak>\p{Ll}</afterbreak>
4812
4911
  </rule>
4813
4912
  <!-- Abbreviations that can finish sentences -->
@@ -4917,6 +5016,10 @@
4917
5016
  <beforebreak>\b(https?|ftp|file|chrome|chromium|android|(chrome|moz)\-extension):///?[A-Za-z0-9\-]+\.</beforebreak>
4918
5017
  <afterbreak>[A-Za-z0-9\-]+(\.|\b)</afterbreak>
4919
5018
  </rule>
5019
+ <rule break="no"><!-- https://www.seven.one/ -->
5020
+ <beforebreak>\b[Se]even\.</beforebreak>
5021
+ <afterbreak>[Oo]nes?\b</afterbreak>
5022
+ </rule>
4920
5023
  <rule break="no">
4921
5024
  <beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak>
4922
5025
  <afterbreak>[A-Za-z0-9\-]+\.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(\.|\b)</afterbreak>
@@ -5026,7 +5129,7 @@
5026
5129
  </rule>
5027
5130
  <!-- don't split at cases like "Friedrich II. wird auch..." -->
5028
5131
  <rule break="no">
5029
- <beforebreak>[\u00A0\s ][IVX]+\.[\u00A0\s]{1,2}</beforebreak>
5132
+ <beforebreak>[\u00A0\s ][IVX]+\.[\u00A0\s]{1,2}</beforebreak>
5030
5133
  <afterbreak>[^\p{Lu}]+</afterbreak>
5031
5134
  </rule>
5032
5135
  <!-- don't split at cases like "im 13. oder 14. Jahrhundert" -->
@@ -5066,11 +5169,11 @@
5066
5169
  </rule>
5067
5170
  <!-- German abbreviations -->
5068
5171
  <rule break="no">
5069
- <beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|ggfls|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
5172
+ <beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Ee]ig|[Aa]bzü?gl|\d+-tlg|tlg|[Gg]gfls|[Ff]achspr|[Ll]tda|[Ee]inschl|[Vv]mtl|[Ss]tellv|Ev|[Bb]ezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|wsl|vsl|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
5070
5173
  <afterbreak></afterbreak>
5071
5174
  </rule>
5072
5175
  <rule break="no">
5073
- <beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
5176
+ <beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|[Dd]t|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
5074
5177
  <afterbreak></afterbreak>
5075
5178
  </rule>
5076
5179
  <rule break="no">
@@ -5094,7 +5197,7 @@
5094
5197
  <afterbreak></afterbreak>
5095
5198
  </rule>
5096
5199
  <rule break="no">
5097
- <beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|Std?|stacc|Str|stud|Subst|sva|svw|sZ)\.[\u00A0\s]{1,2}</beforebreak>
5200
+ <beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|[Ss]td?|stacc|Str|stud|Subst|sva|svw|sZ)\.[\u00A0\s]{1,2}</beforebreak>
5098
5201
  <afterbreak></afterbreak>
5099
5202
  </rule>
5100
5203
  <rule break="no">
@@ -5227,7 +5330,7 @@
5227
5330
  </rule>
5228
5331
  <rule break="no">
5229
5332
  <beforebreak>\bLL\.\s?</beforebreak>
5230
- <afterbreak>[BM]\.?</afterbreak>
5333
+ <afterbreak>[BMD]\.?</afterbreak>
5231
5334
  </rule>
5232
5335
  <rule break="no">
5233
5336
  <beforebreak>\b[BM]\.\s?</beforebreak>
@@ -5526,7 +5629,7 @@
5526
5629
  <afterbreak></afterbreak>
5527
5630
  </rule>
5528
5631
  <rule break="no">
5529
- <beforebreak>\bLL\.[\s\u00A0]?[BM]\.[\s\u00A0]</beforebreak>
5632
+ <beforebreak>\bLL\.[\s\u00A0]?[BMD]\.[\s\u00A0]</beforebreak>
5530
5633
  <afterbreak></afterbreak>
5531
5634
  </rule>
5532
5635
  <rule break="no">
@@ -5535,7 +5638,7 @@
5535
5638
  </rule>
5536
5639
  <rule break="no">
5537
5640
  <beforebreak>\bLL\.[\s\u00A0]?</beforebreak>
5538
- <afterbreak>[BM]\.?</afterbreak>
5641
+ <afterbreak>[BMD]\.?</afterbreak>
5539
5642
  </rule>
5540
5643
  <rule break="no">
5541
5644
  <beforebreak>\b[BM]\.[\s\u00A0]?</beforebreak>
@@ -5591,6 +5694,14 @@
5591
5694
  <afterbreak>\p{Lu}\p{Ll}</afterbreak>
5592
5695
  </rule>
5593
5696
  </languagerule>
5697
+
5698
+ <languagerule languagerulename="Crimean Tatar">
5699
+ <rule break="no">
5700
+ <beforebreak>\b[0-9]+(\.|:)[0-9][0-9][\s\u00A0\u202F]</beforebreak>
5701
+ <afterbreak></afterbreak>
5702
+ </rule>
5703
+ </languagerule>
5704
+
5594
5705
  <languagerule languagerulename="Ukrainian">
5595
5706
  <!-- when sentence starts with ellipsis: ...Мазій і Юхим теж. -->
5596
5707
  <rule break="no">
@@ -5651,7 +5762,7 @@
5651
5762
  </rule>
5652
5763
  <!-- Ів. Франко (але Ів Бутільє) -->
5653
5764
  <rule break="no">
5654
- <beforebreak>(^|[\h\v])(Ів|Дж)\.[\h\v]+</beforebreak>
5765
+ <beforebreak>(^|[\h\v])(Ів|Дж|Ол)\.[\h\v]+</beforebreak>
5655
5766
  <afterbreak>[А-ЯІЇЄҐA-Z]</afterbreak>
5656
5767
  </rule>
5657
5768
  <!-- Year: 2000 р.:
@@ -5742,7 +5853,7 @@
5742
5853
  </rule>
5743
5854
  <!-- abbreviation with proper noun: проф. Грицько, о. Лісове -->
5744
5855
  <rule break="no">
5745
- <beforebreak>\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|дир|тов|упоряд|тт|чл\.-кор|[Пп]реп)\.[\h\v]*</beforebreak>
5856
+ <beforebreak>\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|дир|тов|упоряд|тт|чл\.-кор|[Пп]реп|[сС]вт)\.[\h\v]*</beforebreak>
5746
5857
  <afterbreak>[\h\v]*[А-ЯІЇЄҐA-Z]</afterbreak>
5747
5858
  </rule>
5748
5859
  <rule break="no">
@@ -5760,8 +5871,8 @@
5760
5871
  </rule>
5761
5872
  <!-- TODO: арт. - артист -->
5762
5873
  <rule break="no">
5763
- <beforebreak>\b([Аа]рт|[Мм]ал|[Рр]ис)\.[\h\v]*</beforebreak>
5764
- <afterbreak>[\h\v]*[0-9]</afterbreak>
5874
+ <beforebreak>\b([Аа]рт|[Мм]ал|[Рр]ис|[Сс]пр)\.[\h\v]*</beforebreak>
5875
+ <afterbreak>[\h\v]*(№[\h\v]*)?[0-9]</afterbreak>
5765
5876
  </rule>
5766
5877
  <!-- ХІІ р., 3-6 арт., 2-3 тт. -->
5767
5878
  <rule break="no">
@@ -5791,7 +5902,11 @@
5791
5902
  <!-- статус правових держав. — Авт.). -->
5792
5903
  <rule break="no">
5793
5904
  <beforebreak></beforebreak>
5794
- <afterbreak>[\h\v]*[‐-―-][\h\v]*([Рр]ед|[Аа]вт)[\h\v]*\.[\)\]]</afterbreak>
5905
+ <afterbreak>[\h\v]*[‐-―-][\h\v]*([Рр]ед|[Аа]вт)\.[\h\v]*[\)\]]</afterbreak>
5906
+ </rule>
5907
+ <rule break="no">
5908
+ <beforebreak>\b([Рр]ед)\.[\h\v]*</beforebreak>
5909
+ <afterbreak>[А-ЯІЇЄҐ]</afterbreak>
5795
5910
  </rule>
5796
5911
  <!-- Цензор.НЕТ -->
5797
5912
  <rule break="no">
@@ -6282,7 +6397,7 @@
6282
6397
  </rule>
6283
6398
  <!-- Not break for ellipses (...) -->
6284
6399
  <rule break="no">
6285
- <beforebreak>[^\s](\Q...\E|…)\s</beforebreak>
6400
+ <beforebreak>[^\s](\.\.\.|…)\s</beforebreak>
6286
6401
  <afterbreak>\p{Ll}</afterbreak>
6287
6402
  </rule>
6288
6403
  <!-- z.B. "bla (...) blubb" -> without ending sentence -->
@@ -6503,7 +6618,7 @@
6503
6618
  <afterbreak></afterbreak>
6504
6619
  </rule>
6505
6620
  <rule break="no">
6506
- <beforebreak>[^\.]\s[ضصثقفغعهخحجچشسیبلاتنمکگ\ظطزرذدپوًٌٍَُِّْA-Z]\.\s</beforebreak>
6621
+ <beforebreak>[^\.]\s[ضصثقفغعهخحجچشسیبلاتنمکگ\ظطزرذدپوًٌٍَُِّْA-Z]\.\s</beforebreak>
6507
6622
  <afterbreak></afterbreak>
6508
6623
  </rule>
6509
6624
  <rule break="no">
@@ -6583,7 +6698,7 @@
6583
6698
  </rule>
6584
6699
  <!--Не раздвајај у случају као на пр.: "Петар I дошао је ..."-->
6585
6700
  <rule break="no">
6586
- <beforebreak>[\s ][IVX]+\s</beforebreak>
6701
+ <beforebreak>[\s ][IVX]+\s</beforebreak>
6587
6702
  <afterbreak>[^\p{Lu}]+</afterbreak>
6588
6703
  </rule>
6589
6704
  <!--Не раздвајај у случају као "од 13. до 14. века"-->
@@ -6837,6 +6952,7 @@
6837
6952
  <languagemap languagepattern="(ML|ml).*" languagerulename="Generic"></languagemap>
6838
6953
  <languagemap languagepattern="(TL|tl).*" languagerulename="Generic"></languagemap>
6839
6954
  <languagemap languagepattern="(AST|ast).*" languagerulename="Generic"></languagemap>
6955
+ <languagemap languagepattern="(CRH|crh).*" languagerulename="Generic"></languagemap>
6840
6956
  <languagemap languagepattern=".*" languagerulename="Default"></languagemap>
6841
6957
  </maprules>
6842
6958
  </body>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srx-languagetool
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.12.0
4
+ version: 0.14.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aaron Madlon-Kay
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2023-10-07 00:00:00.000000000 Z
11
+ date: 2024-09-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: srx
@@ -24,7 +24,7 @@ dependencies:
24
24
  - - "<"
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.0'
27
- description:
27
+ description:
28
28
  email:
29
29
  - aaron@madlon-kay.com
30
30
  executables: []
@@ -58,7 +58,7 @@ metadata:
58
58
  source_code_uri: https://github.com/amake/srx-languagetool-ruby.git
59
59
  changelog_uri: https://github.com/amake/srx-languagetool-ruby/blob/master/CHANGELOG.md
60
60
  rubygems_mfa_required: 'true'
61
- post_install_message:
61
+ post_install_message:
62
62
  rdoc_options: []
63
63
  require_paths:
64
64
  - lib
@@ -73,8 +73,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
73
73
  - !ruby/object:Gem::Version
74
74
  version: '0'
75
75
  requirements: []
76
- rubygems_version: 3.1.6
77
- signing_key:
76
+ rubygems_version: 3.5.7
77
+ signing_key:
78
78
  specification_version: 4
79
79
  summary: SRX segmentation rules from LanguageTool
80
80
  test_files: []