srx-languagetool 0.6.0 → 0.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -2
- data/.ruby-version +1 -1
- data/CHANGELOG.md +9 -1
- data/Gemfile.lock +20 -18
- data/lib/srx/languagetool/version.rb +1 -1
- data/lib/srx/segment.srx +97 -50
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f73cebe4cfa7e771e3250f1f1dc13694b6450e19a4f9adf81769c5a75baa76d5
|
4
|
+
data.tar.gz: 35f29775c7d85150bc61551e9fb32adcb60cebc392d9d37e01d829b193a7464c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 548319e33a292724739e81eb28433594dfea9ec8e9e1fd78366d1847e601a5c6e1521e1fa639bd760fd33ee80d45190a83e92fdcb64d17efae730f5ac7958e6d
|
7
|
+
data.tar.gz: b55939faa805e0e5102a8610c4c89571f47c5002d3c7b7dfa26fa9f8785f83ce4083bec63abad845565091ddfad752c2ca6b9599a88be621e5618053ad5b0394
|
data/.github/workflows/main.yml
CHANGED
@@ -10,10 +10,10 @@ jobs:
|
|
10
10
|
- name: Set up Ruby
|
11
11
|
uses: ruby/setup-ruby@v1
|
12
12
|
with:
|
13
|
-
ruby-version: 2.7.
|
13
|
+
ruby-version: 2.7.6
|
14
14
|
- name: Install
|
15
15
|
run: |
|
16
|
-
gem install bundler -v 2.3.
|
16
|
+
gem install bundler -v 2.3.22
|
17
17
|
bundle install
|
18
18
|
- name: Type check
|
19
19
|
run: bundle exec solargraph typecheck --level typed
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.7.
|
1
|
+
2.7.6
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
srx-languagetool (0.6.0)
|
4
|
+
srx-languagetool (0.8.0)
|
5
5
|
srx (< 1.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -14,40 +14,42 @@ GEM
|
|
14
14
|
diff-lcs (1.5.0)
|
15
15
|
e2mmap (0.1.0)
|
16
16
|
jaro_winkler (1.5.4)
|
17
|
-
|
17
|
+
json (2.6.2)
|
18
|
+
kramdown (2.4.0)
|
18
19
|
rexml
|
19
20
|
kramdown-parser-gfm (1.1.0)
|
20
21
|
kramdown (~> 2.0)
|
21
|
-
minitest (5.
|
22
|
-
nokogiri (1.13.
|
22
|
+
minitest (5.16.3)
|
23
|
+
nokogiri (1.13.8-x86_64-darwin)
|
23
24
|
racc (~> 1.4)
|
24
25
|
parallel (1.22.1)
|
25
|
-
parser (3.1.1
|
26
|
+
parser (3.1.2.1)
|
26
27
|
ast (~> 2.4.1)
|
27
28
|
racc (1.6.0)
|
28
29
|
rainbow (3.1.1)
|
29
30
|
rake (13.0.6)
|
30
|
-
regexp_parser (2.
|
31
|
+
regexp_parser (2.6.0)
|
31
32
|
reverse_markdown (2.1.1)
|
32
33
|
nokogiri
|
33
34
|
rexml (3.2.5)
|
34
|
-
rspec-expectations (3.11.
|
35
|
+
rspec-expectations (3.11.1)
|
35
36
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
37
|
rspec-support (~> 3.11.0)
|
37
|
-
rspec-support (3.11.
|
38
|
-
rubocop (1.
|
38
|
+
rspec-support (3.11.1)
|
39
|
+
rubocop (1.36.0)
|
40
|
+
json (~> 2.3)
|
39
41
|
parallel (~> 1.10)
|
40
|
-
parser (>= 3.1.
|
42
|
+
parser (>= 3.1.2.1)
|
41
43
|
rainbow (>= 2.2.2, < 4.0)
|
42
44
|
regexp_parser (>= 1.8, < 3.0)
|
43
|
-
rexml
|
44
|
-
rubocop-ast (>= 1.
|
45
|
+
rexml (>= 3.2.5, < 4.0)
|
46
|
+
rubocop-ast (>= 1.20.1, < 2.0)
|
45
47
|
ruby-progressbar (~> 1.7)
|
46
48
|
unicode-display_width (>= 1.4.0, < 3.0)
|
47
|
-
rubocop-ast (1.
|
49
|
+
rubocop-ast (1.21.0)
|
48
50
|
parser (>= 3.1.1.0)
|
49
51
|
ruby-progressbar (1.11.0)
|
50
|
-
solargraph (0.
|
52
|
+
solargraph (0.47.1)
|
51
53
|
backport (~> 1.2)
|
52
54
|
benchmark
|
53
55
|
bundler (>= 1.17.2)
|
@@ -65,10 +67,10 @@ GEM
|
|
65
67
|
srx (0.6.0)
|
66
68
|
nokogiri (~> 1.11)
|
67
69
|
thor (1.2.1)
|
68
|
-
tilt (2.0.
|
69
|
-
unicode-display_width (2.
|
70
|
+
tilt (2.0.11)
|
71
|
+
unicode-display_width (2.3.0)
|
70
72
|
webrick (1.7.0)
|
71
|
-
yard (0.9.
|
73
|
+
yard (0.9.28)
|
72
74
|
webrick (~> 1.7.0)
|
73
75
|
|
74
76
|
PLATFORMS
|
@@ -85,4 +87,4 @@ DEPENDENCIES
|
|
85
87
|
srx-languagetool!
|
86
88
|
|
87
89
|
BUNDLED WITH
|
88
|
-
2.3.
|
90
|
+
2.3.22
|
data/lib/srx/segment.srx
CHANGED
@@ -1159,7 +1159,7 @@
|
|
1159
1159
|
<afterbreak>D\.?</afterbreak>
|
1160
1160
|
</rule>
|
1161
1161
|
<rule break="no"><!-- min. -->
|
1162
|
-
<beforebreak>\b([Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1162
|
+
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1163
1163
|
<afterbreak>[^\p{Lu}]|I</afterbreak>
|
1164
1164
|
</rule>
|
1165
1165
|
<rule break="no"><!-- hr. -->
|
@@ -1553,6 +1553,10 @@
|
|
1553
1553
|
<beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak>
|
1554
1554
|
<afterbreak>[A-Za-z0-9\-]+\.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(\.|\b)</afterbreak>
|
1555
1555
|
</rule>
|
1556
|
+
<rule break="no"><!-- Abbreviated books of the Bible and biblical apocrypha-->
|
1557
|
+
<beforebreak>\b(Ge?n|Ex|Le?v|Nu?m|D(eu)?t|Jo?z|Ri|R[ei]cht|Sa?m|Ko?n|Kr[on]{0,2}|Neh?|Est?|Jb|Ps|Spr?|Pr[ed]{0,2}|H(oog)?l|Je?s|Je?r|Kl(aagl)?|Ez(ech)?|Da?n|Ho?s|Jl|Am|Ob|Mc|Mi[ch]{0,2}|Nah?|Hk|Hab|Zf|[SZ]ef|Ha?g|Zc|Zach|Ma?l|Ma?t|Mk|Mar|Lk|Jh|H(an)?d|Ro?m|Kor|Ga?l|Ef|Fp|Fil|Ko|[CK]ol|Th|Th?e[s]{1,2}|Tm|Ti?t|Fm|Fil(em)?|Hb|Hebr?|Jk|Ja[ck]|Pe?tr?|Joh|Jud|Op(enb)?|Wijsh|Tob|Sir|Bar|Makk)\.\s</beforebreak>
|
1558
|
+
<afterbreak></afterbreak>
|
1559
|
+
</rule>
|
1556
1560
|
<rule break="no">
|
1557
1561
|
<beforebreak>\b(Drs|Art|Afr|Am|Ar|Br|Cie|Comp|Dhr|([Pp]rof\.)?[Dd]r|Em|Fa|Kon|Bros|Stb)\.\s</beforebreak>
|
1558
1562
|
<afterbreak></afterbreak>
|
@@ -1578,7 +1582,7 @@
|
|
1578
1582
|
<afterbreak></afterbreak>
|
1579
1583
|
</rule>
|
1580
1584
|
<rule break="no">
|
1581
|
-
<beforebreak>\b(arch|archeol|art|bc|betr|bez|bibl|bijl|
|
1585
|
+
<beforebreak>\b(arch|archeol|art|bc|bep|betr|bez|bibl|bijl|[Bb]ijv)\.\s</beforebreak>
|
1582
1586
|
<afterbreak></afterbreak>
|
1583
1587
|
</rule>
|
1584
1588
|
<rule break="no">
|
@@ -1590,15 +1594,15 @@
|
|
1590
1594
|
<afterbreak></afterbreak>
|
1591
1595
|
</rule>
|
1592
1596
|
<rule break="no">
|
1593
|
-
<beforebreak>\b(ed|em|enz|etc|ev|
|
1597
|
+
<beforebreak>\b(ed|em|enz|etc|ev|[Ee]xcl|fa|fam|fig|fin|fl|fr.)\.\s</beforebreak>
|
1594
1598
|
<afterbreak></afterbreak>
|
1595
1599
|
</rule>
|
1596
1600
|
<rule break="no">
|
1597
|
-
<beforebreak>\b(geb|get|gld|id|
|
1601
|
+
<beforebreak>\b(geb|[Gg]em|get|gld|id|[Ii]ncl|ind|inf|ing|intern|inz|ir|jhr|jkvr)\.\s</beforebreak>
|
1598
1602
|
<afterbreak></afterbreak>
|
1599
1603
|
</rule>
|
1600
1604
|
<rule break="no">
|
1601
|
-
<beforebreak>\b(jl|jr|kr|kt|lab|lic|ll|lt|lw|max|mi|
|
1605
|
+
<beforebreak>\b(jl|jr|kr|kt|lab|lic|ll|lt|lw|max|mevr|mi|[Mm]in|mld)\.\s</beforebreak>
|
1602
1606
|
<afterbreak></afterbreak>
|
1603
1607
|
</rule>
|
1604
1608
|
<rule break="no">
|
@@ -1606,11 +1610,11 @@
|
|
1606
1610
|
<afterbreak></afterbreak>
|
1607
1611
|
</rule>
|
1608
1612
|
<rule break="no">
|
1609
|
-
<beforebreak>\b(opm|org|ov|pag|par|penn|plm|plv)\.\s</beforebreak>
|
1613
|
+
<beforebreak>\b(opm|org|ov|pag|par|penn|([1-3][\.e]?)[\s]?pers|plm|plv)\.\s</beforebreak>
|
1610
1614
|
<afterbreak></afterbreak>
|
1611
1615
|
</rule>
|
1612
1616
|
<rule break="no">
|
1613
|
-
<beforebreak>\b(prov|pseud|qty|red|ref|resp|soc|st|tab|tel|temp|tk)\.\s</beforebreak>
|
1617
|
+
<beforebreak>\b(prov|pseud|psych|qty|red|ref|resp|soc|st|tab|tel|temp|tk)\.\s</beforebreak>
|
1614
1618
|
<afterbreak></afterbreak>
|
1615
1619
|
</rule>
|
1616
1620
|
<rule break="no">
|
@@ -1622,7 +1626,7 @@
|
|
1622
1626
|
<afterbreak>Chr</afterbreak>
|
1623
1627
|
</rule>
|
1624
1628
|
<rule break="no">
|
1625
|
-
<beforebreak>\b(uitsl|ver|vgl|vnl|vnw|voorz|ww|zat|
|
1629
|
+
<beforebreak>\b(uitsl|ver|vgl|vnl|vnw|voorz|ww|zat|[Zz]elfst|zgn?)\.\s</beforebreak>
|
1626
1630
|
<afterbreak></afterbreak>
|
1627
1631
|
</rule>
|
1628
1632
|
<rule break="no">
|
@@ -4373,7 +4377,7 @@
|
|
4373
4377
|
<afterbreak>\p{Ll}</afterbreak>
|
4374
4378
|
</rule>
|
4375
4379
|
<rule break="no">
|
4376
|
-
<beforebreak>\b(
|
4380
|
+
<beforebreak>\b(уд|ул|уч|физ|х|хор|э|Эл|эл)\.\s</beforebreak>
|
4377
4381
|
<afterbreak></afterbreak>
|
4378
4382
|
</rule>
|
4379
4383
|
<rule break="no">
|
@@ -4717,6 +4721,12 @@
|
|
4717
4721
|
</rule>
|
4718
4722
|
</languagerule>
|
4719
4723
|
<languagerule languagerulename="Spanish">
|
4724
|
+
|
4725
|
+
<rule break="no">
|
4726
|
+
<beforebreak>¿[^?]+:[\s\u00A0]</beforebreak>
|
4727
|
+
<afterbreak>.</afterbreak>
|
4728
|
+
</rule>
|
4729
|
+
|
4720
4730
|
<rule break="no">
|
4721
4731
|
<beforebreak>Yahoo![\s\u00A0]</beforebreak>
|
4722
4732
|
<afterbreak>\p{Ll}</afterbreak>
|
@@ -4773,6 +4783,10 @@
|
|
4773
4783
|
</rule>
|
4774
4784
|
<!-- Abbreviations that cannot finish sentences-->
|
4775
4785
|
<rule break="no">
|
4786
|
+
<beforebreak>\b((?iu)(en|febr|mzo|abr|my|jun|jul|ag|agt|set|sept|setbre|oct|nov|novbre|dic|dicbre))\.[\s\u00A0]</beforebreak>
|
4787
|
+
<afterbreak/>
|
4788
|
+
</rule>
|
4789
|
+
<rule break="no">
|
4776
4790
|
<beforebreak>\b(dc|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
|
4777
4791
|
<afterbreak/>
|
4778
4792
|
</rule>
|
@@ -4794,7 +4808,7 @@
|
|
4794
4808
|
</rule>
|
4795
4809
|
<!-- Abbreviations that can finish sentences -->
|
4796
4810
|
<rule break="no">
|
4797
|
-
<beforebreak>\b([Ee]ds?|[Cc]oords?|grs?|Sr|Jr|Admón|Inc|Co|Hnos|Vda|[
|
4811
|
+
<beforebreak>\b([Ee]ds?|[Cc]oords?|grs?|Sr|Jr|Admón|Inc|Co|Hnos|Vda|[VUuv]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
4798
4812
|
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
|
4799
4813
|
</rule>
|
4800
4814
|
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
|
@@ -4860,14 +4874,18 @@
|
|
4860
4874
|
</rule>
|
4861
4875
|
<!-- Split at e.g. "1a. Und ..." -->
|
4862
4876
|
<rule break="yes">
|
4863
|
-
<beforebreak>\d+[a-z]\.[\u00A0\s]</beforebreak>
|
4877
|
+
<beforebreak>\d+[a-z]\.[\u00A0\s]{1,2}</beforebreak>
|
4864
4878
|
<afterbreak>\p{Lu}</afterbreak>
|
4865
4879
|
</rule>
|
4866
4880
|
<!-- Don't split at e.g. "d. h." -->
|
4867
4881
|
<rule break="no">
|
4868
|
-
<beforebreak>[^-\p{L}'
|
4882
|
+
<beforebreak>[^-\p{L}'’/°]\p{L}[\.!?…]['|"|“|«|\)|\]|\}]?[\u00A0\s]</beforebreak>
|
4869
4883
|
<afterbreak></afterbreak>
|
4870
4884
|
</rule>
|
4885
|
+
<rule break="no"><!-- special case: "Das 1. Internationale Filmfestival findet nächste Woche statt." -->
|
4886
|
+
<beforebreak>([Dd](as|er|ie|iese[rsmn]?|en|em)|[kmsd]?ein(e[rsnm]?)?|am|fürs|ins|zum|im|am|zur) \d+\.[\u00A0\s]+</beforebreak>
|
4887
|
+
<afterbreak>[A-ZÄÖÜ].*</afterbreak>
|
4888
|
+
</rule>
|
4871
4889
|
<rule break="no">
|
4872
4890
|
<beforebreak>Ust.</beforebreak><!-- needed for German rule UST_ID -->
|
4873
4891
|
<afterbreak>Id</afterbreak>
|
@@ -4889,8 +4907,12 @@
|
|
4889
4907
|
<afterbreak>3|4|Buzz|Crozz</afterbreak>
|
4890
4908
|
</rule>
|
4891
4909
|
<rule break="no">
|
4892
|
-
<beforebreak>[1-3]\.[\u00A0\s]</beforebreak>
|
4893
|
-
<afterbreak>Liga|Bundesliga|Fußball(-B|b)undesliga</afterbreak>
|
4910
|
+
<beforebreak>[1-3]\.[\u00A0\s]{1,2}</beforebreak>
|
4911
|
+
<afterbreak>Liga|Bundesliga|(Fußball|Handball|Basketball)(-B|b)undesliga</afterbreak>
|
4912
|
+
</rule>
|
4913
|
+
<rule break="no">
|
4914
|
+
<beforebreak>\d+\.[\u00A0\s]{1,2}</beforebreak>
|
4915
|
+
<afterbreak>Klässler[sn]?</afterbreak>
|
4894
4916
|
</rule>
|
4895
4917
|
<rule break="no">
|
4896
4918
|
<beforebreak>\bP[Hh]\.</beforebreak>
|
@@ -4904,43 +4926,43 @@
|
|
4904
4926
|
<!-- Don't split after a white-space followed by a single letter followed
|
4905
4927
|
by a dot followed by another whitespace. e.g. " p. " -->
|
4906
4928
|
<rule break="no">
|
4907
|
-
<beforebreak>[\u00A0\s]\p{L}\.[\u00A0\s]</beforebreak>
|
4929
|
+
<beforebreak>[\u00A0\s]\p{L}\.[\u00A0\s]{1,2}</beforebreak>
|
4908
4930
|
<afterbreak>\p{L}\.</afterbreak>
|
4909
4931
|
</rule>
|
4910
4932
|
<!-- Don't split at "bla bla... yada yada" -->
|
4911
4933
|
<rule break="no">
|
4912
|
-
<beforebreak>[\[\(]?\.\.\.[\]\)]?[\u00A0\s]</beforebreak>
|
4934
|
+
<beforebreak>[\[\(]?\.\.\.[\]\)]?[\u00A0\s]{1,2}</beforebreak>
|
4913
4935
|
<afterbreak>\p{Ll}</afterbreak>
|
4914
4936
|
</rule>
|
4915
4937
|
<!-- Don't split [.?!] when they're quoted -->
|
4916
4938
|
<rule break="no">
|
4917
|
-
<beforebreak>['"„][\.!?…]['"
|
4939
|
+
<beforebreak>['"„][\.!?…]['"“«»][\u00A0\s]{1,2}</beforebreak>
|
4918
4940
|
<afterbreak></afterbreak>
|
4919
4941
|
</rule>
|
4920
4942
|
<!-- Don't break after quote unless there's a capital letter
|
4921
4943
|
e.g.: "That's right!" he said. -->
|
4922
4944
|
<rule break="no">
|
4923
|
-
<beforebreak>["'
|
4945
|
+
<beforebreak>["'“«»][\u00A0\s]{1,2}</beforebreak>
|
4924
4946
|
<afterbreak>\p{Ll}</afterbreak>
|
4925
4947
|
</rule>
|
4926
4948
|
<!-- e.g. "Das ist . so." - assume one sentence. -->
|
4927
4949
|
<rule break="no">
|
4928
|
-
<beforebreak>[\u00A0\s]([\.!?]{1,3}|…)['|"|“|«|\)|\]|\}]?[\u00A0\s]</beforebreak>
|
4950
|
+
<beforebreak>[\u00A0\s]([\.!?]{1,3}|…)['|"|“|«|\)|\]|\}]?[\u00A0\s]{1,2}</beforebreak>
|
4929
4951
|
<afterbreak></afterbreak>
|
4930
4952
|
</rule>
|
4931
4953
|
<!-- Numbers, dates e.g. "3.10. datiert" -->
|
4932
4954
|
<rule break="no">
|
4933
|
-
<beforebreak>\b\d+\.[\u00A0\s]</beforebreak>
|
4955
|
+
<beforebreak>\b\d+\.[\u00A0\s]{1,2}</beforebreak>
|
4934
4956
|
<afterbreak>\p{Ll}|\p{Lu}{2,}</afterbreak>
|
4935
4957
|
</rule>
|
4936
4958
|
<!-- z.B. "Das hier ist ein(!) Satz." -->
|
4937
4959
|
<rule break="no">
|
4938
|
-
<beforebreak>[\(\[][!?]{1,3}[\]\)][\u00A0\s]</beforebreak>
|
4960
|
+
<beforebreak>[\(\[][!?]{1,3}[\]\)][\u00A0\s]{1,2}</beforebreak>
|
4939
4961
|
<afterbreak></afterbreak>
|
4940
4962
|
</rule>
|
4941
4963
|
<!-- z.B. "Das hier ist (genau!) ein Satz." -->
|
4942
4964
|
<rule break="no">
|
4943
|
-
<beforebreak>[!?]{1,3}[\)\]][\u00A0\s]</beforebreak>
|
4965
|
+
<beforebreak>[!?]{1,3}[\)\]][\u00A0\s]{1,2}</beforebreak>
|
4944
4966
|
<afterbreak></afterbreak>
|
4945
4967
|
</rule>
|
4946
4968
|
<!-- z.B. "bla (...) blubb" -> kein Satzende -->
|
@@ -4950,75 +4972,83 @@
|
|
4950
4972
|
</rule>
|
4951
4973
|
<!-- don't split at cases like "Friedrich II. wird auch..." -->
|
4952
4974
|
<rule break="no">
|
4953
|
-
<beforebreak>[\u00A0\s ][IVX]+\.[\u00A0\s]</beforebreak>
|
4975
|
+
<beforebreak>[\u00A0\s ][IVX]+\.[\u00A0\s]{1,2}</beforebreak>
|
4954
4976
|
<afterbreak>[^\p{Lu}]+</afterbreak>
|
4955
4977
|
</rule>
|
4956
4978
|
<!-- don't split at cases like "im 13. oder 14. Jahrhundert" -->
|
4957
4979
|
<rule break="no">
|
4958
|
-
<beforebreak>\d+\.[\u00A0\s]</beforebreak>
|
4980
|
+
<beforebreak>\d+\.[\u00A0\s]{1,2}</beforebreak>
|
4959
4981
|
<afterbreak>(und|oder|bis)[\u00A0\s]</afterbreak>
|
4960
4982
|
</rule>
|
4961
4983
|
<!-- einige deutsche Monate, vor denen eine Zahl erscheinen kann,
|
4962
4984
|
ohne dass eine Satzgrenze erkannt wird
|
4963
4985
|
(z.B. "am 13. Dezember" -> keine Satzgrenze) -->
|
4964
4986
|
<rule break="no">
|
4965
|
-
<beforebreak>\d+\.[\u00A0\s]</beforebreak>
|
4987
|
+
<beforebreak>\d+\.[\u00A0\s]{1,2}</beforebreak>
|
4966
4988
|
<afterbreak>Januar|Jänner|Februar|März|Merz|April|Mai|Ju[ln]i|August|September|Oktober|November|Dezember</afterbreak>
|
4967
4989
|
</rule>
|
4968
4990
|
<rule break="no">
|
4969
|
-
<beforebreak>\d+\.[\u00A0\s]</beforebreak>
|
4991
|
+
<beforebreak>\d+\.[\u00A0\s]{1,2}</beforebreak>
|
4970
4992
|
<afterbreak>J[aä]n|Febr?|Mär|Apr|Mai|Ju[nl]|Aug|Sept?|Okt|Nov|Dez</afterbreak>
|
4971
4993
|
</rule>
|
4972
4994
|
<rule break="no">
|
4973
|
-
<beforebreak>(Jan|Jän|Febr?|Mär|Apr|Mai|Ju[nl]|Aug|Sept?|Okt|Nov|Dez)\.[\u00A0\s]</beforebreak>
|
4995
|
+
<beforebreak>(Jan|Jän|Febr?|Mär|Apr|Mai|Ju[nl]|Aug|Sept?|Okt|Nov|Dez)\.[\u00A0\s]{1,2}</beforebreak>
|
4974
4996
|
<afterbreak>\d\d(\d\d)?</afterbreak>
|
4975
4997
|
</rule>
|
4976
4998
|
<!-- ähnliche Fälle außerhalb der Monatsnamen -->
|
4977
4999
|
<rule break="no">
|
4978
|
-
<beforebreak>\d+\.[\u00A0\s]</beforebreak>
|
5000
|
+
<beforebreak>\d+\.[\u00A0\s]{1,2}</beforebreak>
|
4979
5001
|
<afterbreak>Amtsperiode|Breitengrads?|Breitengrades|Jubiläum|Jhd?|Jhdts?|Konferenz|(Jahres|Partei)(-K|k)onferenz|Längengrade?s?|Tags?|Tages|(Jahres|Spiel|Partei|Geburts)tag|(Jahres|Spiel|Partei|Geburts)tages|(Jahres|Spiel|Partei|Geburts)tags|Jahrhunderts?|Jahrtausend|Platz|Platzes|Lebensjahrs?|Lebensjahres|Lochs?|Loches|Grads|Grades|Obergeschoss|Stock(werk)?s?|Etage|Klasse|Runde|Bezirk|Etappe|Staffel|Sinfonie</afterbreak>
|
4980
5002
|
</rule>
|
4981
5003
|
<!-- English abbreviations - but these work globally for all languages -->
|
4982
5004
|
<rule break="no">
|
4983
|
-
<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd|Buchst)\.[\u00A0\s]</beforebreak>
|
5005
|
+
<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|[Ss]ek|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd|Buchst)\.[\u00A0\s]{1,2}</beforebreak>
|
4984
5006
|
<afterbreak></afterbreak>
|
4985
5007
|
</rule>
|
4986
5008
|
<!-- Latin abbreviations - but these work globally for all languages -->
|
4987
5009
|
<rule break="no">
|
4988
|
-
<beforebreak>\b(spp?)\.[\u00A0\s]</beforebreak>
|
5010
|
+
<beforebreak>\b(spp?)\.[\u00A0\s]{1,2}</beforebreak>
|
4989
5011
|
<afterbreak></afterbreak>
|
4990
5012
|
</rule>
|
4991
5013
|
<!-- German abbreviations -->
|
4992
5014
|
<rule break="no">
|
4993
|
-
<beforebreak>\b(ggü|Mag|mtl|versch|d|Übers|usw|
|
5015
|
+
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|versch|[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|Abzw|[Vv]sl|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|Dtl|Dez)\.[\u00A0\s]{1,2}</beforebreak>
|
4994
5016
|
<afterbreak></afterbreak>
|
4995
5017
|
</rule>
|
4996
5018
|
<rule break="no">
|
4997
|
-
<beforebreak>\b(cts?|
|
5019
|
+
<beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|dt|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|exkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
|
4998
5020
|
<afterbreak></afterbreak>
|
4999
5021
|
</rule>
|
5000
5022
|
<rule break="no">
|
5001
|
-
<beforebreak>\bDipl\.-[A-Z][a-z]{2,4}\.[\u00A0\s]</beforebreak>
|
5023
|
+
<beforebreak>\bDipl\.-[A-Z][a-z]{2,4}\.[\u00A0\s]{1,2}</beforebreak>
|
5002
5024
|
<afterbreak></afterbreak>
|
5003
5025
|
</rule>
|
5004
5026
|
<rule break="no">
|
5005
|
-
<beforebreak>\b
|
5027
|
+
<beforebreak>\b[BM]\.[\u00A0\s]Sc\.[\u00A0\s]</beforebreak>
|
5028
|
+
<afterbreak>\p{Ll}</afterbreak>
|
5029
|
+
</rule>
|
5030
|
+
<rule break="no">
|
5031
|
+
<beforebreak>\b(ff|Fa|fachspr|fam|fem|Fem|Fr|franz|frz?|[Aa]ltfranz|frdl|Frl|Fut|Gd|gebr?|Gebr|geh|geleg|gen|Gen|germ|gesch|ges|get|ggf|Ggf|Ggs|ggT|Gr|[Gg]rds|griech)\.[\u00A0\s]{1,2}</beforebreak>
|
5006
5032
|
<afterbreak></afterbreak>
|
5007
5033
|
</rule>
|
5008
5034
|
<rule break="no">
|
5009
|
-
<beforebreak>\b(hebr|hg|hl|Hrsg|Hg|hist|hochd|hochspr|Hptst|Hr|hrsg|Allg|IdNr|ill|inkl|incl|Ind|Inf|Ing|ital|Tr|jap|Jb|Jg|Jhd?|Jhdts?|jmd[mns]?|jur|Kap|kart|kath|kfm|kaufm|Kfm|kgl|Kl|Konj|königl|Krs?|Kto)\.[\u00A0\s]</beforebreak>
|
5035
|
+
<beforebreak>\b(hebr|hg|hl|Hrsg|Hg|hist|hochd|hochspr|Hptst|Hr|hrsg|Allg|IdNr|ill|inkl|incl|Ind|Inf|Ing|ital|Tr|jap|Jb|Jg|Jhd?|Jhdts?|jmd[mns]?|jur|Kap|kart|kath|kfm|kaufm|Kfm|kgl|Kl|Konj|königl|Krs?|Kto)\.[\u00A0\s]{1,2}</beforebreak>
|
5010
5036
|
<afterbreak></afterbreak>
|
5011
5037
|
</rule>
|
5012
5038
|
<rule break="no">
|
5013
|
-
<beforebreak>\b(lat|lfd|Lit|lt|Lz|Mask|mask|max|Mrd|mdal|me[dt]|phil|mhd|Mio?|mind?|Mo|mod|nachm|nördlBr|neutr|Nhd|Nom|Nrn?|Num|Obj|od|dgl|offz)\.[\u00A0\s]</beforebreak>
|
5039
|
+
<beforebreak>\b([A-ZÖÄÜ][a-zöäüß]+nr|tel|[Gg]em|Pat|prov|Betr|lat|lfd|Lit|lt|Lz|Mask|mask|max|Mrd|mdal|me[dt]|phil|mhd|Mio?|mio|mind?|Mo|mod|nachm|nördlBr|neutr|Nhd|Nom|Nrn?|Num|Obj|od|dgl|offz)\.[\u00A0\s]{1,2}</beforebreak>
|
5014
5040
|
<afterbreak></afterbreak>
|
5015
5041
|
</rule>
|
5016
5042
|
<rule break="no">
|
5017
|
-
<beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|Std?|stacc|Str|stud|Subst|sva|svw|sZ)\.[\u00A0\s]</beforebreak>
|
5043
|
+
<beforebreak>\b(Part|Per[fs]|Pfd|Pl(ur)?|pl|Plusq|Pos|pp|Prä[ps]|Prät|Pro[vf]|rd|reg|resp|Rhld|rit|Sa|südl|Br|se[ln]|Sept|Sing|sign|So|sog|Sp|Std?|stacc|Str|stud|Subst|sva|svw|sZ)\.[\u00A0\s]{1,2}</beforebreak>
|
5018
5044
|
<afterbreak></afterbreak>
|
5019
5045
|
</rule>
|
5020
5046
|
<rule break="no">
|
5021
|
-
<beforebreak
|
5047
|
+
<beforebreak>([A-ZÖÄÜ][a-zöäüß]+str)\.[\u00A0\s]{1,2}</beforebreak>
|
5048
|
+
<afterbreak>\p{Ll}</afterbreak>
|
5049
|
+
</rule>
|
5050
|
+
<rule break="no">
|
5051
|
+
<beforebreak>\b(Tel|teilw|Temp|trans|Tsd|übertr|übl|ff|überarb|ugs|univ|unveränd|urspr|USt|UST|USt\-IdNr|[Aa][bn]schl|sw|kl|[Gg]r|vgl|vll|Vll|vlt|Vlt|vllt|Vllt|Vgl|Vol|vollst|vorm|Vp|Vs|vs|wesentl|wg|Whg|Hd|Ztr|zus|Zus|zzt?|zzgl|zB|zb|Zz|Zt|zw|Min|Bzgl|bzgl|bezügl|Frhr|ggfs|insb|autom|Mw[sS]t)\.[\u00A0\s]{1,2}</beforebreak>
|
5022
5052
|
<afterbreak></afterbreak>
|
5023
5053
|
</rule>
|
5024
5054
|
<!-- Break rules -->
|
@@ -5031,7 +5061,7 @@
|
|
5031
5061
|
<afterbreak>\p{Lu}[^\p{Lu}]</afterbreak>
|
5032
5062
|
</rule>
|
5033
5063
|
<rule break="yes">
|
5034
|
-
<beforebreak>[\u00A0\s]\p{L}[\.!?…][\u00A0\s]</beforebreak>
|
5064
|
+
<beforebreak>[\u00A0\s]\p{L}[\.!?…][\u00A0\s]{1,2}</beforebreak>
|
5035
5065
|
<afterbreak>\p{Lu}\p{Ll}</afterbreak>
|
5036
5066
|
</rule>
|
5037
5067
|
<!-- z.B. 2 sentences: “Liebst du mich?” “Ja!” -->
|
@@ -5250,6 +5280,18 @@
|
|
5250
5280
|
<beforebreak>[\s\u00A0]</beforebreak>
|
5251
5281
|
<afterbreak>[»”’"'›]</afterbreak>
|
5252
5282
|
</rule>
|
5283
|
+
<rule break="no">
|
5284
|
+
<beforebreak>ambass|cuil|p|liv|assoc|bibl|ENREG|al|phot|circ|concl|deb|dest|dupl|éd|écon|incl?|ital|jur|juris|jurispr|larg|lex|législ|longit|(?-i)RR|(?-i)ÉÉm|(?-i)EExc|métr|méd|néol|obs|plur|préf|prog|publ|trib|trim|suiv|(?-i)LL|env|élem|ér|ét|hon|hypexp|conj|coop|ch|alph|anglic|app|pr|collab|paragr|sect|para|commiss|coord|dép|dir|gér|secour|sén|gén|abrév|adj|adr|anon|append|av|auj|bibl|bibliogr|bdc|boul|bull|bur|caar|cat|cell|chap|cir|compl|cf|corres|dest|dict|div|dom|dr|édif|éd|électr|élém|encycl|fig|fl|graph|hist|hyp|ill|imm|imp|impr|incl|inc|ind|in[gtvf]|jur|lat|litt|liq|loc|liv|livr|méd|mém|pl|réd|rel|sc|suiv|sup|suppl|trad|univ|mus|pharm|soc|pol|compt|urb|act|confect|exp|réal|prov|introd|inv|tial|enr|ép|équiv|esp|étym|excl|exc|ap|arr|arch|adv|al|anc|angl|ann|gest|gouv|prés|rect|représ|resp|scrut|vol|coll|réf|id|sqq?|janv|fév|avr|juill|oct|nov|déc|admin</beforebreak>
|
5285
|
+
<afterbreak>\p{Ll}.*</afterbreak>
|
5286
|
+
</rule>
|
5287
|
+
<rule break="no">
|
5288
|
+
<beforebreak>\p{Ll}.*</beforebreak>
|
5289
|
+
<afterbreak>ambass|cuil|p|liv|assoc|bibl|oct|déc|jan|fév|avr|juil|sept|nov|ENREG|al|circ|concl|deb|dest|dupl|éd|écon|incl?|ital|jur|juris|jurispr|larg|lex|législ|longit|(?-i)RR|(?-i)ÉÉm|(?-i)EExc|métr|méd|néol|obs|plur|préf|prog|publ|trib|trim|suiv|(?-i)LL|env|élem|ér|ét|hon|hypexp|conj|coop|ch|alph|anglic|app|pr|collab|paragr|sect|para|commiss|coord|dép|dir|gér|secour|sén|gén|abrév|adj|adr|anon|append|av|auj|bibl|bibliogr|bdc|boul|bull|bur|caar|cat|cell|chap|cir|compl|cf|corres|dest|dict|div|dom|dr|édif|éd|électr|élém|encycl|fig|fl|graph|hist|hyp|ill|imm|imp|impr|incl|inc|ind|in[gtvf]|jur|lat|litt|liq|loc|liv|livr|méd|mém|pl|réd|rel|sc|suiv|sup|suppl|trad|univ|mus|pharm|soc|pol|compt|urb|act|confect|exp|réal|prov|introd|inv|tial|enr|ép|équiv|esp|étym|excl|exc|ap|arr|arch|adv|al|anc|angl|ann|gest|gouv|prés|rect|représ|resp|scrut|vol|coll|réf|id|sqq?|janv|fév|avr|juill|oct|nov|déc|admin</afterbreak>
|
5290
|
+
</rule>
|
5291
|
+
<rule break="no">
|
5292
|
+
<beforebreak>.*°C</beforebreak>
|
5293
|
+
<afterbreak>de</afterbreak>
|
5294
|
+
</rule>
|
5253
5295
|
<rule break="yes">
|
5254
5296
|
<beforebreak>[\.!?][\s\u00A0][»”’"'›][\s\u00A0]</beforebreak>
|
5255
5297
|
<afterbreak>[«“‘‹"'\p{Lu}]</afterbreak>
|
@@ -5258,6 +5300,11 @@
|
|
5258
5300
|
<beforebreak>Yahoo![\s\u00A0]</beforebreak>
|
5259
5301
|
<afterbreak>\p{Ll}</afterbreak>
|
5260
5302
|
</rule>
|
5303
|
+
<!-- !? + lowercase -->
|
5304
|
+
<rule break="no">
|
5305
|
+
<beforebreak>(\!|\?)[\s\u00A0]</beforebreak>
|
5306
|
+
<afterbreak>\p{Ll}</afterbreak>
|
5307
|
+
</rule>
|
5261
5308
|
<rule break="yes">
|
5262
5309
|
<beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
|
5263
5310
|
<afterbreak></afterbreak>
|
@@ -5599,7 +5646,7 @@
|
|
5599
5646
|
</rule>
|
5600
5647
|
<rule break="no">
|
5601
5648
|
<!-- unfortunately \b ignores \u0301 -->
|
5602
|
-
<beforebreak>\b
|
5649
|
+
<beforebreak>\b[сС]т\.[\h\v]</beforebreak>
|
5603
5650
|
<afterbreak>[\h]*(?!([АВУОІЄ]|На|Але|Так?)[\h\v])</afterbreak>
|
5604
5651
|
</rule>
|
5605
5652
|
<rule break="no">
|
@@ -5631,7 +5678,7 @@
|
|
5631
5678
|
</rule>
|
5632
5679
|
<!-- abbreviation with proper noun: проф. Грицько, о. Лісове -->
|
5633
5680
|
<rule break="no">
|
5634
|
-
<beforebreak>\b([Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]
|
5681
|
+
<beforebreak>\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|упоряд|чл\.-кор|[Пп]реп)\.[\h\v]*</beforebreak>
|
5635
5682
|
<afterbreak>[\h\v]*[А-ЯІЇЄҐA-Z]</afterbreak>
|
5636
5683
|
</rule>
|
5637
5684
|
<rule break="no">
|
@@ -5979,7 +6026,7 @@
|
|
5979
6026
|
</rule>
|
5980
6027
|
<!-- Abbreviations that cannot finish sentences-->
|
5981
6028
|
<rule break="no">
|
5982
|
-
<beforebreak>\b(a|Ab|abrev|absol|acad|Açor|A\. ?D|add|adj|adv|advers|Aeron|afér|Agric|Álg|aprox|[Aa]rts?|Artilh|auxil|av|Av)\.\s?</beforebreak>
|
6029
|
+
<beforebreak>\b(a|Ab|abr|abrev|absol|acad|Açor|A\. ?D|add|adj|adv|advers|Aeron|afér|Agric|ago|Álg|aprox|[Aa]rts?|Artilh|auxil|av|Av)\.\s?</beforebreak>
|
5983
6030
|
<afterbreak></afterbreak>
|
5984
6031
|
</rule>
|
5985
6032
|
<rule break="no">
|
@@ -5991,7 +6038,7 @@
|
|
5991
6038
|
<afterbreak></afterbreak>
|
5992
6039
|
</rule>
|
5993
6040
|
<rule break="no">
|
5994
|
-
<beforebreak>\b(D|def|dem|deprec|deriv|det|disj|[Dd]ra?s?)\.\s?</beforebreak>
|
6041
|
+
<beforebreak>\b(D|def|dem|deprec|deriv|det|dez|disj|[Dd]ra?s?)\.\s?</beforebreak>
|
5995
6042
|
<afterbreak></afterbreak>
|
5996
6043
|
</rule>
|
5997
6044
|
<rule break="no">
|
@@ -6003,7 +6050,7 @@
|
|
6003
6050
|
<afterbreak>\p{Ll}</afterbreak>
|
6004
6051
|
</rule>
|
6005
6052
|
<rule break="no">
|
6006
|
-
<beforebreak>\b(f|fam|Farm|fem|fig|fin|fl|fr|frac)\.\s?</beforebreak>
|
6053
|
+
<beforebreak>\b(f|fam|Farm|fem|fev|fig|fin|fl|fr|frac)\.\s?</beforebreak>
|
6007
6054
|
<afterbreak></afterbreak>
|
6008
6055
|
</rule>
|
6009
6056
|
<rule break="no">
|
@@ -6019,7 +6066,7 @@
|
|
6019
6066
|
<afterbreak></afterbreak>
|
6020
6067
|
</rule>
|
6021
6068
|
<rule break="no">
|
6022
|
-
<beforebreak>\b(Jorn|Jur)\.\s?</beforebreak>
|
6069
|
+
<beforebreak>\b(jan|jul|jun|Jorn|Jur)\.\s?</beforebreak>
|
6023
6070
|
<afterbreak></afterbreak>
|
6024
6071
|
</rule>
|
6025
6072
|
<rule break="no">
|
@@ -6027,15 +6074,15 @@
|
|
6027
6074
|
<afterbreak></afterbreak>
|
6028
6075
|
</rule>
|
6029
6076
|
<rule break="no">
|
6030
|
-
<beforebreak>\b(m|masc|Mat|máx|Mecân|[Mm]ed|Mil|mín|mult|Mús)\.\s?</beforebreak>
|
6077
|
+
<beforebreak>\b(m|mai|mar|masc|Mat|máx|Mecân|[Mm]ed|Mil|mín|mult|Mús)\.\s?</beforebreak>
|
6031
6078
|
<afterbreak></afterbreak>
|
6032
6079
|
</rule>
|
6033
6080
|
<rule break="no">
|
6034
|
-
<beforebreak>\b(n|N|Náut|N.B|neg|neol|num|núm)\.\s?</beforebreak>
|
6081
|
+
<beforebreak>\b(n|N|Náut|N.B|neg|neol|nov|num|núm)\.\s?</beforebreak>
|
6035
6082
|
<afterbreak></afterbreak>
|
6036
6083
|
</rule>
|
6037
6084
|
<rule break="no">
|
6038
|
-
<beforebreak>\b(ord)\.\s?</beforebreak>
|
6085
|
+
<beforebreak>\b(ord|out)\.\s?</beforebreak>
|
6039
6086
|
<afterbreak></afterbreak>
|
6040
6087
|
</rule>
|
6041
6088
|
<rule break="no">
|
@@ -6051,7 +6098,7 @@
|
|
6051
6098
|
<afterbreak></afterbreak>
|
6052
6099
|
</rule>
|
6053
6100
|
<rule break="no">
|
6054
|
-
<beforebreak>\b(S|S.A|símb|S. ?M|[Ss]ra?s?|[Ss]rta|suf|superl)\.\s?</beforebreak>
|
6101
|
+
<beforebreak>\b(S|S.A|set|símb|S. ?M|[Ss]ra?s?|[Ss]rta|suf|superl)\.\s?</beforebreak>
|
6055
6102
|
<afterbreak></afterbreak>
|
6056
6103
|
</rule>
|
6057
6104
|
<rule break="no">
|
@@ -6073,7 +6120,7 @@
|
|
6073
6120
|
<!-- s. XIX; s.IX; sec. XX; séc. XX -->
|
6074
6121
|
<rule break="no">
|
6075
6122
|
<beforebreak>\bs([eé]c)?\.\s?</beforebreak>
|
6076
|
-
<afterbreak>[
|
6123
|
+
<afterbreak>[IVXDMCL]+</afterbreak>
|
6077
6124
|
</rule>
|
6078
6125
|
<!-- English abbreviations - but these work globally for all languages -->
|
6079
6126
|
<rule break="no">
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srx-languagetool
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Madlon-Kay
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-09-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: srx
|