srx-languagetool 0.11.0 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +1 -1
- data/lib/srx/languagetool/version.rb +1 -1
- data/lib/srx/segment.srx +13 -5
- metadata +2 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: b095dc51541d60d446b063cdff16940d7840e7d51891e1f5201117d42089b0da
|
|
4
|
+
data.tar.gz: 5376dce7bda3c17a142477a3b19f629e65527c4fb96d540bfa1a853f0ed394f8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: eb9f0aac7af1e2345842bdaf21917556429d8c887d356430c4c707b18b82d0a54cc46a5b6344247de27113f3ad0730f0d3fa37012f4a7c5c7cf4987dcedcf271
|
|
7
|
+
data.tar.gz: 3370f8761982aa574c862f32d9a2702451cb992fa35dd0f842a4116224d448c7a2dd247be0ac2c61cace69f72364963c9072bd72e3fc2c46a3bc948996c8fc61
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
data/lib/srx/segment.srx
CHANGED
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
<formathandle type="end" include="yes"></formathandle>
|
|
6
6
|
<formathandle type="isolated" include="no"></formathandle>
|
|
7
7
|
<okpsrx:options oneSegmentIncludesAll="no" trimLeadingWhitespaces="no" trimTrailingWhitespaces="no" useJavaRegex="yes" useIcu4JBreakRules="no" treatIsolatedCodesAsWhitespace="no"></okpsrx:options>
|
|
8
|
-
<okpsrx:sample language="nl" useMappedRules="yes">
|
|
8
|
+
<okpsrx:sample language="nl" useMappedRules="yes">Wat God buiten Christus is. 2.</okpsrx:sample>
|
|
9
9
|
<okpsrx:rangeRule></okpsrx:rangeRule>
|
|
10
10
|
</header>
|
|
11
11
|
<body>
|
|
@@ -1164,7 +1164,7 @@
|
|
|
1164
1164
|
<afterbreak>D\.?</afterbreak>
|
|
1165
1165
|
</rule>
|
|
1166
1166
|
<rule break="no">
|
|
1167
|
-
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
|
1167
|
+
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Aa]cq|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
|
1168
1168
|
<afterbreak>[^\p{Lu}]|I</afterbreak>
|
|
1169
1169
|
</rule>
|
|
1170
1170
|
<rule break="no">
|
|
@@ -1541,6 +1541,10 @@
|
|
|
1541
1541
|
</languagerule>
|
|
1542
1542
|
<languagerule languagerulename="Dutch">
|
|
1543
1543
|
<rule break="yes">
|
|
1544
|
+
<beforebreak>[ ]is[.][ ]</beforebreak>
|
|
1545
|
+
<afterbreak>[0-9]\.($|[ ])</afterbreak>
|
|
1546
|
+
</rule>
|
|
1547
|
+
<rule break="yes">
|
|
1544
1548
|
<beforebreak>(^| )O\.\s</beforebreak>
|
|
1545
1549
|
<afterbreak>([A-Z][a-z]{1,3}[ ,:;.!?]|Indië|Wanneer|Kunnen|Sorry)</afterbreak>
|
|
1546
1550
|
</rule>
|
|
@@ -1661,6 +1665,10 @@
|
|
|
1661
1665
|
<beforebreak>°C\.\s</beforebreak>
|
|
1662
1666
|
<afterbreak>[A-Z][a-z]</afterbreak>
|
|
1663
1667
|
</rule>
|
|
1668
|
+
<rule break="yes">
|
|
1669
|
+
<beforebreak>[A-Z]&[A-Z]\.\s</beforebreak>
|
|
1670
|
+
<afterbreak>[A-Z][a-z]</afterbreak>
|
|
1671
|
+
</rule>
|
|
1664
1672
|
<rule break="no">
|
|
1665
1673
|
<beforebreak>\b([A-Z]|Adr|Chr|Fr|Fred|IJ|Jac|Joh|Ph|St|Th|Tj|v|v\.(\s)?d)\.(\s)?</beforebreak>
|
|
1666
1674
|
<afterbreak>\p{Lu}</afterbreak>
|
|
@@ -4704,7 +4712,7 @@
|
|
|
4704
4712
|
<afterbreak>[XIV\d]+\b</afterbreak>
|
|
4705
4713
|
</rule>
|
|
4706
4714
|
<rule break="no">
|
|
4707
|
-
<beforebreak>\b([Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|
|
|
4715
|
+
<beforebreak>\b(Ltd|[Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|grs?|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
|
4708
4716
|
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
|
|
4709
4717
|
</rule>
|
|
4710
4718
|
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
|
|
@@ -4854,7 +4862,7 @@
|
|
|
4854
4862
|
</rule>
|
|
4855
4863
|
<!-- Abbreviations that can finish sentences -->
|
|
4856
4864
|
<rule break="no">
|
|
4857
|
-
<beforebreak>\b([Ee]ds?|[Cc]oords
|
|
4865
|
+
<beforebreak>\b(Ltd|[Ee]ds?|[Cc]oords?|\d+(r|n|t|è|é|a|rs|ns|es)|seg|masc|fem|sing|pl|adj|adv|g|kg|m|km|cm|ha|u|h|hrs|s|ss|alt|cant|cast|cert|com|dir|grs?|nom|parc|pres|set|Sr|Jr|Admón|Adm|Inc|Co|Hnos|Vda|[VU]d[s]?)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
|
4858
4866
|
<afterbreak>[\-¡¿«»"'\u2018\u201C\p{Ps}\u2012\u2013\u2014\u2015\u2053]*\p{Ll}</afterbreak>
|
|
4859
4867
|
</rule>
|
|
4860
4868
|
<!-- Any word in acronyms like U.S.A.F or F. B. I. or C. or c.s.p. or p. e. -->
|
|
@@ -5058,7 +5066,7 @@
|
|
|
5058
5066
|
</rule>
|
|
5059
5067
|
<!-- German abbreviations -->
|
|
5060
5068
|
<rule break="no">
|
|
5061
|
-
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|ggfls|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|Blvd|
|
|
5069
|
+
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Aa]bzü?gl|\d+-tlg|tlg|ggfls|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
|
|
5062
5070
|
<afterbreak></afterbreak>
|
|
5063
5071
|
</rule>
|
|
5064
5072
|
<rule break="no">
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: srx-languagetool
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.11.0
|
|
4
|
+
version: 0.12.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Aaron Madlon-Kay
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2023-07
|
|
11
|
+
date: 2023-10-07 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: srx
|