srx-languagetool 0.6.0 → 0.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -2
- data/.ruby-version +1 -1
- data/CHANGELOG.md +5 -1
- data/Gemfile.lock +16 -14
- data/lib/srx/languagetool/version.rb +1 -1
- data/lib/srx/segment.srx +26 -9
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c277febbbdec765f5eeba65cdc43dd661ddf807a53f2e2858f0e321eccdf3459
|
4
|
+
data.tar.gz: 34aa30e138bddfacd30ed8f9a48f45d4e91afa5c4e4a789d3208860c1bbd608e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 62a56ee4d2cb0ba5fe16181681561a829f7f59f84155753d896fa6dc6153bd238ed9de8337cea2e75ac6c83e94381d84a0ede80c2bbcff1648fe97c57f3d5fed
|
7
|
+
data.tar.gz: 66d09127f9eb76307bd38938c619a6099c0ff941bcd3c33b51e4c130c0d4373ea12cdb080bec1aab783e4070c0d5d70fa2e97a4d64d1c394390316630012e0b3
|
data/.github/workflows/main.yml
CHANGED
@@ -10,10 +10,10 @@ jobs:
|
|
10
10
|
- name: Set up Ruby
|
11
11
|
uses: ruby/setup-ruby@v1
|
12
12
|
with:
|
13
|
-
ruby-version: 2.7.
|
13
|
+
ruby-version: 2.7.6
|
14
14
|
- name: Install
|
15
15
|
run: |
|
16
|
-
gem install bundler -v 2.3.
|
16
|
+
gem install bundler -v 2.3.17
|
17
17
|
bundle install
|
18
18
|
- name: Type check
|
19
19
|
run: bundle exec solargraph typecheck --level typed
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
2.7.
|
1
|
+
2.7.6
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
srx-languagetool (0.6.0)
|
4
|
+
srx-languagetool (0.7.0)
|
5
5
|
srx (< 1.0)
|
6
6
|
|
7
7
|
GEM
|
@@ -14,20 +14,21 @@ GEM
|
|
14
14
|
diff-lcs (1.5.0)
|
15
15
|
e2mmap (0.1.0)
|
16
16
|
jaro_winkler (1.5.4)
|
17
|
-
|
17
|
+
json (2.6.2)
|
18
|
+
kramdown (2.4.0)
|
18
19
|
rexml
|
19
20
|
kramdown-parser-gfm (1.1.0)
|
20
21
|
kramdown (~> 2.0)
|
21
|
-
minitest (5.
|
22
|
-
nokogiri (1.13.
|
22
|
+
minitest (5.16.2)
|
23
|
+
nokogiri (1.13.6-x86_64-darwin)
|
23
24
|
racc (~> 1.4)
|
24
25
|
parallel (1.22.1)
|
25
|
-
parser (3.1.
|
26
|
+
parser (3.1.2.0)
|
26
27
|
ast (~> 2.4.1)
|
27
28
|
racc (1.6.0)
|
28
29
|
rainbow (3.1.1)
|
29
30
|
rake (13.0.6)
|
30
|
-
regexp_parser (2.
|
31
|
+
regexp_parser (2.5.0)
|
31
32
|
reverse_markdown (2.1.1)
|
32
33
|
nokogiri
|
33
34
|
rexml (3.2.5)
|
@@ -35,19 +36,20 @@ GEM
|
|
35
36
|
diff-lcs (>= 1.2.0, < 2.0)
|
36
37
|
rspec-support (~> 3.11.0)
|
37
38
|
rspec-support (3.11.0)
|
38
|
-
rubocop (1.
|
39
|
+
rubocop (1.31.1)
|
40
|
+
json (~> 2.3)
|
39
41
|
parallel (~> 1.10)
|
40
42
|
parser (>= 3.1.0.0)
|
41
43
|
rainbow (>= 2.2.2, < 4.0)
|
42
44
|
regexp_parser (>= 1.8, < 3.0)
|
43
|
-
rexml
|
44
|
-
rubocop-ast (>= 1.
|
45
|
+
rexml (>= 3.2.5, < 4.0)
|
46
|
+
rubocop-ast (>= 1.18.0, < 2.0)
|
45
47
|
ruby-progressbar (~> 1.7)
|
46
48
|
unicode-display_width (>= 1.4.0, < 3.0)
|
47
|
-
rubocop-ast (1.
|
49
|
+
rubocop-ast (1.18.0)
|
48
50
|
parser (>= 3.1.1.0)
|
49
51
|
ruby-progressbar (1.11.0)
|
50
|
-
solargraph (0.
|
52
|
+
solargraph (0.45.0)
|
51
53
|
backport (~> 1.2)
|
52
54
|
benchmark
|
53
55
|
bundler (>= 1.17.2)
|
@@ -66,9 +68,9 @@ GEM
|
|
66
68
|
nokogiri (~> 1.11)
|
67
69
|
thor (1.2.1)
|
68
70
|
tilt (2.0.10)
|
69
|
-
unicode-display_width (2.
|
71
|
+
unicode-display_width (2.2.0)
|
70
72
|
webrick (1.7.0)
|
71
|
-
yard (0.9.
|
73
|
+
yard (0.9.28)
|
72
74
|
webrick (~> 1.7.0)
|
73
75
|
|
74
76
|
PLATFORMS
|
@@ -85,4 +87,4 @@ DEPENDENCIES
|
|
85
87
|
srx-languagetool!
|
86
88
|
|
87
89
|
BUNDLED WITH
|
88
|
-
2.3.
|
90
|
+
2.3.17
|
data/lib/srx/segment.srx
CHANGED
@@ -1159,7 +1159,7 @@
|
|
1159
1159
|
<afterbreak>D\.?</afterbreak>
|
1160
1160
|
</rule>
|
1161
1161
|
<rule break="no"><!-- min. -->
|
1162
|
-
<beforebreak>\b([Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1162
|
+
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Ii]ncl?|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1163
1163
|
<afterbreak>[^\p{Lu}]|I</afterbreak>
|
1164
1164
|
</rule>
|
1165
1165
|
<rule break="no"><!-- hr. -->
|
@@ -4773,6 +4773,10 @@
|
|
4773
4773
|
</rule>
|
4774
4774
|
<!-- Abbreviations that cannot finish sentences-->
|
4775
4775
|
<rule break="no">
|
4776
|
+
<beforebreak>\b((?iu)(en|febr|mzo|abr|my|jun|jul|ag|agt|set|sept|setbre|oct|nov|novbre|dic|dicbre))\.[\s\u00A0]</beforebreak>
|
4777
|
+
<afterbreak/>
|
4778
|
+
</rule>
|
4779
|
+
<rule break="no">
|
4776
4780
|
<beforebreak>\b(dc|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
|
4777
4781
|
<afterbreak/>
|
4778
4782
|
</rule>
|
@@ -4890,7 +4894,11 @@
|
|
4890
4894
|
</rule>
|
4891
4895
|
<rule break="no">
|
4892
4896
|
<beforebreak>[1-3]\.[\u00A0\s]</beforebreak>
|
4893
|
-
<afterbreak>Liga|Bundesliga|Fußball(-B|b)undesliga</afterbreak>
|
4897
|
+
<afterbreak>Liga|Bundesliga|(Fußball|Handball|Basketball)(-B|b)undesliga</afterbreak>
|
4898
|
+
</rule>
|
4899
|
+
<rule break="no">
|
4900
|
+
<beforebreak>\d+\.[\u00A0\s]</beforebreak>
|
4901
|
+
<afterbreak>Klässler[sn]?</afterbreak>
|
4894
4902
|
</rule>
|
4895
4903
|
<rule break="no">
|
4896
4904
|
<beforebreak>\bP[Hh]\.</beforebreak>
|
@@ -4914,13 +4922,13 @@
|
|
4914
4922
|
</rule>
|
4915
4923
|
<!-- Don't split [.?!] when they're quoted -->
|
4916
4924
|
<rule break="no">
|
4917
|
-
<beforebreak>['"„][\.!?…]['"
|
4925
|
+
<beforebreak>['"„][\.!?…]['"“«»][\u00A0\s]</beforebreak>
|
4918
4926
|
<afterbreak></afterbreak>
|
4919
4927
|
</rule>
|
4920
4928
|
<!-- Don't break after quote unless there's a capital letter
|
4921
4929
|
e.g.: "That's right!" he said. -->
|
4922
4930
|
<rule break="no">
|
4923
|
-
<beforebreak>["'
|
4931
|
+
<beforebreak>["'“«»][\u00A0\s]</beforebreak>
|
4924
4932
|
<afterbreak>\p{Ll}</afterbreak>
|
4925
4933
|
</rule>
|
4926
4934
|
<!-- e.g. "Das ist . so." - assume one sentence. -->
|
@@ -4980,7 +4988,7 @@
|
|
4980
4988
|
</rule>
|
4981
4989
|
<!-- English abbreviations - but these work globally for all languages -->
|
4982
4990
|
<rule break="no">
|
4983
|
-
<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd|Buchst)\.[\u00A0\s]</beforebreak>
|
4991
|
+
<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|[Ss]ek|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd|Buchst)\.[\u00A0\s]</beforebreak>
|
4984
4992
|
<afterbreak></afterbreak>
|
4985
4993
|
</rule>
|
4986
4994
|
<!-- Latin abbreviations - but these work globally for all languages -->
|
@@ -4990,7 +4998,7 @@
|
|
4990
4998
|
</rule>
|
4991
4999
|
<!-- German abbreviations -->
|
4992
5000
|
<rule break="no">
|
4993
|
-
<beforebreak>\b(ggü|Mag|mtl|versch|d|Übers|usw|Bzw|bzw|Ab[hkst]|abzgl|[Ee]inschl|[Vv]mtl|Ev|bezgl|Abzw|[Vv]sl|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|autom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|Dtl|Dez)\.[\u00A0\s]</beforebreak>
|
5001
|
+
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|versch|d|Übers|usw|Bzw|bzw|Ab[hkst]|abzgl|[Ll]tda|[Ee]inschl|[Vv]mtl|Ev|bezgl|Abzw|[Vv]sl|ahd|Akk|aktual|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|autom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|Bez|Bhf|bspw|btto|bw|Dtl|Dez)\.[\u00A0\s]</beforebreak>
|
4994
5002
|
<afterbreak></afterbreak>
|
4995
5003
|
</rule>
|
4996
5004
|
<rule break="no">
|
@@ -5002,6 +5010,10 @@
|
|
5002
5010
|
<afterbreak></afterbreak>
|
5003
5011
|
</rule>
|
5004
5012
|
<rule break="no">
|
5013
|
+
<beforebreak>\b[BM]\.[\u00A0\s]Sc\.[\u00A0\s]</beforebreak>
|
5014
|
+
<afterbreak>\p{Ll}</afterbreak>
|
5015
|
+
</rule>
|
5016
|
+
<rule break="no">
|
5005
5017
|
<beforebreak>\b(ff|Fa|fachspr|fam|fem|Fem|Fr|franz|frz?|[Aa]ltfranz|frdl|Frl|Fut|Gd|gebr?|Gebr|geh|geleg|gen|Gen|germ|gesch|ges|get|ggf|Ggf|Ggs|ggT|Gr|[Gg]rds|griech)\.[\u00A0\s]</beforebreak>
|
5006
5018
|
<afterbreak></afterbreak>
|
5007
5019
|
</rule>
|
@@ -5010,7 +5022,7 @@
|
|
5010
5022
|
<afterbreak></afterbreak>
|
5011
5023
|
</rule>
|
5012
5024
|
<rule break="no">
|
5013
|
-
<beforebreak>\b(lat|lfd|Lit|lt|Lz|Mask|mask|max|Mrd|mdal|me[dt]|phil|mhd|Mio?|mind?|Mo|mod|nachm|nördlBr|neutr|Nhd|Nom|Nrn?|Num|Obj|od|dgl|offz)\.[\u00A0\s]</beforebreak>
|
5025
|
+
<beforebreak>\b([A-ZÖÄÜ][a-zöäüß]+nr|tel|gem|Pat|prov|Betr|lat|lfd|Lit|lt|Lz|Mask|mask|max|Mrd|mdal|me[dt]|phil|mhd|Mio?|mind?|Mo|mod|nachm|nördlBr|neutr|Nhd|Nom|Nrn?|Num|Obj|od|dgl|offz)\.[\u00A0\s]</beforebreak>
|
5014
5026
|
<afterbreak></afterbreak>
|
5015
5027
|
</rule>
|
5016
5028
|
<rule break="no">
|
@@ -5018,7 +5030,7 @@
|
|
5018
5030
|
<afterbreak></afterbreak>
|
5019
5031
|
</rule>
|
5020
5032
|
<rule break="no">
|
5021
|
-
<beforebreak>\b(Tel|teilw|Temp|trans|Tsd|übertr|übl|ff|überarb|ugs|univ|unveränd|urspr|USt|UST|USt\-IdNr|sw|vgl|vll|Vll|vlt|Vlt|vllt|Vllt|Vgl|Vol|vollst|vorm|Vp|Vs|vs|wesentl|wg|Whg|Hd|Ztr|zus|Zus|zzt?|zzgl|zB|zb|Zz|Zt|zw|Min|Bzgl|bzgl|bezügl|Frhr|ggfs|insb|autom|Mw[sS]t)\.[\u00A0\s]</beforebreak>
|
5033
|
+
<beforebreak>\b(Tel|teilw|Temp|trans|Tsd|übertr|übl|ff|überarb|ugs|univ|unveränd|urspr|USt|UST|USt\-IdNr|[Aa][bn]schl|sw|kl|[Gg]r|vgl|vll|Vll|vlt|Vlt|vllt|Vllt|Vgl|Vol|vollst|vorm|Vp|Vs|vs|wesentl|wg|Whg|Hd|Ztr|zus|Zus|zzt?|zzgl|zB|zb|Zz|Zt|zw|Min|Bzgl|bzgl|bezügl|Frhr|ggfs|insb|autom|Mw[sS]t)\.[\u00A0\s]</beforebreak>
|
5022
5034
|
<afterbreak></afterbreak>
|
5023
5035
|
</rule>
|
5024
5036
|
<!-- Break rules -->
|
@@ -5258,6 +5270,11 @@
|
|
5258
5270
|
<beforebreak>Yahoo![\s\u00A0]</beforebreak>
|
5259
5271
|
<afterbreak>\p{Ll}</afterbreak>
|
5260
5272
|
</rule>
|
5273
|
+
<!-- !? + lowercase -->
|
5274
|
+
<rule break="no">
|
5275
|
+
<beforebreak>(\!|\?)[\s\u00A0]</beforebreak>
|
5276
|
+
<afterbreak>\p{Ll}</afterbreak>
|
5277
|
+
</rule>
|
5261
5278
|
<rule break="yes">
|
5262
5279
|
<beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
|
5263
5280
|
<afterbreak></afterbreak>
|
@@ -5599,7 +5616,7 @@
|
|
5599
5616
|
</rule>
|
5600
5617
|
<rule break="no">
|
5601
5618
|
<!-- unfortunately \b ignores \u0301 -->
|
5602
|
-
<beforebreak>\b
|
5619
|
+
<beforebreak>\b[сС]т\.[\h\v]</beforebreak>
|
5603
5620
|
<afterbreak>[\h]*(?!([АВУОІЄ]|На|Але|Так?)[\h\v])</afterbreak>
|
5604
5621
|
</rule>
|
5605
5622
|
<rule break="no">
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srx-languagetool
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.6.0
|
4
|
+
version: 0.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Madlon-Kay
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-07-04 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: srx
|