srx-languagetool 0.14.0 → 0.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a71886af9137758228b7fdbb92b1e0c9e8f64c1b3456597660186b43379bf098
4
- data.tar.gz: 621f91e2be6bc34a564259f61953b56e02cb57aeaa7c68fdea2770f39ee2631a
3
+ metadata.gz: 14898a4b393f6e5925d50b6379bc42f18b7e3215a37fe24b8c0b0e9bc0363907
4
+ data.tar.gz: d7ec36383548be664580ece271c6e9014019a2abd7036bfb4d3de970561f8fdd
5
5
  SHA512:
6
- metadata.gz: 2dd8b533adbd82f274f492b1243330554a35a8e98d88af8a7790d2aaef6b2ebb85c0a3f0fc4bb87964b9a0135a5efd65554dbdae0ab8569e0838dbb2e5f504ef
7
- data.tar.gz: d5270d27a3dba9622b84ebfb1a6874c5eaeb655cdc933e61001ae9827fbfd0158bd7ebe7da4491145d8acc1273501aa7aee2c687438f957026a3c16c5273c2a3
6
+ metadata.gz: 01edf5bfc726983e6b2184c7ad4b584c0fef27fa2e995f6a910e8be09ff4b4d19077bc86edb07702b63f5c023c959edff2a5fff68d970a55c3fe4a40777dbbc2
7
+ data.tar.gz: a1f548e79786bdf954f10592c7e5df245dc4c887a6a367176027240687208cbd3eb4cea52b7a963126485b1181d44d229e79ab750e2a1640c97f1b3f5ac78028
@@ -10,10 +10,10 @@ jobs:
10
10
  - name: Set up Ruby
11
11
  uses: ruby/setup-ruby@v1
12
12
  with:
13
- ruby-version: 3.2.3
13
+ ruby-version: 3.3.7
14
14
  - name: Install
15
15
  run: |
16
- gem install bundler -v 2.5.7
16
+ gem install bundler -v 2.6.6
17
17
  bundle install
18
18
  - name: Type check
19
19
  run: bundle exec solargraph typecheck --level typed
data/.ruby-version CHANGED
@@ -1 +1 @@
1
- 3.2.3
1
+ 3.3.7
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.15.0] - 2025-03-29
4
+
5
+ - Update rules to LanguageTool 6.6
6
+
3
7
  ## [0.14.0] - 2024-09-27
4
8
 
5
9
  - Update rules to LanguageTool 6.5
data/Gemfile.lock CHANGED
@@ -1,81 +1,98 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- srx-languagetool (0.14.0)
4
+ srx-languagetool (0.15.0)
5
5
  srx (< 1.0)
6
6
 
7
7
  GEM
8
8
  remote: https://rubygems.org/
9
9
  specs:
10
- ast (2.4.2)
10
+ ast (2.4.3)
11
11
  backport (1.2.0)
12
- benchmark (0.3.0)
13
- byebug (11.1.3)
14
- diff-lcs (1.5.1)
15
- e2mmap (0.1.0)
12
+ benchmark (0.4.0)
13
+ byebug (12.0.0)
14
+ diff-lcs (1.6.1)
16
15
  jaro_winkler (1.6.0)
17
- json (2.7.2)
18
- kramdown (2.4.0)
19
- rexml
16
+ json (2.10.2)
17
+ kramdown (2.5.1)
18
+ rexml (>= 3.3.9)
20
19
  kramdown-parser-gfm (1.1.0)
21
20
  kramdown (~> 2.0)
22
- language_server-protocol (3.17.0.3)
23
- minitest (5.25.1)
24
- nokogiri (1.16.7-x86_64-darwin)
21
+ language_server-protocol (3.17.0.4)
22
+ lint_roller (1.1.0)
23
+ logger (1.7.0)
24
+ minitest (5.25.5)
25
+ nokogiri (1.18.6-arm64-darwin)
25
26
  racc (~> 1.4)
27
+ nokogiri (1.18.6-x86_64-darwin)
28
+ racc (~> 1.4)
29
+ observer (0.1.2)
30
+ ostruct (0.6.1)
26
31
  parallel (1.26.3)
27
- parser (3.3.5.0)
32
+ parser (3.3.7.3)
28
33
  ast (~> 2.4.1)
29
34
  racc
35
+ prism (1.4.0)
30
36
  racc (1.8.1)
31
37
  rainbow (3.1.1)
32
38
  rake (13.2.1)
33
- rbs (2.8.4)
34
- regexp_parser (2.9.2)
35
- reverse_markdown (2.1.1)
39
+ rbs (3.9.1)
40
+ logger
41
+ regexp_parser (2.10.0)
42
+ reverse_markdown (3.0.0)
36
43
  nokogiri
37
- rexml (3.3.7)
44
+ rexml (3.4.1)
38
45
  rspec-expectations (3.13.3)
39
46
  diff-lcs (>= 1.2.0, < 2.0)
40
47
  rspec-support (~> 3.13.0)
41
- rspec-support (3.13.1)
42
- rubocop (1.66.1)
48
+ rspec-support (3.13.2)
49
+ rubocop (1.75.1)
43
50
  json (~> 2.3)
44
- language_server-protocol (>= 3.17.0)
51
+ language_server-protocol (~> 3.17.0.2)
52
+ lint_roller (~> 1.1.0)
45
53
  parallel (~> 1.10)
46
54
  parser (>= 3.3.0.2)
47
55
  rainbow (>= 2.2.2, < 4.0)
48
- regexp_parser (>= 2.4, < 3.0)
49
- rubocop-ast (>= 1.32.2, < 2.0)
56
+ regexp_parser (>= 2.9.3, < 3.0)
57
+ rubocop-ast (>= 1.43.0, < 2.0)
50
58
  ruby-progressbar (~> 1.7)
51
- unicode-display_width (>= 2.4.0, < 3.0)
52
- rubocop-ast (1.32.3)
53
- parser (>= 3.3.1.0)
59
+ unicode-display_width (>= 2.4.0, < 4.0)
60
+ rubocop-ast (1.43.0)
61
+ parser (>= 3.3.7.2)
62
+ prism (~> 1.4)
54
63
  ruby-progressbar (1.13.0)
55
- solargraph (0.50.0)
64
+ solargraph (0.53.2)
56
65
  backport (~> 1.2)
57
66
  benchmark
58
67
  bundler (~> 2.0)
59
68
  diff-lcs (~> 1.4)
60
- e2mmap
61
- jaro_winkler (~> 1.5)
69
+ jaro_winkler (~> 1.6)
62
70
  kramdown (~> 2.3)
63
71
  kramdown-parser-gfm (~> 1.1)
72
+ logger (~> 1.6)
73
+ observer (~> 0.1)
74
+ ostruct (~> 0.6)
64
75
  parser (~> 3.0)
65
- rbs (~> 2.0)
66
- reverse_markdown (~> 2.0)
76
+ rbs (~> 3.3)
77
+ reverse_markdown (>= 2.0, < 4)
67
78
  rubocop (~> 1.38)
68
79
  thor (~> 1.0)
69
80
  tilt (~> 2.0)
70
81
  yard (~> 0.9, >= 0.9.24)
82
+ yard-solargraph (~> 0.1)
71
83
  srx (0.6.0)
72
84
  nokogiri (~> 1.11)
73
85
  thor (1.3.2)
74
- tilt (2.4.0)
75
- unicode-display_width (2.6.0)
86
+ tilt (2.6.0)
87
+ unicode-display_width (3.1.4)
88
+ unicode-emoji (~> 4.0, >= 4.0.4)
89
+ unicode-emoji (4.0.4)
76
90
  yard (0.9.37)
91
+ yard-solargraph (0.1.0)
92
+ yard (~> 0.9)
77
93
 
78
94
  PLATFORMS
95
+ arm64-darwin-24
79
96
  x86_64-darwin-20
80
97
  x86_64-darwin-21
81
98
  x86_64-darwin-22
@@ -91,4 +108,4 @@ DEPENDENCIES
91
108
  srx-languagetool!
92
109
 
93
110
  BUNDLED WITH
94
- 2.5.7
111
+ 2.6.6
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Srx
4
4
  module Languagetool
5
- VERSION = '0.14.0'
5
+ VERSION = '0.15.0'
6
6
  end
7
7
  end
data/lib/srx/segment.srx CHANGED
@@ -4761,9 +4761,26 @@
4761
4761
  <beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
4762
4762
  <afterbreak></afterbreak>
4763
4763
  </rule>
4764
+ <!-- 1. Punt primer-->
4765
+ <rule break="no">
4766
+ <beforebreak>^\d+\.[\s\u00A0]</beforebreak>
4767
+ <afterbreak>\p{L}</afterbreak>
4768
+ </rule>
4764
4769
  <!-- unknown abbreviations inside parentheses -->
4765
4770
  <rule break="no">
4766
- <beforebreak>\([^\)]*\.[\s\u00A0]</beforebreak>
4771
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0]</beforebreak>
4772
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4773
+ </rule>
4774
+ <rule break="no">
4775
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
4776
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4777
+ </rule>
4778
+ <rule break="no">
4779
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
4780
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4781
+ </rule>
4782
+ <rule break="no">
4783
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
4767
4784
  <afterbreak>[^\)\r\n]*\)</afterbreak>
4768
4785
  </rule>
4769
4786
  <rule break="no">
@@ -4781,7 +4798,7 @@
4781
4798
  </rule>
4782
4799
  <!-- Abbreviations that cannot finish sentences-->
4783
4800
  <rule break="no">
4784
- <beforebreak>\b(dc|inst|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
4801
+ <beforebreak>\b(dc|inst|coop|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|avda|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd|subg))\.[\s\u00A0]</beforebreak>
4785
4802
  <afterbreak></afterbreak>
4786
4803
  </rule>
4787
4804
  <!-- Abbreviations that can finish sentences -->
@@ -4821,7 +4838,7 @@
4821
4838
  </rule>
4822
4839
  <!-- max min etc -->
4823
4840
  <rule break="no">
4824
- <beforebreak>\b([Ee]tc|m[aáà]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
4841
+ <beforebreak>\b([Ee]tc|m[aáà]x|m[ií]n|aprox|long|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
4825
4842
  <afterbreak>\p{Ll}</afterbreak>
4826
4843
  </rule>
4827
4844
  <!-- Composed abbrev. -->
@@ -4865,11 +4882,28 @@
4865
4882
  </rule>
4866
4883
  </languagerule>
4867
4884
  <languagerule languagerulename="Spanish">
4885
+ <!-- 1. Punto primero-->
4886
+ <rule break="no">
4887
+ <beforebreak>^\d+\.[\s\u00A0]</beforebreak>
4888
+ <afterbreak>\p{L}</afterbreak>
4889
+ </rule>
4868
4890
  <!-- unknown abbreviations inside parentheses -->
4869
4891
  <rule break="no">
4870
- <beforebreak>\([^\)]*\.[\s\u00A0]</beforebreak>
4892
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0]</beforebreak>
4871
4893
  <afterbreak>[^\)\r\n]*\)</afterbreak>
4872
4894
  </rule>
4895
+ <rule break="no">
4896
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
4897
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4898
+ </rule>
4899
+ <rule break="no">
4900
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
4901
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4902
+ </rule>
4903
+ <rule break="no">
4904
+ <beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
4905
+ <afterbreak>[^\)\r\n]*\)</afterbreak>
4906
+ </rule>
4873
4907
  <rule break="no">
4874
4908
  <beforebreak>\[[^\]]*\.[\s\u00A0]</beforebreak>
4875
4909
  <afterbreak>[^\]\r\n]*\]</afterbreak>
@@ -4940,7 +4974,7 @@
4940
4974
  <afterbreak></afterbreak>
4941
4975
  </rule>
4942
4976
  <rule break="no">
4943
- <beforebreak>\b(dc|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
4977
+ <beforebreak>\b(dc|coop|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Exc|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Ilm|Ilma|Iltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|avda|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd|subg))\.[\s\u00A0]</beforebreak>
4944
4978
  <afterbreak></afterbreak>
4945
4979
  </rule>
4946
4980
  <rule break="no">
@@ -4984,7 +5018,7 @@
4984
5018
  </rule>
4985
5019
  <!-- max min etc -->
4986
5020
  <rule break="no">
4987
- <beforebreak>\b([Ee]tc|m[aá]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
5021
+ <beforebreak>\b([Ee]tc|m[aá]x|m[ií]n|aprox|long|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
4988
5022
  <afterbreak>\p{Ll}</afterbreak>
4989
5023
  </rule>
4990
5024
  <!-- Composed abbrev. -->
@@ -5129,7 +5163,7 @@
5129
5163
  </rule>
5130
5164
  <!-- don't split at cases like "Friedrich II. wird auch..." -->
5131
5165
  <rule break="no">
5132
- <beforebreak>[\u00A0\s ][IVX]+\.[\u00A0\s]{1,2}</beforebreak>
5166
+ <beforebreak>[\u00A0\s ][IVX]+\.[\u00A0\s]{1,2}</beforebreak>
5133
5167
  <afterbreak>[^\p{Lu}]+</afterbreak>
5134
5168
  </rule>
5135
5169
  <!-- don't split at cases like "im 13. oder 14. Jahrhundert" -->
@@ -5159,7 +5193,7 @@
5159
5193
  </rule>
5160
5194
  <!-- English abbreviations - but these work globally for all languages -->
5161
5195
  <rule break="no">
5162
- <beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|[Ss]ek|Bros|etc|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd|Buchst)\.[\u00A0\s]{1,2}</beforebreak>
5196
+ <beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|[Ss]ek|Bros|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd|Buchst)\.[\u00A0\s]{1,2}</beforebreak>
5163
5197
  <afterbreak></afterbreak>
5164
5198
  </rule>
5165
5199
  <!-- Latin abbreviations - but these work globally for all languages -->
@@ -5169,10 +5203,19 @@
5169
5203
  </rule>
5170
5204
  <!-- German abbreviations -->
5171
5205
  <rule break="no">
5172
- <beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|d|Übers|usw|[Bb]zw|Ab[hkst]|[Ee]ig|[Aa]bzü?gl|\d+-tlg|tlg|[Gg]gfls|[Ff]achspr|[Ll]tda|[Ee]inschl|[Vv]mtl|[Ss]tellv|Ev|[Bb]ezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|wsl|vsl|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
5206
+ <beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|d|Übers|[Bb]zw|Ab[hkst]|[Ee]ig|[Aa]bzü?gl|\d+-tlg|tlg|[Gg]gfls|[Ff]achspr|[Ll]tda|[Ee]inschl|[Vv]mtl|[Ss]tellv|Ev|[Bb]ezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|wsl|vsl|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
5173
5207
  <afterbreak></afterbreak>
5174
5208
  </rule>
5175
5209
  <rule break="no">
5210
+ <beforebreak>\b([Uu]sw|[Ee]tc)\.[\u00A0\s]{1,2}</beforebreak>
5211
+ <afterbreak>\p{Ll}</afterbreak>
5212
+ </rule>
5213
+ <rule break="yes">
5214
+ <!-- Why is this needed? -->
5215
+ <beforebreak>\b([Ee]tc)\.[\u00A0\s]{1,2}</beforebreak>
5216
+ <afterbreak>\p{Lu}</afterbreak>
5217
+ </rule>
5218
+ <rule break="no">
5176
5219
  <beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|[Dd]t|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
5177
5220
  <afterbreak></afterbreak>
5178
5221
  </rule>
@@ -5710,7 +5753,7 @@
5710
5753
  </rule>
5711
5754
  <!-- Наші в... Лос-Анджелесі -->
5712
5755
  <rule break="no">
5713
- <beforebreak>\b(в|у|на|за|з|із|зі|зо)(\.\.\.|…)[\h\v]*</beforebreak>
5756
+ <beforebreak>(?U)\b(в|у|на|за|з|із|зі|зо)(\.\.\.|…)[\h\v]*</beforebreak>
5714
5757
  <afterbreak>\p{Lu}</afterbreak>
5715
5758
  </rule>
5716
5759
  <rule break="no">
@@ -5723,12 +5766,12 @@
5723
5766
  </rule>
5724
5767
  <!-- Digit as a point number: 1. перший пункт -->
5725
5768
  <rule break="no">
5726
- <beforebreak>\b\d{1,3}\.[\h]+</beforebreak>
5769
+ <beforebreak>(?U)\b\d{1,3}\.[\h]+</beforebreak>
5727
5770
  <afterbreak>\p{Ll}|\p{Lu}{2,}</afterbreak>
5728
5771
  </rule>
5729
5772
  <!-- various punctuation between lowercase letters -->
5730
5773
  <rule break="no">
5731
- <beforebreak>\b\p{Ll}+[.!?][\h\v]*</beforebreak>
5774
+ <beforebreak>(?U)\b\p{Ll}+[.!?][\h\v]*</beforebreak>
5732
5775
  <afterbreak>\h*(([\(«]|[\[‐-―-][\h\v]*)?\p{Ll})</afterbreak>
5733
5776
  </rule>
5734
5777
  <rule break="no">
@@ -5737,17 +5780,17 @@
5737
5780
  </rule>
5738
5781
  <!-- lowercase letter abbreviations together: н.е., кв.м. -->
5739
5782
  <rule break="no">
5740
- <beforebreak>\b\p{L}{1,2}\.</beforebreak>
5783
+ <beforebreak>(?U)\b\p{L}{1,2}\.</beforebreak>
5741
5784
  <afterbreak>\p{L}{1,2}\.</afterbreak>
5742
5785
  </rule>
5743
5786
  <!-- latin capital char abbreviations A. B. C. -->
5744
5787
  <rule break="no">
5745
- <beforebreak>\b[\u00A0\u202F]?[A-Z]\.[\h\v]?</beforebreak>
5788
+ <beforebreak>(?U)\b[\u00A0\u202F]?[A-Z]\.[\h\v]?</beforebreak>
5746
5789
  <afterbreak>[A-Z][a-zA-Z'’.-]|[А-ЯІЇЄҐ]\.</afterbreak>
5747
5790
  </rule>
5748
5791
  <!-- capital char abbreviations А. Б. В. -->
5749
5792
  <rule break="no">
5750
- <beforebreak>(^[\h\v]*|\([\h\v]*|[«„"]|(\b[А-ЯІЇЄҐACEIHOPX]\.-))[А-ЯІЇЄҐA-Z]\.[\h\v]*</beforebreak>
5793
+ <beforebreak>(?U)(^[\h\v]*|\([\h\v]*|[«„"]|(\b[А-ЯІЇЄҐACEIHOPX]\.-))[А-ЯІЇЄҐA-Z]\.[\h\v]*</beforebreak>
5751
5794
  <afterbreak></afterbreak>
5752
5795
  </rule>
5753
5796
  <!-- Іван Ч. (1914 р. н.) -->
@@ -5771,12 +5814,12 @@
5771
5814
  а до лютого 2020 р. — затвердити
5772
5815
  -->
5773
5816
  <rule break="no">
5774
- <beforebreak>\b([0-9]{2}|[0-9]{4})[\h\v]+р\.[\h\v]+</beforebreak>
5817
+ <beforebreak>(?U)\b([0-9]{2}|[0-9]{4})[\h\v]+р\.[\h\v]+</beforebreak>
5775
5818
  <afterbreak>[\h\v]*[№0-9‐-―-]</afterbreak>
5776
5819
  </rule>
5777
5820
  <!-- річка - р. Дніпро -->
5778
5821
  <rule break="no">
5779
- <beforebreak>(?&lt;!\d[\h]*)\bр\.[\h\v]*</beforebreak>
5822
+ <beforebreak>(?U)(?&lt;!\d[\h]*)\bр\.[\h\v]*</beforebreak>
5780
5823
  <afterbreak>[\h]*(?!(На|Але|Так?)[\h\v]+)[А-ЯІЇЄҐA-Z][^\h]</afterbreak>
5781
5824
  </rule>
5782
5825
  <!-- У травні 1949 р. Грушківський район -->
@@ -5791,29 +5834,29 @@
5791
5834
  </rule>
5792
5835
  <!-- Years: рр. -->
5793
5836
  <rule break="no">
5794
- <beforebreak>\b([0-9]0|[0-9]{3}0)(-[мх])?рр\.[\h\v]*</beforebreak>
5837
+ <beforebreak>(?U)\b([0-9]0|[0-9]{3}0)(-[мх])?рр\.[\h\v]*</beforebreak>
5795
5838
  <afterbreak></afterbreak>
5796
5839
  </rule>
5797
5840
  <!-- млн./млрд./грн. — frequent mistake -->
5798
5841
  <rule break="no">
5799
- <beforebreak>\b(тис|млн|млрд|грн)\.[\h\v]*</beforebreak>
5842
+ <beforebreak>(?U)\b(тис|млн|млрд|грн)\.[\h\v]*</beforebreak>
5800
5843
  <afterbreak>[\h\v]*(\d|[КМ]Вт)</afterbreak>
5801
5844
  </rule>
5802
5845
  <!-- усталені скорочення, що не збігаються з нескороченими словами -->
5803
5846
  <rule break="no">
5804
- <beforebreak>\b(укр|рос|англ?|амер|італ|ісп|нім|фр(анц)?|лат|грец(ьк)?)\.[\h\v]*</beforebreak>
5847
+ <beforebreak>(?U)\b(укр|рос|англ?|амер|італ|ісп|нім|фр(анц)?|лат|грец(ьк)?)\.[\h\v]*</beforebreak>
5805
5848
  <afterbreak></afterbreak>
5806
5849
  </rule>
5807
5850
  <rule break="no">
5808
- <beforebreak>\b(абз|арк|ауд|бл|буд|бульв|вул|держ|дод|зав|зб|зв|зовн|екон|к|кв|канд|кн|напр|нпр|нац|обл|оп|пл|пол|поч|пп|пор|просп|розд|стор|табл|[Тт]]ел|ч|част)\.[\h\v]*</beforebreak>
5851
+ <beforebreak>(?U)\b(абз|арк|ауд|бл|буд|бульв|вул|держ|дод|зав|зб|зв|зовн|екон|іл|к|кв|канд|кн|напр|нпр|нац|обл|оп|пл|пол|поч|пп|пор|просп|розд|стор|табл|[Тт]]ел|ч|част)\.[\h\v]*</beforebreak>
5809
5852
  <afterbreak></afterbreak>
5810
5853
  </rule>
5811
5854
  <rule break="no">
5812
- <beforebreak>\b(кін)\.[\h\v]*</beforebreak>
5855
+ <beforebreak>(?U)\b(кін)\.[\h\v]*</beforebreak>
5813
5856
  <afterbreak>[а-яіїєґ0-9IXV]|[ІХ]+\b</afterbreak>
5814
5857
  </rule>
5815
5858
  <rule break="no">
5816
- <beforebreak>\b[сС]т\.[\h\v]</beforebreak>
5859
+ <beforebreak>(?U)\b[сС]т\.[\h\v]</beforebreak>
5817
5860
  <afterbreak>[\h]*(?!([АВУОІЄ]|На|Але|Так?)[\h\v])</afterbreak>
5818
5861
  </rule>
5819
5862
  <!-- нар. 1945 р. | (1966 р. нар.) | 1975 — нар. Осипчук -->
@@ -5822,21 +5865,21 @@
5822
5865
  <afterbreak></afterbreak>
5823
5866
  </rule>
5824
5867
  <rule break="no">
5825
- <beforebreak>\bнар\.[\h\v]*</beforebreak>
5868
+ <beforebreak>(?U)\bнар\.[\h\v]*</beforebreak>
5826
5869
  <afterbreak>([0-9]|бл\.|арт\.)</afterbreak>
5827
5870
  </rule>
5828
5871
  <rule break="no">
5829
- <beforebreak>\bдол\.[\h\v]*</beforebreak>
5872
+ <beforebreak>(?U)\bдол\.[\h\v]*</beforebreak>
5830
5873
  <afterbreak>США</afterbreak>
5831
5874
  </rule>
5832
5875
  <!-- п. 10 від 11.10.1933, д. Василь -->
5833
5876
  <rule break="no">
5834
- <beforebreak>(?&lt;!т\.[\h\v]?)\b[пд]\.[\h\v]*</beforebreak>
5877
+ <beforebreak>(?U)(?&lt;!т\.[\h\v]?)\b[пд]\.[\h\v]*</beforebreak>
5835
5878
  <afterbreak></afterbreak>
5836
5879
  </rule>
5837
5880
  <!-- усталені скорочення, що збігаються з нескороченими словами -->
5838
5881
  <rule break="no">
5839
- <beforebreak>\b(див)\.[\h\v]</beforebreak>
5882
+ <beforebreak>(?U)\b(див)\.[\h\v]</beforebreak>
5840
5883
  <afterbreak>[\h\v]*[^А-ЯІЇЄҐ]</afterbreak>
5841
5884
  </rule>
5842
5885
  <!-- Верховний орган, див. Африканський національний конгрес -->
@@ -5848,20 +5891,20 @@
5848
5891
  України (див. Зимові походи)
5849
5892
  -->
5850
5893
  <rule break="no">
5851
- <beforebreak>(\([^)]*|\[[^\]]*|,[\h\v]*)\b(див)\.[\h\v]*</beforebreak>
5894
+ <beforebreak>(?U)(\([^)]*|\[[^\]]*|,[\h\v]*)\b(див)\.[\h\v]*</beforebreak>
5852
5895
  <afterbreak></afterbreak>
5853
5896
  </rule>
5854
5897
  <!-- abbreviation with proper noun: проф. Грицько, о. Лісове -->
5855
5898
  <rule break="no">
5856
- <beforebreak>\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|дир|тов|упоряд|тт|чл\.-кор|[Пп]реп|[сС]вт)\.[\h\v]*</beforebreak>
5899
+ <beforebreak>(?U)\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|дир|тов|упоряд|тт|чл\.-кор|[Пп]реп|[сС]вт)\.[\h\v]*</beforebreak>
5857
5900
  <afterbreak>[\h\v]*[А-ЯІЇЄҐA-Z]</afterbreak>
5858
5901
  </rule>
5859
5902
  <rule break="no">
5860
- <beforebreak>(?&lt;![іи]\s+)\bдр\.[\h\v]*</beforebreak>
5903
+ <beforebreak>(?U)(?&lt;![іи]\s+)\bдр\.[\h\v]*</beforebreak>
5861
5904
  <afterbreak>[\h\v]*[А-ЯІЇЄҐ]</afterbreak>
5862
5905
  </rule>
5863
5906
  <rule break="no">
5864
- <beforebreak>\bМан\.[\h\v]*</beforebreak>
5907
+ <beforebreak>(?U)\bМан\.[\h\v]*</beforebreak>
5865
5908
  <afterbreak>[\h\v]*([Сс]іті|[Юю]н)</afterbreak>
5866
5909
  </rule>
5867
5910
  <!-- смерть гр. Болтаровича, but not "9 гр." -->
@@ -5871,7 +5914,7 @@
5871
5914
  </rule>
5872
5915
  <!-- TODO: арт. - артист -->
5873
5916
  <rule break="no">
5874
- <beforebreak>\b([Аа]рт|[Мм]ал|[Рр]ис|[Сс]пр)\.[\h\v]*</beforebreak>
5917
+ <beforebreak>(?U)\b([Аа]рт|[Мм]ал|[Рр]ис|[Сс]пр)\.[\h\v]*</beforebreak>
5875
5918
  <afterbreak>[\h\v]*(№[\h\v]*)?[0-9]</afterbreak>
5876
5919
  </rule>
5877
5920
  <!-- ХІІ р., 3-6 арт., 2-3 тт. -->
@@ -5881,7 +5924,7 @@
5881
5924
  </rule>
5882
5925
  <!-- але розбиваємо «всього 20 м. Почалося» -->
5883
5926
  <rule break="no">
5884
- <beforebreak>(?&lt;!\d[\h\v]*)\bм\.[\h\v]*</beforebreak>
5927
+ <beforebreak>(?U)(?&lt;!\d[\h\v]*)\bм\.[\h\v]*</beforebreak>
5885
5928
  <afterbreak>[А-ЯІЇЄҐ][а-яіїєґ']</afterbreak>
5886
5929
  </rule>
5887
5930
  <!-- село/сторінка/місто, але щоб не збігалося з секундами/метрами -->
@@ -5905,7 +5948,7 @@
5905
5948
  <afterbreak>[\h\v]*[‐-―-][\h\v]*([Рр]ед|[Аа]вт)\.[\h\v]*[\)\]]</afterbreak>
5906
5949
  </rule>
5907
5950
  <rule break="no">
5908
- <beforebreak>\b([Рр]ед)\.[\h\v]*</beforebreak>
5951
+ <beforebreak>(?U)\b([Рр]ед)\.[\h\v]*</beforebreak>
5909
5952
  <afterbreak>[А-ЯІЇЄҐ]</afterbreak>
5910
5953
  </rule>
5911
5954
  <!-- Цензор.НЕТ -->
@@ -5933,6 +5976,7 @@
5933
5976
  <afterbreak>([‐-―-][\h\v]*)?\p{Lu}[^\p{Lu}]</afterbreak>
5934
5977
  </rule>
5935
5978
  </languagerule>
5979
+
5936
5980
  <languagerule languagerulename="Belarusian">
5937
5981
  <rule break="no">
5938
5982
  <beforebreak>\b\d+\.\s</beforebreak>
@@ -6618,7 +6662,7 @@
6618
6662
  <afterbreak></afterbreak>
6619
6663
  </rule>
6620
6664
  <rule break="no">
6621
- <beforebreak>[^\.]\s[ضصثقفغعهخحجچشسیبلاتنمکگ\ظطزرذدپوًٌٍَُِّْA-Z]\.\s</beforebreak>
6665
+ <beforebreak>[^\.]\s[ضصثقفغعهخحجچشسیبلاتنمکگ\ظطزرذدپوًٌٍَُِّْA-Z]\.\s</beforebreak>
6622
6666
  <afterbreak></afterbreak>
6623
6667
  </rule>
6624
6668
  <rule break="no">
@@ -6671,7 +6715,7 @@
6671
6715
 
6672
6716
  Не раздвајати после наводника осим ако нису праћени
6673
6717
  великим словом. На пример:
6674
- "Тако је!", рече он.-->
6718
+ "Тако је!", рече он.-->
6675
6719
  <rule break="no">
6676
6720
  <beforebreak>["'“],\s</beforebreak>
6677
6721
  <afterbreak>\p{Ll}</afterbreak>
@@ -6698,7 +6742,7 @@
6698
6742
  </rule>
6699
6743
  <!--Не раздвајај у случају као на пр.: "Петар I дошао је ..."-->
6700
6744
  <rule break="no">
6701
- <beforebreak>[\s ][IVX]+\s</beforebreak>
6745
+ <beforebreak>[\s ][IVX]+\s</beforebreak>
6702
6746
  <afterbreak>[^\p{Lu}]+</afterbreak>
6703
6747
  </rule>
6704
6748
  <!--Не раздвајај у случају као "од 13. до 14. века"-->
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srx-languagetool
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.14.0
4
+ version: 0.15.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aaron Madlon-Kay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2024-09-27 00:00:00.000000000 Z
11
+ date: 2025-03-28 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: srx
@@ -73,7 +73,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
73
73
  - !ruby/object:Gem::Version
74
74
  version: '0'
75
75
  requirements: []
76
- rubygems_version: 3.5.7
76
+ rubygems_version: 3.5.22
77
77
  signing_key:
78
78
  specification_version: 4
79
79
  summary: SRX segmentation rules from LanguageTool