srx-languagetool 0.13.0 → 0.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -2
- data/.ruby-version +1 -1
- data/CHANGELOG.md +8 -0
- data/Gemfile.lock +58 -41
- data/lib/srx/languagetool/version.rb +1 -1
- data/lib/srx/segment.srx +122 -40
- metadata +7 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 14898a4b393f6e5925d50b6379bc42f18b7e3215a37fe24b8c0b0e9bc0363907
|
4
|
+
data.tar.gz: d7ec36383548be664580ece271c6e9014019a2abd7036bfb4d3de970561f8fdd
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 01edf5bfc726983e6b2184c7ad4b584c0fef27fa2e995f6a910e8be09ff4b4d19077bc86edb07702b63f5c023c959edff2a5fff68d970a55c3fe4a40777dbbc2
|
7
|
+
data.tar.gz: a1f548e79786bdf954f10592c7e5df245dc4c887a6a367176027240687208cbd3eb4cea52b7a963126485b1181d44d229e79ab750e2a1640c97f1b3f5ac78028
|
data/.github/workflows/main.yml
CHANGED
@@ -10,10 +10,10 @@ jobs:
|
|
10
10
|
- name: Set up Ruby
|
11
11
|
uses: ruby/setup-ruby@v1
|
12
12
|
with:
|
13
|
-
ruby-version:
|
13
|
+
ruby-version: 3.3.7
|
14
14
|
- name: Install
|
15
15
|
run: |
|
16
|
-
gem install bundler -v 2.
|
16
|
+
gem install bundler -v 2.2.6
|
17
17
|
bundle install
|
18
18
|
- name: Type check
|
19
19
|
run: bundle exec solargraph typecheck --level typed
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
3.3.7
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,85 +1,102 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
srx-languagetool (0.13.0)
|
4
|
+
srx-languagetool (0.15.0)
|
5
5
|
srx (< 1.0)
|
6
6
|
|
7
7
|
GEM
|
8
8
|
remote: https://rubygems.org/
|
9
9
|
specs:
|
10
|
-
ast (2.4.
|
10
|
+
ast (2.4.3)
|
11
11
|
backport (1.2.0)
|
12
|
-
benchmark (0.
|
13
|
-
byebug (
|
14
|
-
diff-lcs (1.
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
rexml
|
12
|
+
benchmark (0.4.0)
|
13
|
+
byebug (12.0.0)
|
14
|
+
diff-lcs (1.6.1)
|
15
|
+
jaro_winkler (1.6.0)
|
16
|
+
json (2.10.2)
|
17
|
+
kramdown (2.5.1)
|
18
|
+
rexml (>= 3.3.9)
|
20
19
|
kramdown-parser-gfm (1.1.0)
|
21
20
|
kramdown (~> 2.0)
|
22
|
-
language_server-protocol (3.17.0.
|
23
|
-
|
24
|
-
|
21
|
+
language_server-protocol (3.17.0.4)
|
22
|
+
lint_roller (1.1.0)
|
23
|
+
logger (1.7.0)
|
24
|
+
minitest (5.25.5)
|
25
|
+
nokogiri (1.18.6-arm64-darwin)
|
25
26
|
racc (~> 1.4)
|
26
|
-
|
27
|
-
|
27
|
+
nokogiri (1.18.6-x86_64-darwin)
|
28
|
+
racc (~> 1.4)
|
29
|
+
observer (0.1.2)
|
30
|
+
ostruct (0.6.1)
|
31
|
+
parallel (1.26.3)
|
32
|
+
parser (3.3.7.3)
|
28
33
|
ast (~> 2.4.1)
|
29
34
|
racc
|
30
|
-
|
35
|
+
prism (1.4.0)
|
36
|
+
racc (1.8.1)
|
31
37
|
rainbow (3.1.1)
|
32
|
-
rake (13.1
|
33
|
-
rbs (
|
34
|
-
|
35
|
-
|
38
|
+
rake (13.2.1)
|
39
|
+
rbs (3.9.1)
|
40
|
+
logger
|
41
|
+
regexp_parser (2.10.0)
|
42
|
+
reverse_markdown (3.0.0)
|
36
43
|
nokogiri
|
37
|
-
rexml (3.
|
38
|
-
rspec-expectations (3.13.
|
44
|
+
rexml (3.4.1)
|
45
|
+
rspec-expectations (3.13.3)
|
39
46
|
diff-lcs (>= 1.2.0, < 2.0)
|
40
47
|
rspec-support (~> 3.13.0)
|
41
|
-
rspec-support (3.13.
|
42
|
-
rubocop (1.
|
48
|
+
rspec-support (3.13.2)
|
49
|
+
rubocop (1.75.1)
|
43
50
|
json (~> 2.3)
|
44
|
-
language_server-protocol (
|
51
|
+
language_server-protocol (~> 3.17.0.2)
|
52
|
+
lint_roller (~> 1.1.0)
|
45
53
|
parallel (~> 1.10)
|
46
54
|
parser (>= 3.3.0.2)
|
47
55
|
rainbow (>= 2.2.2, < 4.0)
|
48
|
-
regexp_parser (>=
|
49
|
-
|
50
|
-
rubocop-ast (>= 1.31.1, < 2.0)
|
56
|
+
regexp_parser (>= 2.9.3, < 3.0)
|
57
|
+
rubocop-ast (>= 1.43.0, < 2.0)
|
51
58
|
ruby-progressbar (~> 1.7)
|
52
|
-
unicode-display_width (>= 2.4.0, <
|
53
|
-
rubocop-ast (1.
|
54
|
-
parser (>= 3.3.
|
59
|
+
unicode-display_width (>= 2.4.0, < 4.0)
|
60
|
+
rubocop-ast (1.43.0)
|
61
|
+
parser (>= 3.3.7.2)
|
62
|
+
prism (~> 1.4)
|
55
63
|
ruby-progressbar (1.13.0)
|
56
|
-
solargraph (0.
|
64
|
+
solargraph (0.53.2)
|
57
65
|
backport (~> 1.2)
|
58
66
|
benchmark
|
59
67
|
bundler (~> 2.0)
|
60
68
|
diff-lcs (~> 1.4)
|
61
|
-
|
62
|
-
jaro_winkler (~> 1.5)
|
69
|
+
jaro_winkler (~> 1.6)
|
63
70
|
kramdown (~> 2.3)
|
64
71
|
kramdown-parser-gfm (~> 1.1)
|
72
|
+
logger (~> 1.6)
|
73
|
+
observer (~> 0.1)
|
74
|
+
ostruct (~> 0.6)
|
65
75
|
parser (~> 3.0)
|
66
|
-
rbs (~>
|
67
|
-
reverse_markdown (
|
76
|
+
rbs (~> 3.3)
|
77
|
+
reverse_markdown (>= 2.0, < 4)
|
68
78
|
rubocop (~> 1.38)
|
69
79
|
thor (~> 1.0)
|
70
80
|
tilt (~> 2.0)
|
71
81
|
yard (~> 0.9, >= 0.9.24)
|
82
|
+
yard-solargraph (~> 0.1)
|
72
83
|
srx (0.6.0)
|
73
84
|
nokogiri (~> 1.11)
|
74
|
-
thor (1.3.
|
75
|
-
tilt (2.
|
76
|
-
unicode-display_width (
|
77
|
-
|
85
|
+
thor (1.3.2)
|
86
|
+
tilt (2.6.0)
|
87
|
+
unicode-display_width (3.1.4)
|
88
|
+
unicode-emoji (~> 4.0, >= 4.0.4)
|
89
|
+
unicode-emoji (4.0.4)
|
90
|
+
yard (0.9.37)
|
91
|
+
yard-solargraph (0.1.0)
|
92
|
+
yard (~> 0.9)
|
78
93
|
|
79
94
|
PLATFORMS
|
95
|
+
arm64-darwin-24
|
80
96
|
x86_64-darwin-20
|
81
97
|
x86_64-darwin-21
|
82
98
|
x86_64-darwin-22
|
99
|
+
x86_64-darwin-23
|
83
100
|
|
84
101
|
DEPENDENCIES
|
85
102
|
byebug
|
@@ -91,4 +108,4 @@ DEPENDENCIES
|
|
91
108
|
srx-languagetool!
|
92
109
|
|
93
110
|
BUNDLED WITH
|
94
|
-
2.
|
111
|
+
2.6.6
|
data/lib/srx/segment.srx
CHANGED
@@ -1107,6 +1107,14 @@
|
|
1107
1107
|
</rule>
|
1108
1108
|
</languagerule>
|
1109
1109
|
<languagerule languagerulename="English">
|
1110
|
+
<rule break="no"><!-- https://www.seven.one/ -->
|
1111
|
+
<beforebreak>\b[Se]even\.</beforebreak>
|
1112
|
+
<afterbreak>[Oo]ne\b</afterbreak>
|
1113
|
+
</rule>
|
1114
|
+
<rule break="no">
|
1115
|
+
<beforebreak>\b[1-9]\.[\s\u00A0]</beforebreak>
|
1116
|
+
<afterbreak>[a-z]</afterbreak>
|
1117
|
+
</rule>
|
1110
1118
|
<rule break="no">
|
1111
1119
|
<beforebreak>[\u00A0\s]</beforebreak>
|
1112
1120
|
<afterbreak>\n</afterbreak>
|
@@ -1164,7 +1172,7 @@
|
|
1164
1172
|
<afterbreak>D\.?</afterbreak>
|
1165
1173
|
</rule>
|
1166
1174
|
<rule break="no">
|
1167
|
-
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Aa]cq|[Ii]ncl?|[Ee]xcl|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1175
|
+
<beforebreak>\b([Aa]vg|[Ee]d|pp|[Vv]iz|i\.?[\s\u00A0]*e|[Vvol]|[Rr]col|maj|Lt|[Ff]ig|[Ee]xt|[Ff]igs|[Vv]iz|[Vv]ols|[Aa]pprox|[Aa]cq|[Ii]ncl?|[Ee]xcl|[Aa]cc|Pres|[Cc]orp|[Ee]x|[Cc]onn|[Dd]ept|[Ll]tda|[Mm]in|[Mm]ax|[Gg]ovt|[Rr]etd|Ing|lb|lbf|ft|c\.?[\s\u00A0]*f|vs|dia|lbs|\d+-(:?oz|kc|in|h[rp]|ml)|M?sec)\.[\s\u00A0]</beforebreak>
|
1168
1176
|
<afterbreak>[^\p{Lu}]|I</afterbreak>
|
1169
1177
|
</rule>
|
1170
1178
|
<rule break="no">
|
@@ -4753,6 +4761,36 @@
|
|
4753
4761
|
<beforebreak>\.\[\d+\][\s\u00A0]</beforebreak>
|
4754
4762
|
<afterbreak></afterbreak>
|
4755
4763
|
</rule>
|
4764
|
+
<!-- 1. Punt primer-->
|
4765
|
+
<rule break="no">
|
4766
|
+
<beforebreak>^\d+\.[\s\u00A0]</beforebreak>
|
4767
|
+
<afterbreak>\p{L}</afterbreak>
|
4768
|
+
</rule>
|
4769
|
+
<!-- unknown abbreviations inside parentheses -->
|
4770
|
+
<rule break="no">
|
4771
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4772
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4773
|
+
</rule>
|
4774
|
+
<rule break="no">
|
4775
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4776
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4777
|
+
</rule>
|
4778
|
+
<rule break="no">
|
4779
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4780
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4781
|
+
</rule>
|
4782
|
+
<rule break="no">
|
4783
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4784
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4785
|
+
</rule>
|
4786
|
+
<rule break="no">
|
4787
|
+
<beforebreak>\[[^\]]*\.[\s\u00A0]</beforebreak>
|
4788
|
+
<afterbreak>[^\]\r\n]*\]</afterbreak>
|
4789
|
+
</rule>
|
4790
|
+
<rule break="no">
|
4791
|
+
<beforebreak>\{[^\}]*\.[\s\u00A0]</beforebreak>
|
4792
|
+
<afterbreak>[^\}\r\n]*\}</afterbreak>
|
4793
|
+
</rule>
|
4756
4794
|
<!-- initials: A. C. Jones. Problem: [...] d'Alfons I. Ell era [...] -->
|
4757
4795
|
<rule break="no">
|
4758
4796
|
<beforebreak>\b[A-ZÀÉÈÍÓÒÚ]\.[\s\u00A0]</beforebreak>
|
@@ -4760,7 +4798,7 @@
|
|
4760
4798
|
</rule>
|
4761
4799
|
<!-- Abbreviations that cannot finish sentences-->
|
4762
4800
|
<rule break="no">
|
4763
|
-
<beforebreak>\b(dc|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
|
4801
|
+
<beforebreak>\b(dc|inst|coop|(?iu)(n|Mr|C|Dr|Dra|Dra\. Ma|Sta\. Ma|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|avda|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd|subg))\.[\s\u00A0]</beforebreak>
|
4764
4802
|
<afterbreak></afterbreak>
|
4765
4803
|
</rule>
|
4766
4804
|
<!-- Abbreviations that can finish sentences -->
|
@@ -4800,7 +4838,7 @@
|
|
4800
4838
|
</rule>
|
4801
4839
|
<!-- max min etc -->
|
4802
4840
|
<rule break="no">
|
4803
|
-
<beforebreak>\b([Ee]tc|m[aáà]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
4841
|
+
<beforebreak>\b([Ee]tc|m[aáà]x|m[ií]n|aprox|long|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
4804
4842
|
<afterbreak>\p{Ll}</afterbreak>
|
4805
4843
|
</rule>
|
4806
4844
|
<!-- Composed abbrev. -->
|
@@ -4815,12 +4853,12 @@
|
|
4815
4853
|
</rule>
|
4816
4854
|
<!-- Ellipsis: ... lowercase -->
|
4817
4855
|
<rule break="no">
|
4818
|
-
<beforebreak>[^\s\u00A0](
|
4856
|
+
<beforebreak>[^\s\u00A0](\.\.\.|…)[\s\u00A0]</beforebreak>
|
4819
4857
|
<afterbreak>\p{Ll}</afterbreak>
|
4820
4858
|
</rule>
|
4821
4859
|
<!-- (enum...) -->
|
4822
4860
|
<rule break="no">
|
4823
|
-
<beforebreak>\b(
|
4861
|
+
<beforebreak>\b(\.\.\.|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
|
4824
4862
|
<afterbreak>\p{Ll}</afterbreak>
|
4825
4863
|
</rule>
|
4826
4864
|
<!-- pero ¡ah! no estaba
|
@@ -4844,6 +4882,36 @@
|
|
4844
4882
|
</rule>
|
4845
4883
|
</languagerule>
|
4846
4884
|
<languagerule languagerulename="Spanish">
|
4885
|
+
<!-- 1. Punto primero-->
|
4886
|
+
<rule break="no">
|
4887
|
+
<beforebreak>^\d+\.[\s\u00A0]</beforebreak>
|
4888
|
+
<afterbreak>\p{L}</afterbreak>
|
4889
|
+
</rule>
|
4890
|
+
<!-- unknown abbreviations inside parentheses -->
|
4891
|
+
<rule break="no">
|
4892
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4893
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4894
|
+
</rule>
|
4895
|
+
<rule break="no">
|
4896
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4897
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4898
|
+
</rule>
|
4899
|
+
<rule break="no">
|
4900
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4901
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4902
|
+
</rule>
|
4903
|
+
<rule break="no">
|
4904
|
+
<beforebreak>\([^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0][^\)]*?[\.:][\s\u00A0]</beforebreak>
|
4905
|
+
<afterbreak>[^\)\r\n]*\)</afterbreak>
|
4906
|
+
</rule>
|
4907
|
+
<rule break="no">
|
4908
|
+
<beforebreak>\[[^\]]*\.[\s\u00A0]</beforebreak>
|
4909
|
+
<afterbreak>[^\]\r\n]*\]</afterbreak>
|
4910
|
+
</rule>
|
4911
|
+
<rule break="no">
|
4912
|
+
<beforebreak>\{[^\}]*\.[\s\u00A0]</beforebreak>
|
4913
|
+
<afterbreak>[^\}\r\n]*\}</afterbreak>
|
4914
|
+
</rule>
|
4847
4915
|
<rule break="no">
|
4848
4916
|
<beforebreak>¿[^?]+:[\s\u00A0]</beforebreak>
|
4849
4917
|
<afterbreak>.</afterbreak>
|
@@ -4867,12 +4935,12 @@
|
|
4867
4935
|
</rule>
|
4868
4936
|
<!-- Ellipsis: ... lowercase -->
|
4869
4937
|
<rule break="no">
|
4870
|
-
<beforebreak>[^\s\u00A0](
|
4938
|
+
<beforebreak>[^\s\u00A0](\.\.\.|…)[\s\u00A0]</beforebreak>
|
4871
4939
|
<afterbreak>\p{Ll}</afterbreak>
|
4872
4940
|
</rule>
|
4873
4941
|
<!-- (enum...) -->
|
4874
4942
|
<rule break="no">
|
4875
|
-
<beforebreak>\b(
|
4943
|
+
<beforebreak>\b(\.\.\.|…)[\p{Pe}»"’”][\s\u00A0]</beforebreak>
|
4876
4944
|
<afterbreak>\p{Ll}</afterbreak>
|
4877
4945
|
</rule>
|
4878
4946
|
<!-- Abbreviations that can finish sentences -->
|
@@ -4906,7 +4974,7 @@
|
|
4906
4974
|
<afterbreak></afterbreak>
|
4907
4975
|
</rule>
|
4908
4976
|
<rule break="no">
|
4909
|
-
<beforebreak>\b(dc|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd))\.[\s\u00A0]</beforebreak>
|
4977
|
+
<beforebreak>\b(dc|coop|(?iu)(n|[Aa]yto|Mr|C|Dr|Dra|E|Emm|Emma|Excm|Exc|Excma|Hble|I|Il·lm|Il·lma|Il·ltre|Ilm|Ilma|Iltre|Im|Ima|Mgfc|Mgfca|Mn|R|Rev|Sr|Sra|Sres|Sras|Srs|St|Sta|a|abr|abs|acad|add|adj|adm|admdor|admdora|admtiu|admtiva|adv|ag|agl|agr|agron|agròn|aj|ajud|al|alim|amb|ampl|ant|ap|apmt|apnt|apr|aprox|apt|arm|arq|arqueol|arquit|assign|assoc|atm|aut|aux|av|avda|b|batx|bda|bibl|bl|bnc|butll|bxs|c|calef|cartogr|cat|catedr|catol|cf|cia|cin|cint|circul|cit|climat|col|col·l|compt|cons|constr|cont|contr|conv|corp|corr|cpl|cpt|cró|ct|cte|ctra|cts|d|dept|derog|des|desp|dg|dip|disp|distr|div|dj|dl|doc|drec|ds|dt|dta|dte|dupl|dv|e|econ|ed|ef|entl|esc|esp|espf|esq|ex|exc|exp|exped|ext|f|fac|fca|febr|fig|figs|fra|gen|gov|gral|i|imp|impr|impt|inc|insp|inst|int|inv|j|jul|jur|jurispr|leg|llic|loc|ltda|làm|merc|mil·l|màx|mín|neg|nov|nre|núm|o|oct|op|p|pàg|pàgs|paq|par|pda|pg|pl|pobl|pol|ppda|ppt|pral|prev|prof|progr|prov|pta|ptes|ptge|pvt|pàg|quadr|quint|r|rbla|ref|reg|rev|secr|serv|sgt|sotsp|subsp|supl|supt|t|tel|telegr|tit|trad|trans|transcr|transf|trav|tripl|trv|tt|tèc|univ|urb|v|var|veg|venc|vid|vig|vocab|vs|x|àt|íd|subg))\.[\s\u00A0]</beforebreak>
|
4910
4978
|
<afterbreak></afterbreak>
|
4911
4979
|
</rule>
|
4912
4980
|
<rule break="no">
|
@@ -4950,7 +5018,7 @@
|
|
4950
5018
|
</rule>
|
4951
5019
|
<!-- max min etc -->
|
4952
5020
|
<rule break="no">
|
4953
|
-
<beforebreak>\b([Ee]tc|m[aá]x|m[ií]n|aprox|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
5021
|
+
<beforebreak>\b([Ee]tc|m[aá]x|m[ií]n|aprox|long|\d+o)\.[\p{Pe}\p{Pf}\p{Pd}"']*[\s\u00A0]</beforebreak>
|
4954
5022
|
<afterbreak>\p{Ll}</afterbreak>
|
4955
5023
|
</rule>
|
4956
5024
|
<!-- Composed abbrev. -->
|
@@ -4982,6 +5050,10 @@
|
|
4982
5050
|
<beforebreak>\b(https?|ftp|file|chrome|chromium|android|(chrome|moz)\-extension):///?[A-Za-z0-9\-]+\.</beforebreak>
|
4983
5051
|
<afterbreak>[A-Za-z0-9\-]+(\.|\b)</afterbreak>
|
4984
5052
|
</rule>
|
5053
|
+
<rule break="no"><!-- https://www.seven.one/ -->
|
5054
|
+
<beforebreak>\b[Se]even\.</beforebreak>
|
5055
|
+
<afterbreak>[Oo]nes?\b</afterbreak>
|
5056
|
+
</rule>
|
4985
5057
|
<rule break="no">
|
4986
5058
|
<beforebreak>\b[A-Za-z0-9\-]+\.</beforebreak>
|
4987
5059
|
<afterbreak>[A-Za-z0-9\-]+\.(com|net|org|info|de|es|edu|co|eu|nl|io|cn|uk|gov|biz|ca|tk|ru|br|jp|pl)(\.|\b)</afterbreak>
|
@@ -5121,7 +5193,7 @@
|
|
5121
5193
|
</rule>
|
5122
5194
|
<!-- English abbreviations - but these work globally for all languages -->
|
5123
5195
|
<rule break="no">
|
5124
|
-
<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|[Ss]ek|Bros|
|
5196
|
+
<beforebreak>\b(Mrs?|No|pp|St|no|Sr|Jr|[Ss]ek|Bros|[Bb]tw|vs|esp|[Ff]ig|Jan|Feb|Mar|Apr|Ju[nl]|Aug|Sept?|O[ck]t|Nov|Dec|PhD|BSc|BEng|BComp|BArch|al|cf|Inc|Ms|MEng|MSc|MComp|Gen|Sen|Prof|Corp|Co|co|Ltd|Buchst)\.[\u00A0\s]{1,2}</beforebreak>
|
5125
5197
|
<afterbreak></afterbreak>
|
5126
5198
|
</rule>
|
5127
5199
|
<!-- Latin abbreviations - but these work globally for all languages -->
|
@@ -5131,10 +5203,19 @@
|
|
5131
5203
|
</rule>
|
5132
5204
|
<!-- German abbreviations -->
|
5133
5205
|
<rule break="no">
|
5134
|
-
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|d|Übers|
|
5206
|
+
<beforebreak>\b(betr|Geb|Stk|ggü|Mag|mtl|Flgh?|[Pp]arl|Bsp|versch|[Dd]iesbzgl|[Zz]ykl|[Dd]bzgl[Ss]tellv|d|Übers|[Bb]zw|Ab[hkst]|[Ee]ig|[Aa]bzü?gl|\d+-tlg|tlg|[Gg]gfls|[Ff]achspr|[Ll]tda|[Ee]inschl|[Vv]mtl|[Ss]tellv|Ev|[Bb]ezgl|lit|Abzw|[Vv]sl|ahd|Akk|aktual|[Öö]ffentl|prof|allg|alltagsspr|altdt|alttest|amerikan|Anh|Ank|Anm|Art|[Aa]utom|Auftragsnr|Az|Bat|bayr|Bde?|bearb|Bed|Bem|bes|bez|wsl|vsl|Bez|Bhf|Blvd|[Bb]spw|btto|bw|Dtl|[Gg]esetzl|Dez|[Jj]gdfr|[Ee]ff)\.[\u00A0\s]{1,2}</beforebreak>
|
5135
5207
|
<afterbreak></afterbreak>
|
5136
5208
|
</rule>
|
5137
5209
|
<rule break="no">
|
5210
|
+
<beforebreak>\b([Uu]sw|[Ee]tc)\.[\u00A0\s]{1,2}</beforebreak>
|
5211
|
+
<afterbreak>\p{Ll}</afterbreak>
|
5212
|
+
</rule>
|
5213
|
+
<rule break="yes">
|
5214
|
+
<!-- Why is this needed? -->
|
5215
|
+
<beforebreak>\b([Ee]tc)\.[\u00A0\s]{1,2}</beforebreak>
|
5216
|
+
<afterbreak>\p{Lu}</afterbreak>
|
5217
|
+
</rule>
|
5218
|
+
<rule break="no">
|
5138
5219
|
<beforebreak>\b(cts?|[Cc]a|chem|chin|Chr|cresc|[Dd]at|desgl|ders|dgl|Dipl|Dir?|Doz?|durchg|durchges|Dr|[Dd]t|ebd|Ed|[Ee]igt?l|akt|[Ee]ngl|Erg|al|et[cw]|Etw|ev|[Ee]vtl?|[Ee]xkl|Expl|Exz)\.[\u00A0\s]{1,2}</beforebreak>
|
5139
5220
|
<afterbreak></afterbreak>
|
5140
5221
|
</rule>
|
@@ -5672,7 +5753,7 @@
|
|
5672
5753
|
</rule>
|
5673
5754
|
<!-- Наші в... Лос-Анджелесі -->
|
5674
5755
|
<rule break="no">
|
5675
|
-
<beforebreak
|
5756
|
+
<beforebreak>(?U)\b(в|у|на|за|з|із|зі|зо)(\.\.\.|…)[\h\v]*</beforebreak>
|
5676
5757
|
<afterbreak>\p{Lu}</afterbreak>
|
5677
5758
|
</rule>
|
5678
5759
|
<rule break="no">
|
@@ -5685,12 +5766,12 @@
|
|
5685
5766
|
</rule>
|
5686
5767
|
<!-- Digit as a point number: 1. перший пункт -->
|
5687
5768
|
<rule break="no">
|
5688
|
-
<beforebreak
|
5769
|
+
<beforebreak>(?U)\b\d{1,3}\.[\h]+</beforebreak>
|
5689
5770
|
<afterbreak>\p{Ll}|\p{Lu}{2,}</afterbreak>
|
5690
5771
|
</rule>
|
5691
5772
|
<!-- various punctuation between lowercase letters -->
|
5692
5773
|
<rule break="no">
|
5693
|
-
<beforebreak
|
5774
|
+
<beforebreak>(?U)\b\p{Ll}+[.!?][\h\v]*</beforebreak>
|
5694
5775
|
<afterbreak>\h*(([\(«]|[\[‐-―-][\h\v]*)?\p{Ll})</afterbreak>
|
5695
5776
|
</rule>
|
5696
5777
|
<rule break="no">
|
@@ -5699,17 +5780,17 @@
|
|
5699
5780
|
</rule>
|
5700
5781
|
<!-- lowercase letter abbreviations together: н.е., кв.м. -->
|
5701
5782
|
<rule break="no">
|
5702
|
-
<beforebreak
|
5783
|
+
<beforebreak>(?U)\b\p{L}{1,2}\.</beforebreak>
|
5703
5784
|
<afterbreak>\p{L}{1,2}\.</afterbreak>
|
5704
5785
|
</rule>
|
5705
5786
|
<!-- latin capital char abbreviations A. B. C. -->
|
5706
5787
|
<rule break="no">
|
5707
|
-
<beforebreak
|
5788
|
+
<beforebreak>(?U)\b[\u00A0\u202F]?[A-Z]\.[\h\v]?</beforebreak>
|
5708
5789
|
<afterbreak>[A-Z][a-zA-Z'’.-]|[А-ЯІЇЄҐ]\.</afterbreak>
|
5709
5790
|
</rule>
|
5710
5791
|
<!-- capital char abbreviations А. Б. В. -->
|
5711
5792
|
<rule break="no">
|
5712
|
-
<beforebreak>(^[\h\v]*|\([\h\v]*|[«„"]|(\b[А-ЯІЇЄҐACEIHOPX]\.-))[А-ЯІЇЄҐA-Z]\.[\h\v]*</beforebreak>
|
5793
|
+
<beforebreak>(?U)(^[\h\v]*|\([\h\v]*|[«„"]|(\b[А-ЯІЇЄҐACEIHOPX]\.-))[А-ЯІЇЄҐA-Z]\.[\h\v]*</beforebreak>
|
5713
5794
|
<afterbreak></afterbreak>
|
5714
5795
|
</rule>
|
5715
5796
|
<!-- Іван Ч. (1914 р. н.) -->
|
@@ -5724,7 +5805,7 @@
|
|
5724
5805
|
</rule>
|
5725
5806
|
<!-- Ів. Франко (але Ів Бутільє) -->
|
5726
5807
|
<rule break="no">
|
5727
|
-
<beforebreak>(^|[\h\v])(
|
5808
|
+
<beforebreak>(^|[\h\v])(Ів|Дж|Ол)\.[\h\v]+</beforebreak>
|
5728
5809
|
<afterbreak>[А-ЯІЇЄҐA-Z]</afterbreak>
|
5729
5810
|
</rule>
|
5730
5811
|
<!-- Year: 2000 р.:
|
@@ -5733,12 +5814,12 @@
|
|
5733
5814
|
а до лютого 2020 р. — затвердити
|
5734
5815
|
-->
|
5735
5816
|
<rule break="no">
|
5736
|
-
<beforebreak
|
5817
|
+
<beforebreak>(?U)\b([0-9]{2}|[0-9]{4})[\h\v]+р\.[\h\v]+</beforebreak>
|
5737
5818
|
<afterbreak>[\h\v]*[№0-9‐-―-]</afterbreak>
|
5738
5819
|
</rule>
|
5739
5820
|
<!-- річка - р. Дніпро -->
|
5740
5821
|
<rule break="no">
|
5741
|
-
<beforebreak>(?<!\d[\h]*)\bр\.[\h\v]*</beforebreak>
|
5822
|
+
<beforebreak>(?U)(?<!\d[\h]*)\bр\.[\h\v]*</beforebreak>
|
5742
5823
|
<afterbreak>[\h]*(?!(На|Але|Так?)[\h\v]+)[А-ЯІЇЄҐA-Z][^\h]</afterbreak>
|
5743
5824
|
</rule>
|
5744
5825
|
<!-- У травні 1949 р. Грушківський район -->
|
@@ -5753,29 +5834,29 @@
|
|
5753
5834
|
</rule>
|
5754
5835
|
<!-- Years: рр. -->
|
5755
5836
|
<rule break="no">
|
5756
|
-
<beforebreak
|
5837
|
+
<beforebreak>(?U)\b([0-9]0|[0-9]{3}0)(-[мх])?рр\.[\h\v]*</beforebreak>
|
5757
5838
|
<afterbreak></afterbreak>
|
5758
5839
|
</rule>
|
5759
5840
|
<!-- млн./млрд./грн. — frequent mistake -->
|
5760
5841
|
<rule break="no">
|
5761
|
-
<beforebreak
|
5842
|
+
<beforebreak>(?U)\b(тис|млн|млрд|грн)\.[\h\v]*</beforebreak>
|
5762
5843
|
<afterbreak>[\h\v]*(\d|[КМ]Вт)</afterbreak>
|
5763
5844
|
</rule>
|
5764
5845
|
<!-- усталені скорочення, що не збігаються з нескороченими словами -->
|
5765
5846
|
<rule break="no">
|
5766
|
-
<beforebreak
|
5847
|
+
<beforebreak>(?U)\b(укр|рос|англ?|амер|італ|ісп|нім|фр(анц)?|лат|грец(ьк)?)\.[\h\v]*</beforebreak>
|
5767
5848
|
<afterbreak></afterbreak>
|
5768
5849
|
</rule>
|
5769
5850
|
<rule break="no">
|
5770
|
-
<beforebreak
|
5851
|
+
<beforebreak>(?U)\b(абз|арк|ауд|бл|буд|бульв|вул|держ|дод|зав|зб|зв|зовн|екон|іл|к|кв|канд|кн|напр|нпр|нац|обл|оп|пл|пол|поч|пп|пор|просп|розд|стор|табл|[Тт]]ел|ч|част)\.[\h\v]*</beforebreak>
|
5771
5852
|
<afterbreak></afterbreak>
|
5772
5853
|
</rule>
|
5773
5854
|
<rule break="no">
|
5774
|
-
<beforebreak
|
5855
|
+
<beforebreak>(?U)\b(кін)\.[\h\v]*</beforebreak>
|
5775
5856
|
<afterbreak>[а-яіїєґ0-9IXV]|[ІХ]+\b</afterbreak>
|
5776
5857
|
</rule>
|
5777
5858
|
<rule break="no">
|
5778
|
-
<beforebreak
|
5859
|
+
<beforebreak>(?U)\b[сС]т\.[\h\v]</beforebreak>
|
5779
5860
|
<afterbreak>[\h]*(?!([АВУОІЄ]|На|Але|Так?)[\h\v])</afterbreak>
|
5780
5861
|
</rule>
|
5781
5862
|
<!-- нар. 1945 р. | (1966 р. нар.) | 1975 — нар. Осипчук -->
|
@@ -5784,21 +5865,21 @@
|
|
5784
5865
|
<afterbreak></afterbreak>
|
5785
5866
|
</rule>
|
5786
5867
|
<rule break="no">
|
5787
|
-
<beforebreak
|
5868
|
+
<beforebreak>(?U)\bнар\.[\h\v]*</beforebreak>
|
5788
5869
|
<afterbreak>([0-9]|бл\.|арт\.)</afterbreak>
|
5789
5870
|
</rule>
|
5790
5871
|
<rule break="no">
|
5791
|
-
<beforebreak
|
5872
|
+
<beforebreak>(?U)\bдол\.[\h\v]*</beforebreak>
|
5792
5873
|
<afterbreak>США</afterbreak>
|
5793
5874
|
</rule>
|
5794
5875
|
<!-- п. 10 від 11.10.1933, д. Василь -->
|
5795
5876
|
<rule break="no">
|
5796
|
-
<beforebreak>(?<!т\.[\h\v]?)\b[пд]\.[\h\v]*</beforebreak>
|
5877
|
+
<beforebreak>(?U)(?<!т\.[\h\v]?)\b[пд]\.[\h\v]*</beforebreak>
|
5797
5878
|
<afterbreak></afterbreak>
|
5798
5879
|
</rule>
|
5799
5880
|
<!-- усталені скорочення, що збігаються з нескороченими словами -->
|
5800
5881
|
<rule break="no">
|
5801
|
-
<beforebreak
|
5882
|
+
<beforebreak>(?U)\b(див)\.[\h\v]</beforebreak>
|
5802
5883
|
<afterbreak>[\h\v]*[^А-ЯІЇЄҐ]</afterbreak>
|
5803
5884
|
</rule>
|
5804
5885
|
<!-- Верховний орган, див. Африканський національний конгрес -->
|
@@ -5810,20 +5891,20 @@
|
|
5810
5891
|
України (див. Зимові походи)
|
5811
5892
|
-->
|
5812
5893
|
<rule break="no">
|
5813
|
-
<beforebreak>(\([^)]*|\[[^\]]*|,[\h\v]*)\b(див)\.[\h\v]*</beforebreak>
|
5894
|
+
<beforebreak>(?U)(\([^)]*|\[[^\]]*|,[\h\v]*)\b(див)\.[\h\v]*</beforebreak>
|
5814
5895
|
<afterbreak></afterbreak>
|
5815
5896
|
</rule>
|
5816
5897
|
<!-- abbreviation with proper noun: проф. Грицько, о. Лісове -->
|
5817
5898
|
<rule break="no">
|
5818
|
-
<beforebreak
|
5899
|
+
<beforebreak>(?U)\b(ап|[Аа]кад|[Пп]роф|[Дд]оц|[Аа]сист|[Рр]еж|[Аа]рх|[Сс]вв?|о|оз|ім|інж|дир|тов|упоряд|тт|чл\.-кор|[Пп]реп|[сС]вт)\.[\h\v]*</beforebreak>
|
5819
5900
|
<afterbreak>[\h\v]*[А-ЯІЇЄҐA-Z]</afterbreak>
|
5820
5901
|
</rule>
|
5821
5902
|
<rule break="no">
|
5822
|
-
<beforebreak>(?<![іи]\s+)\bдр\.[\h\v]*</beforebreak>
|
5903
|
+
<beforebreak>(?U)(?<![іи]\s+)\bдр\.[\h\v]*</beforebreak>
|
5823
5904
|
<afterbreak>[\h\v]*[А-ЯІЇЄҐ]</afterbreak>
|
5824
5905
|
</rule>
|
5825
5906
|
<rule break="no">
|
5826
|
-
<beforebreak
|
5907
|
+
<beforebreak>(?U)\bМан\.[\h\v]*</beforebreak>
|
5827
5908
|
<afterbreak>[\h\v]*([Сс]іті|[Юю]н)</afterbreak>
|
5828
5909
|
</rule>
|
5829
5910
|
<!-- смерть гр. Болтаровича, but not "9 гр." -->
|
@@ -5833,8 +5914,8 @@
|
|
5833
5914
|
</rule>
|
5834
5915
|
<!-- TODO: арт. - артист -->
|
5835
5916
|
<rule break="no">
|
5836
|
-
<beforebreak
|
5837
|
-
<afterbreak>[\h\v]*[0-9]</afterbreak>
|
5917
|
+
<beforebreak>(?U)\b([Аа]рт|[Мм]ал|[Рр]ис|[Сс]пр)\.[\h\v]*</beforebreak>
|
5918
|
+
<afterbreak>[\h\v]*(№[\h\v]*)?[0-9]</afterbreak>
|
5838
5919
|
</rule>
|
5839
5920
|
<!-- ХІІ р., 3-6 арт., 2-3 тт. -->
|
5840
5921
|
<rule break="no">
|
@@ -5843,7 +5924,7 @@
|
|
5843
5924
|
</rule>
|
5844
5925
|
<!-- але розбиваємо «всього 20 м. Почалося» -->
|
5845
5926
|
<rule break="no">
|
5846
|
-
<beforebreak>(?<!\d[\h\v]*)\bм\.[\h\v]*</beforebreak>
|
5927
|
+
<beforebreak>(?U)(?<!\d[\h\v]*)\bм\.[\h\v]*</beforebreak>
|
5847
5928
|
<afterbreak>[А-ЯІЇЄҐ][а-яіїєґ']</afterbreak>
|
5848
5929
|
</rule>
|
5849
5930
|
<!-- село/сторінка/місто, але щоб не збігалося з секундами/метрами -->
|
@@ -5867,7 +5948,7 @@
|
|
5867
5948
|
<afterbreak>[\h\v]*[‐-―-][\h\v]*([Рр]ед|[Аа]вт)\.[\h\v]*[\)\]]</afterbreak>
|
5868
5949
|
</rule>
|
5869
5950
|
<rule break="no">
|
5870
|
-
<beforebreak
|
5951
|
+
<beforebreak>(?U)\b([Рр]ед)\.[\h\v]*</beforebreak>
|
5871
5952
|
<afterbreak>[А-ЯІЇЄҐ]</afterbreak>
|
5872
5953
|
</rule>
|
5873
5954
|
<!-- Цензор.НЕТ -->
|
@@ -5895,6 +5976,7 @@
|
|
5895
5976
|
<afterbreak>([‐-―-][\h\v]*)?\p{Lu}[^\p{Lu}]</afterbreak>
|
5896
5977
|
</rule>
|
5897
5978
|
</languagerule>
|
5979
|
+
|
5898
5980
|
<languagerule languagerulename="Belarusian">
|
5899
5981
|
<rule break="no">
|
5900
5982
|
<beforebreak>\b\d+\.\s</beforebreak>
|
@@ -6359,7 +6441,7 @@
|
|
6359
6441
|
</rule>
|
6360
6442
|
<!-- Not break for ellipses (...) -->
|
6361
6443
|
<rule break="no">
|
6362
|
-
<beforebreak>[^\s](
|
6444
|
+
<beforebreak>[^\s](\.\.\.|…)\s</beforebreak>
|
6363
6445
|
<afterbreak>\p{Ll}</afterbreak>
|
6364
6446
|
</rule>
|
6365
6447
|
<!-- z.B. "bla (...) blubb" -> without ending sentence -->
|
@@ -6633,7 +6715,7 @@
|
|
6633
6715
|
|
6634
6716
|
Не раздвајати после наводника осим ако нису праћени
|
6635
6717
|
великим словом. На пример:
|
6636
|
-
|
6718
|
+
"Тако је!", рече он.-->
|
6637
6719
|
<rule break="no">
|
6638
6720
|
<beforebreak>["'“],\s</beforebreak>
|
6639
6721
|
<afterbreak>\p{Ll}</afterbreak>
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: srx-languagetool
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.0
|
4
|
+
version: 0.15.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Aaron Madlon-Kay
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2025-03-28 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: srx
|
@@ -24,7 +24,7 @@ dependencies:
|
|
24
24
|
- - "<"
|
25
25
|
- !ruby/object:Gem::Version
|
26
26
|
version: '1.0'
|
27
|
-
description:
|
27
|
+
description:
|
28
28
|
email:
|
29
29
|
- aaron@madlon-kay.com
|
30
30
|
executables: []
|
@@ -58,7 +58,7 @@ metadata:
|
|
58
58
|
source_code_uri: https://github.com/amake/srx-languagetool-ruby.git
|
59
59
|
changelog_uri: https://github.com/amake/srx-languagetool-ruby/blob/master/CHANGELOG.md
|
60
60
|
rubygems_mfa_required: 'true'
|
61
|
-
post_install_message:
|
61
|
+
post_install_message:
|
62
62
|
rdoc_options: []
|
63
63
|
require_paths:
|
64
64
|
- lib
|
@@ -73,8 +73,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
73
73
|
- !ruby/object:Gem::Version
|
74
74
|
version: '0'
|
75
75
|
requirements: []
|
76
|
-
rubygems_version: 3.
|
77
|
-
signing_key:
|
76
|
+
rubygems_version: 3.5.22
|
77
|
+
signing_key:
|
78
78
|
specification_version: 4
|
79
79
|
summary: SRX segmentation rules from LanguageTool
|
80
80
|
test_files: []
|