srx-languagetool 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/main.yml +2 -2
- data/.gitignore +1 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +4 -0
- data/Gemfile.lock +11 -11
- data/README.md +1 -1
- data/lib/srx/languagetool/version.rb +1 -1
- data/lib/srx/segment.srx +16 -8
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d929b515d7e87e260c01373e1f5d6c35bf35fa05f8fe29fea39ba18d0078e120
|
|
4
|
+
data.tar.gz: ee821f5cfb3b4fea3d347c3ef3583459da60d044c458aa5ad907b378d5eef963
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 2733d28e0ed99d6b5fd0c15b30311aed10059c34c8c411de051901ca545ed6a4eb299e0695190817b45d6691f74b3d78215d6a5ce68c589b0776e25ca3c3dada
|
|
7
|
+
data.tar.gz: 4344f1b2dc867dcbffb8db8b2db95da458de718f0481814e8dd23965c2b3831199702333637d56017566388de5da0d745995276e0376c9e16e63dc9f15cd8d10
|
data/.github/workflows/main.yml
CHANGED
|
@@ -10,10 +10,10 @@ jobs:
|
|
|
10
10
|
- name: Set up Ruby
|
|
11
11
|
uses: ruby/setup-ruby@v1
|
|
12
12
|
with:
|
|
13
|
-
ruby-version: 2.7.
|
|
13
|
+
ruby-version: 2.7.3
|
|
14
14
|
- name: Install
|
|
15
15
|
run: |
|
|
16
|
-
gem install bundler -v 2.2.
|
|
16
|
+
gem install bundler -v 2.2.16
|
|
17
17
|
bundle install
|
|
18
18
|
- name: Type check
|
|
19
19
|
run: bundle exec solargraph typecheck --level typed
|
data/.gitignore
CHANGED
data/.ruby-version
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
2.7.3
|
data/CHANGELOG.md
CHANGED
data/Gemfile.lock
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
srx-languagetool (0.1.0)
|
|
4
|
+
srx-languagetool (0.2.0)
|
|
5
5
|
srx (< 1.0)
|
|
6
6
|
|
|
7
7
|
GEM
|
|
@@ -14,28 +14,28 @@ GEM
|
|
|
14
14
|
diff-lcs (1.4.4)
|
|
15
15
|
e2mmap (0.1.0)
|
|
16
16
|
jaro_winkler (1.5.4)
|
|
17
|
-
kramdown (2.3.
|
|
17
|
+
kramdown (2.3.1)
|
|
18
18
|
rexml
|
|
19
19
|
kramdown-parser-gfm (1.1.0)
|
|
20
20
|
kramdown (~> 2.0)
|
|
21
|
-
minitest (5.14.
|
|
22
|
-
nokogiri (1.11.
|
|
21
|
+
minitest (5.14.4)
|
|
22
|
+
nokogiri (1.11.3-x86_64-darwin)
|
|
23
23
|
racc (~> 1.4)
|
|
24
24
|
parallel (1.20.1)
|
|
25
|
-
parser (3.0.
|
|
25
|
+
parser (3.0.1.0)
|
|
26
26
|
ast (~> 2.4.1)
|
|
27
27
|
racc (1.5.2)
|
|
28
28
|
rainbow (3.0.0)
|
|
29
29
|
rake (13.0.3)
|
|
30
|
-
regexp_parser (2.
|
|
30
|
+
regexp_parser (2.1.1)
|
|
31
31
|
reverse_markdown (2.0.0)
|
|
32
32
|
nokogiri
|
|
33
|
-
rexml (3.2.
|
|
33
|
+
rexml (3.2.5)
|
|
34
34
|
rspec-expectations (3.10.1)
|
|
35
35
|
diff-lcs (>= 1.2.0, < 2.0)
|
|
36
36
|
rspec-support (~> 3.10.0)
|
|
37
37
|
rspec-support (3.10.2)
|
|
38
|
-
rubocop (1.
|
|
38
|
+
rubocop (1.12.1)
|
|
39
39
|
parallel (~> 1.10)
|
|
40
40
|
parser (>= 3.0.0.0)
|
|
41
41
|
rainbow (>= 2.2.2, < 4.0)
|
|
@@ -47,7 +47,7 @@ GEM
|
|
|
47
47
|
rubocop-ast (1.4.1)
|
|
48
48
|
parser (>= 2.7.1.5)
|
|
49
49
|
ruby-progressbar (1.11.0)
|
|
50
|
-
solargraph (0.40.
|
|
50
|
+
solargraph (0.40.4)
|
|
51
51
|
backport (~> 1.1)
|
|
52
52
|
benchmark
|
|
53
53
|
bundler (>= 1.17.2)
|
|
@@ -61,7 +61,7 @@ GEM
|
|
|
61
61
|
thor (~> 1.0)
|
|
62
62
|
tilt (~> 2.0)
|
|
63
63
|
yard (~> 0.9, >= 0.9.24)
|
|
64
|
-
srx (0.
|
|
64
|
+
srx (0.6.0)
|
|
65
65
|
nokogiri (~> 1.11)
|
|
66
66
|
thor (1.1.0)
|
|
67
67
|
tilt (2.0.10)
|
|
@@ -81,4 +81,4 @@ DEPENDENCIES
|
|
|
81
81
|
srx-languagetool!
|
|
82
82
|
|
|
83
83
|
BUNDLED WITH
|
|
84
|
-
2.2.
|
|
84
|
+
2.2.16
|
data/README.md
CHANGED
data/lib/srx/segment.srx
CHANGED
|
@@ -1368,7 +1368,7 @@
|
|
|
1368
1368
|
<afterbreak>\p{Ll}+</afterbreak>
|
|
1369
1369
|
</rule>
|
|
1370
1370
|
<rule break="no">
|
|
1371
|
-
<beforebreak>[\.\s]\p{L}{1,2}\.\s</beforebreak>
|
|
1371
|
+
<beforebreak>[\.\s](?!(on|it|of|to|be|by|at|he|we|so|do|if|up|my|me|us|go|am))\p{L}{1,2}\.\s</beforebreak><!-- not 'no'/'in', these could be abbreviations-->
|
|
1372
1372
|
<afterbreak>[\p{N}\p{Ll}]</afterbreak>
|
|
1373
1373
|
</rule>
|
|
1374
1374
|
<rule break="no">
|
|
@@ -1705,6 +1705,14 @@
|
|
|
1705
1705
|
<beforebreak>\bmax\.\s</beforebreak>
|
|
1706
1706
|
<afterbreak>\p{Ll}</afterbreak>
|
|
1707
1707
|
</rule>
|
|
1708
|
+
<rule break="yes">
|
|
1709
|
+
<beforebreak>[?!.]['"\u00BB\u2019\u201D\u203A\u00AB\p{Pe}\u0002]\s</beforebreak>
|
|
1710
|
+
<afterbreak>[A-Z][a-z]</afterbreak>
|
|
1711
|
+
</rule>
|
|
1712
|
+
<rule break="yes">
|
|
1713
|
+
<beforebreak>[?!.]\s</beforebreak>
|
|
1714
|
+
<afterbreak>['"\u00BB\u2019\u201D\u203A\u00AB\p{Pe}\u0002][A-Z][a-z]</afterbreak>
|
|
1715
|
+
</rule>
|
|
1708
1716
|
</languagerule>
|
|
1709
1717
|
<languagerule languagerulename="Slovak">
|
|
1710
1718
|
<rule break="no">
|
|
@@ -4554,11 +4562,11 @@
|
|
|
4554
4562
|
</rule>
|
|
4555
4563
|
<!-- Abbreviations that can finish sentences -->
|
|
4556
4564
|
<rule break="no">
|
|
4557
|
-
<beforebreak>\
|
|
4565
|
+
<beforebreak>\b(s|ca)\.\s</beforebreak>
|
|
4558
4566
|
<afterbreak>[XIV]+\b</afterbreak>
|
|
4559
4567
|
</rule>
|
|
4560
4568
|
<rule break="no">
|
|
4561
|
-
<beforebreak>\b(min|m)\.\s</beforebreak>
|
|
4569
|
+
<beforebreak>\b(min|m|ca)\.\s</beforebreak>
|
|
4562
4570
|
<afterbreak>[0-9]+\b</afterbreak>
|
|
4563
4571
|
</rule>
|
|
4564
4572
|
<rule break="no">
|
|
@@ -4658,11 +4666,11 @@
|
|
|
4658
4666
|
</rule>
|
|
4659
4667
|
<!-- Abbreviations that can finish sentences -->
|
|
4660
4668
|
<rule break="no">
|
|
4661
|
-
<beforebreak>\
|
|
4669
|
+
<beforebreak>\b(s|ca)\.\s</beforebreak>
|
|
4662
4670
|
<afterbreak>[XIV]+\b</afterbreak>
|
|
4663
4671
|
</rule>
|
|
4664
4672
|
<rule break="no">
|
|
4665
|
-
<beforebreak>\b(min|m)\.\s</beforebreak>
|
|
4673
|
+
<beforebreak>\b(min|m|ca)\.\s</beforebreak>
|
|
4666
4674
|
<afterbreak>[0-9]+\b</afterbreak>
|
|
4667
4675
|
</rule>
|
|
4668
4676
|
<rule break="no">
|
|
@@ -4862,7 +4870,7 @@
|
|
|
4862
4870
|
</rule>
|
|
4863
4871
|
<!-- don't split at cases like "Friedrich II. wird auch..." -->
|
|
4864
4872
|
<rule break="no">
|
|
4865
|
-
<beforebreak>[\s
|
|
4873
|
+
<beforebreak>[\s ][IVX]+\.\s</beforebreak>
|
|
4866
4874
|
<afterbreak>[^\p{Lu}]+</afterbreak>
|
|
4867
4875
|
</rule>
|
|
4868
4876
|
<!-- don't split at cases like "im 13. oder 14. Jahrhundert" -->
|
|
@@ -6279,7 +6287,7 @@
|
|
|
6279
6287
|
<afterbreak></afterbreak>
|
|
6280
6288
|
</rule>
|
|
6281
6289
|
<rule break="no">
|
|
6282
|
-
<beforebreak>[^\.]\s[
|
|
6290
|
+
<beforebreak>[^\.]\s[ضصثقفغعهخحجچشسیبلاتنمکگ\ظطزرذدپوًٌٍَُِّْA-Z]\.\s</beforebreak>
|
|
6283
6291
|
<afterbreak></afterbreak>
|
|
6284
6292
|
</rule>
|
|
6285
6293
|
<rule break="no">
|
|
@@ -6359,7 +6367,7 @@
|
|
|
6359
6367
|
</rule>
|
|
6360
6368
|
<!--Не раздвајај у случају као на пр.: "Петар I дошао је ..."-->
|
|
6361
6369
|
<rule break="no">
|
|
6362
|
-
<beforebreak>[\s
|
|
6370
|
+
<beforebreak>[\s ][IVX]+\s</beforebreak>
|
|
6363
6371
|
<afterbreak>[^\p{Lu}]+</afterbreak>
|
|
6364
6372
|
</rule>
|
|
6365
6373
|
<!--Не раздвајај у случају као "од 13. до 14. века"-->
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: srx-languagetool
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Aaron Madlon-Kay
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2021-
|
|
11
|
+
date: 2021-04-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: srx
|
|
@@ -120,6 +120,7 @@ files:
|
|
|
120
120
|
- ".gitignore"
|
|
121
121
|
- ".rubocop.yml"
|
|
122
122
|
- ".rubocop_todo.yml"
|
|
123
|
+
- ".ruby-version"
|
|
123
124
|
- ".solargraph.yml"
|
|
124
125
|
- CHANGELOG.md
|
|
125
126
|
- Gemfile
|
|
@@ -155,7 +156,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
155
156
|
- !ruby/object:Gem::Version
|
|
156
157
|
version: '0'
|
|
157
158
|
requirements: []
|
|
158
|
-
rubygems_version: 3.1.
|
|
159
|
+
rubygems_version: 3.1.6
|
|
159
160
|
signing_key:
|
|
160
161
|
specification_version: 4
|
|
161
162
|
summary: SRX segmentation rules from LanguageTool
|