srx-languagetool 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 13c9ee40fd02a436a0016cade2b0aeeecb615c20ff912e703558800a9deffbec
4
- data.tar.gz: 4c81d6bffbd73860fa60d5e9b65ee0a62428454dd69fe73452d6a103cf65abe1
3
+ metadata.gz: d929b515d7e87e260c01373e1f5d6c35bf35fa05f8fe29fea39ba18d0078e120
4
+ data.tar.gz: ee821f5cfb3b4fea3d347c3ef3583459da60d044c458aa5ad907b378d5eef963
5
5
  SHA512:
6
- metadata.gz: e0dabae1c8f62bcd5133c566c750e99f5a473b9ff7c19d1981e92c4aec99155adf99ddc0158a3bf128869d678e7862addc2344b3b6f42c779af141ae84030068
7
- data.tar.gz: f89e5779174040869de0a5f25d4e73380a87a051fc5da969964d544f62a9919e8cc952fcf28a6561aedc89006c38ec4d56ebf8bef0a230f60b2fc89d65f0d841
6
+ metadata.gz: 2733d28e0ed99d6b5fd0c15b30311aed10059c34c8c411de051901ca545ed6a4eb299e0695190817b45d6691f74b3d78215d6a5ce68c589b0776e25ca3c3dada
7
+ data.tar.gz: 4344f1b2dc867dcbffb8db8b2db95da458de718f0481814e8dd23965c2b3831199702333637d56017566388de5da0d745995276e0376c9e16e63dc9f15cd8d10
@@ -10,10 +10,10 @@ jobs:
10
10
  - name: Set up Ruby
11
11
  uses: ruby/setup-ruby@v1
12
12
  with:
13
- ruby-version: 2.7.2
13
+ ruby-version: 2.7.3
14
14
  - name: Install
15
15
  run: |
16
- gem install bundler -v 2.2.9
16
+ gem install bundler -v 2.2.16
17
17
  bundle install
18
18
  - name: Type check
19
19
  run: bundle exec solargraph typecheck --level typed
data/.gitignore CHANGED
@@ -6,3 +6,4 @@
6
6
  /pkg/
7
7
  /spec/reports/
8
8
  /tmp/
9
+ /vendor
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ 2.7.3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,9 @@
1
1
  ## [Unreleased]
2
2
 
3
+ ## [0.2.0] - 2021-04-15
4
+
5
+ - Update rules to LanguageTool 5.3
6
+
3
7
  ## [0.1.0] - 2021-02-13
4
8
 
5
9
  - Initial release
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- srx-languagetool (0.1.0)
4
+ srx-languagetool (0.2.0)
5
5
  srx (< 1.0)
6
6
 
7
7
  GEM
@@ -14,28 +14,28 @@ GEM
14
14
  diff-lcs (1.4.4)
15
15
  e2mmap (0.1.0)
16
16
  jaro_winkler (1.5.4)
17
- kramdown (2.3.0)
17
+ kramdown (2.3.1)
18
18
  rexml
19
19
  kramdown-parser-gfm (1.1.0)
20
20
  kramdown (~> 2.0)
21
- minitest (5.14.3)
22
- nokogiri (1.11.1-x86_64-darwin)
21
+ minitest (5.14.4)
22
+ nokogiri (1.11.3-x86_64-darwin)
23
23
  racc (~> 1.4)
24
24
  parallel (1.20.1)
25
- parser (3.0.0.0)
25
+ parser (3.0.1.0)
26
26
  ast (~> 2.4.1)
27
27
  racc (1.5.2)
28
28
  rainbow (3.0.0)
29
29
  rake (13.0.3)
30
- regexp_parser (2.0.3)
30
+ regexp_parser (2.1.1)
31
31
  reverse_markdown (2.0.0)
32
32
  nokogiri
33
- rexml (3.2.4)
33
+ rexml (3.2.5)
34
34
  rspec-expectations (3.10.1)
35
35
  diff-lcs (>= 1.2.0, < 2.0)
36
36
  rspec-support (~> 3.10.0)
37
37
  rspec-support (3.10.2)
38
- rubocop (1.9.1)
38
+ rubocop (1.12.1)
39
39
  parallel (~> 1.10)
40
40
  parser (>= 3.0.0.0)
41
41
  rainbow (>= 2.2.2, < 4.0)
@@ -47,7 +47,7 @@ GEM
47
47
  rubocop-ast (1.4.1)
48
48
  parser (>= 2.7.1.5)
49
49
  ruby-progressbar (1.11.0)
50
- solargraph (0.40.3)
50
+ solargraph (0.40.4)
51
51
  backport (~> 1.1)
52
52
  benchmark
53
53
  bundler (>= 1.17.2)
@@ -61,7 +61,7 @@ GEM
61
61
  thor (~> 1.0)
62
62
  tilt (~> 2.0)
63
63
  yard (~> 0.9, >= 0.9.24)
64
- srx (0.1.0)
64
+ srx (0.6.0)
65
65
  nokogiri (~> 1.11)
66
66
  thor (1.1.0)
67
67
  tilt (2.0.10)
@@ -81,4 +81,4 @@ DEPENDENCIES
81
81
  srx-languagetool!
82
82
 
83
83
  BUNDLED WITH
84
- 2.2.9
84
+ 2.2.16
data/README.md CHANGED
@@ -29,7 +29,7 @@ For detailed usage information, please see
29
29
  [srx-ruby](https://github.com/amake/srx-ruby).
30
30
 
31
31
  ```ruby
32
- require 'srx-languagetool'
32
+ require 'srx/languagetool'
33
33
 
34
34
  data = Srx::Data.languagetool
35
35
  engine = Srx::Engine.new(data)
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Srx
4
4
  module Languagetool
5
- VERSION = '0.1.0'
5
+ VERSION = '0.2.0'
6
6
  end
7
7
  end
data/lib/srx/segment.srx CHANGED
@@ -1368,7 +1368,7 @@
1368
1368
  <afterbreak>\p{Ll}+</afterbreak>
1369
1369
  </rule>
1370
1370
  <rule break="no">
1371
- <beforebreak>[\.\s]\p{L}{1,2}\.\s</beforebreak>
1371
+ <beforebreak>[\.\s](?!(on|it|of|to|be|by|at|he|we|so|do|if|up|my|me|us|go|am))\p{L}{1,2}\.\s</beforebreak><!-- not 'no'/'in', these could be abbreviations-->
1372
1372
  <afterbreak>[\p{N}\p{Ll}]</afterbreak>
1373
1373
  </rule>
1374
1374
  <rule break="no">
@@ -1705,6 +1705,14 @@
1705
1705
  <beforebreak>\bmax\.\s</beforebreak>
1706
1706
  <afterbreak>\p{Ll}</afterbreak>
1707
1707
  </rule>
1708
+ <rule break="yes">
1709
+ <beforebreak>[?!.]['"\u00BB\u2019\u201D\u203A\u00AB\p{Pe}\u0002]\s</beforebreak>
1710
+ <afterbreak>[A-Z][a-z]</afterbreak>
1711
+ </rule>
1712
+ <rule break="yes">
1713
+ <beforebreak>[?!.]\s</beforebreak>
1714
+ <afterbreak>['"\u00BB\u2019\u201D\u203A\u00AB\p{Pe}\u0002][A-Z][a-z]</afterbreak>
1715
+ </rule>
1708
1716
  </languagerule>
1709
1717
  <languagerule languagerulename="Slovak">
1710
1718
  <rule break="no">
@@ -4554,11 +4562,11 @@
4554
4562
  </rule>
4555
4563
  <!-- Abbreviations that can finish sentences -->
4556
4564
  <rule break="no">
4557
- <beforebreak>\bs\.\s</beforebreak>
4565
+ <beforebreak>\b(s|ca)\.\s</beforebreak>
4558
4566
  <afterbreak>[XIV]+\b</afterbreak>
4559
4567
  </rule>
4560
4568
  <rule break="no">
4561
- <beforebreak>\b(min|m)\.\s</beforebreak>
4569
+ <beforebreak>\b(min|m|ca)\.\s</beforebreak>
4562
4570
  <afterbreak>[0-9]+\b</afterbreak>
4563
4571
  </rule>
4564
4572
  <rule break="no">
@@ -4658,11 +4666,11 @@
4658
4666
  </rule>
4659
4667
  <!-- Abbreviations that can finish sentences -->
4660
4668
  <rule break="no">
4661
- <beforebreak>\bs\.\s</beforebreak>
4669
+ <beforebreak>\b(s|ca)\.\s</beforebreak>
4662
4670
  <afterbreak>[XIV]+\b</afterbreak>
4663
4671
  </rule>
4664
4672
  <rule break="no">
4665
- <beforebreak>\b(min|m)\.\s</beforebreak>
4673
+ <beforebreak>\b(min|m|ca)\.\s</beforebreak>
4666
4674
  <afterbreak>[0-9]+\b</afterbreak>
4667
4675
  </rule>
4668
4676
  <rule break="no">
@@ -4862,7 +4870,7 @@
4862
4870
  </rule>
4863
4871
  <!-- don't split at cases like "Friedrich II. wird auch..." -->
4864
4872
  <rule break="no">
4865
- <beforebreak>[\s ][IVX]+\.\s</beforebreak>
4873
+ <beforebreak>[\s ][IVX]+\.\s</beforebreak>
4866
4874
  <afterbreak>[^\p{Lu}]+</afterbreak>
4867
4875
  </rule>
4868
4876
  <!-- don't split at cases like "im 13. oder 14. Jahrhundert" -->
@@ -6279,7 +6287,7 @@
6279
6287
  <afterbreak></afterbreak>
6280
6288
  </rule>
6281
6289
  <rule break="no">
6282
- <beforebreak>[^\.]\s[ضصثقفغعهخحجچشسیبلاتنمکگ\ظطزرذدپوًٌٍَُِّْA-Z]\.\s</beforebreak>
6290
+ <beforebreak>[^\.]\s[ضصثقفغعهخحجچشسیبلاتنمکگ\ظطزرذدپوًٌٍَُِّْA-Z]\.\s</beforebreak>
6283
6291
  <afterbreak></afterbreak>
6284
6292
  </rule>
6285
6293
  <rule break="no">
@@ -6359,7 +6367,7 @@
6359
6367
  </rule>
6360
6368
  <!--Не раздвајај у случају као на пр.: "Петар I дошао је ..."-->
6361
6369
  <rule break="no">
6362
- <beforebreak>[\s ][IVX]+\s</beforebreak>
6370
+ <beforebreak>[\s ][IVX]+\s</beforebreak>
6363
6371
  <afterbreak>[^\p{Lu}]+</afterbreak>
6364
6372
  </rule>
6365
6373
  <!--Не раздвајај у случају као "од 13. до 14. века"-->
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: srx-languagetool
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Aaron Madlon-Kay
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2021-02-13 00:00:00.000000000 Z
11
+ date: 2021-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: srx
@@ -120,6 +120,7 @@ files:
120
120
  - ".gitignore"
121
121
  - ".rubocop.yml"
122
122
  - ".rubocop_todo.yml"
123
+ - ".ruby-version"
123
124
  - ".solargraph.yml"
124
125
  - CHANGELOG.md
125
126
  - Gemfile
@@ -155,7 +156,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
156
  - !ruby/object:Gem::Version
156
157
  version: '0'
157
158
  requirements: []
158
- rubygems_version: 3.1.4
159
+ rubygems_version: 3.1.6
159
160
  signing_key:
160
161
  specification_version: 4
161
162
  summary: SRX segmentation rules from LanguageTool