pragmatic_segmenter_server 0.0.3 → 0.0.4

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 84a577b1c9cebdb0a87a1e53e6db54f6129ad6f6
4
- data.tar.gz: 6bb06f2ad71d3e3d99cf7b1613fee6b23cb639ac
2
+ SHA256:
3
+ metadata.gz: cbbfa81ea6f9f89d46a43592d074b1dfce290e92025df05dfa404c834ba823d5
4
+ data.tar.gz: a8a5de048c216ed3e8714a51268301013e839962d04681323f0c2d9693735cd9
5
5
  SHA512:
6
- metadata.gz: 3b8af5ed8487401bc3f73aecefe58fc151c1399b56a53a2687bf314f70d9f3dbfc3d90413b1216948030ddc077966eba40bbafc848bc063a6b17343db226e02c
7
- data.tar.gz: eb6b32274896fdb74d368f2882cd6648773a95f73140df85505974e36e8a17c0a23c84cff176eebe311bce90bfb3e31cc197d4c75d677a6fc22ae0377c149464
6
+ metadata.gz: 26bf50391aa511334e76690fce663b9b405f66e4ef31b691afce77cfcc10c20d98487441bd979a0bcc5620d2d27903471b0e7743c9c702d370361c46aa007c69
7
+ data.tar.gz: 6efc4d1754da81a31a27b80bf290f697d4ab09896a729136587438a12ebc75ebcac9533fca477f13ab39d3f8f4274f1cbba6e884333419d8acc4c94f20308606
@@ -2,6 +2,7 @@ require 'pragmatic_segmenter'
2
2
 
3
3
 
4
4
  def segment(text, lang, segmentByNewline)
5
+
5
6
  newLineRegex = /((?: *[\n\r\t]+ *)+)/
6
7
  mask = ''
7
8
  segments = []
@@ -11,17 +12,22 @@ def segment(text, lang, segmentByNewline)
11
12
  else
12
13
  textParts = [text]
13
14
  end
14
-
15
+
15
16
  textParts.each do |textPart|
16
17
  if segmentByNewline && textPart.match(newLineRegex)
17
18
  mask += textPart
18
19
  else
20
+ if textPart == ""
21
+ next
22
+ end
19
23
  ps = PragmaticSegmenter::Segmenter.new(text: textPart, language: lang, clean:false)
20
24
  ps.segment.each do |segment|
21
25
  segments.push(segment)
22
26
  end
23
27
  mask += textPart.gsub(Regexp.new(ps.segment.map { |string| Regexp.escape(string) }.join("|")), "{}")
28
+
24
29
  end
30
+
25
31
  end
26
32
 
27
33
  return segments, mask
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: pragmatic_segmenter_server
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Laurent Bié
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-01-07 00:00:00.000000000 Z
11
+ date: 2023-01-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: sinatra
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - '='
67
67
  - !ruby/object:Gem::Version
68
68
  version: 0.3.18
69
- description: A HTTP server for pragmatic segmenter
69
+ description: An HTTP server for pragmatic segmenter
70
70
  email: l.bie@pangeanic.com
71
71
  executables: []
72
72
  extensions: []
@@ -93,8 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
93
  - !ruby/object:Gem::Version
94
94
  version: '0'
95
95
  requirements: []
96
- rubyforge_project:
97
- rubygems_version: 2.5.2.1
96
+ rubygems_version: 3.1.2
98
97
  signing_key:
99
98
  specification_version: 4
100
99
  summary: A server for pragmatic segmenter