pragmatic_segmenter_server 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/pragmatic_segmenter_server/segment.rb +7 -1
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: cbbfa81ea6f9f89d46a43592d074b1dfce290e92025df05dfa404c834ba823d5
|
4
|
+
data.tar.gz: a8a5de048c216ed3e8714a51268301013e839962d04681323f0c2d9693735cd9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 26bf50391aa511334e76690fce663b9b405f66e4ef31b691afce77cfcc10c20d98487441bd979a0bcc5620d2d27903471b0e7743c9c702d370361c46aa007c69
|
7
|
+
data.tar.gz: 6efc4d1754da81a31a27b80bf290f697d4ab09896a729136587438a12ebc75ebcac9533fca477f13ab39d3f8f4274f1cbba6e884333419d8acc4c94f20308606
|
@@ -2,6 +2,7 @@ require 'pragmatic_segmenter'
|
|
2
2
|
|
3
3
|
|
4
4
|
def segment(text, lang, segmentByNewline)
|
5
|
+
|
5
6
|
newLineRegex = /((?: *[\n\r\t]+ *)+)/
|
6
7
|
mask = ''
|
7
8
|
segments = []
|
@@ -11,17 +12,22 @@ def segment(text, lang, segmentByNewline)
|
|
11
12
|
else
|
12
13
|
textParts = [text]
|
13
14
|
end
|
14
|
-
|
15
|
+
|
15
16
|
textParts.each do |textPart|
|
16
17
|
if segmentByNewline && textPart.match(newLineRegex)
|
17
18
|
mask += textPart
|
18
19
|
else
|
20
|
+
if textPart == ""
|
21
|
+
next
|
22
|
+
end
|
19
23
|
ps = PragmaticSegmenter::Segmenter.new(text: textPart, language: lang, clean:false)
|
20
24
|
ps.segment.each do |segment|
|
21
25
|
segments.push(segment)
|
22
26
|
end
|
23
27
|
mask += textPart.gsub(Regexp.new(ps.segment.map { |string| Regexp.escape(string) }.join("|")), "{}")
|
28
|
+
|
24
29
|
end
|
30
|
+
|
25
31
|
end
|
26
32
|
|
27
33
|
return segments, mask
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pragmatic_segmenter_server
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Bié
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-01-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sinatra
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - '='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.3.18
|
69
|
-
description:
|
69
|
+
description: An HTTP server for pragmatic segmenter
|
70
70
|
email: l.bie@pangeanic.com
|
71
71
|
executables: []
|
72
72
|
extensions: []
|
@@ -93,8 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
- !ruby/object:Gem::Version
|
94
94
|
version: '0'
|
95
95
|
requirements: []
|
96
|
-
|
97
|
-
rubygems_version: 2.5.2.1
|
96
|
+
rubygems_version: 3.1.2
|
98
97
|
signing_key:
|
99
98
|
specification_version: 4
|
100
99
|
summary: A server for pragmatic segmenter
|