pragmatic_segmenter_server 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/lib/pragmatic_segmenter_server/segment.rb +7 -1
- metadata +4 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 8281b6457cd8b7e9f3bfe9f2bd147f8754c4412685cb46f7b44ed8abcd31fe2b
|
4
|
+
data.tar.gz: 6557146cf60c86522164aa6ea3981eae1e7974d81b5be27a3c0870c05b31ff6d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '018b13d0e8808690a8d83634bcf07a44ea17a302539dc2dd3fe7e80e919c8ed7c7666e83aa637d97d0909fed9364af0872c8b9f1d02bef7e730433130ce3c342'
|
7
|
+
data.tar.gz: 9fad7b9caaa15acf5d717fdccca9551560a2f88bf1326c0b31c8b213af30a9ca9f20d5c1c8e8e5f35c878d27f55e3e1e6930b242ecfcd47de92d602eb0a12265
|
@@ -2,6 +2,7 @@ require 'pragmatic_segmenter'
|
|
2
2
|
|
3
3
|
|
4
4
|
def segment(text, lang, segmentByNewline)
|
5
|
+
|
5
6
|
newLineRegex = /((?: *[\n\r\t]+ *)+)/
|
6
7
|
mask = ''
|
7
8
|
segments = []
|
@@ -11,17 +12,22 @@ def segment(text, lang, segmentByNewline)
|
|
11
12
|
else
|
12
13
|
textParts = [text]
|
13
14
|
end
|
14
|
-
|
15
|
+
|
15
16
|
textParts.each do |textPart|
|
16
17
|
if segmentByNewline && textPart.match(newLineRegex)
|
17
18
|
mask += textPart
|
18
19
|
else
|
20
|
+
if textPart == ""
|
21
|
+
next
|
22
|
+
end
|
19
23
|
ps = PragmaticSegmenter::Segmenter.new(text: textPart, language: lang, clean:false)
|
20
24
|
ps.segment.each do |segment|
|
21
25
|
segments.push(segment)
|
22
26
|
end
|
23
27
|
mask += textPart.gsub(Regexp.new(ps.segment.map { |string| Regexp.escape(string) }.join("|")), "{}")
|
28
|
+
|
24
29
|
end
|
30
|
+
|
25
31
|
end
|
26
32
|
|
27
33
|
return segments, mask
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: pragmatic_segmenter_server
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.3
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Laurent Bié
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2023-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sinatra
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - '='
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: 0.3.18
|
69
|
-
description:
|
69
|
+
description: An HTTP server for pragmatic segmenter
|
70
70
|
email: l.bie@pangeanic.com
|
71
71
|
executables: []
|
72
72
|
extensions: []
|
@@ -93,8 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
- !ruby/object:Gem::Version
|
94
94
|
version: '0'
|
95
95
|
requirements: []
|
96
|
-
|
97
|
-
rubygems_version: 2.5.2.1
|
96
|
+
rubygems_version: 3.1.2
|
98
97
|
signing_key:
|
99
98
|
specification_version: 4
|
100
99
|
summary: A server for pragmatic segmenter
|