pragmatic_segmenter_server 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 54321133f0512e94bc8df32b823780820dd57569
4
+ data.tar.gz: 595f906c6115a8cded9521529a66b4d2cca7c280
5
+ SHA512:
6
+ metadata.gz: f92cf7292c893d7cb3ddc053a176007e9fe72022b7b6557e841e853032ccec9a5c7a8a495d2c714762c96617838d6a51e9099235af3c37fbaef651f8b7dc94d8
7
+ data.tar.gz: 4fb69b43f5f9af07e611038437e877706aece47b8c155c8dc4dd20e7463895b41812c574732199372d7bc19bcf19f9adaf2eaf0b7876ab3146118db12552cc07
@@ -0,0 +1,28 @@
1
+ require 'pragmatic_segmenter'
2
+
3
+
4
# Splits +text+ into sentences and builds a "mask" of the text in which every
# sentence found is replaced by the placeholder "{}".
#
# When +segmentByNewline+ is truthy, the text is first cut on runs of
# newline / carriage-return / tab characters (together with the spaces around
# them). Those separator runs are copied verbatim into the mask and every
# remaining part is segmented on its own, so a sentence never spans a line
# break.
#
# @param text [String] the text to segment
# @param lang [String] language code handed to PragmaticSegmenter
# @param segmentByNewline [Boolean] whether line breaks force a sentence boundary
# @return [Array(Array<String>, String)] the sentences, in order, and the mask
def segment(text, lang, segmentByNewline)
  newline_regex = /((?: *[\n\r\t]+ *)+)/
  segments = []
  mask = ''.dup

  # The capture group makes String#split keep the separators in the result.
  parts = segmentByNewline ? text.split(newline_regex) : [text]

  parts.each do |part|
    if segmentByNewline && part =~ newline_regex
      mask << part
      next
    end

    # Segment once and reuse the result for both the output list and the mask.
    sentences = PragmaticSegmenter::Segmenter.new(text: part, language: lang, clean: false).segment
    segments.concat(sentences)
    mask << mask_segments(part, sentences)
  end

  [segments, mask]
end

# Replaces each sentence with "{}" inside +part+, scanning left to right.
#
# Sentences are located sequentially from a moving cursor, so each sentence
# yields at most one placeholder and a sentence that is a prefix of a later
# one cannot steal its match. A sentence that does not occur verbatim after
# the cursor is skipped and the corresponding text is left untouched. An empty
# sentence list returns +part+ unchanged.
#
# @param part [String] the text chunk the sentences were extracted from
# @param sentences [Array<String>] sentences in the order they occur in +part+
# @return [String] +part+ with every located sentence replaced by "{}"
def mask_segments(part, sentences)
  masked = ''.dup
  cursor = 0

  sentences.each do |sentence|
    next if sentence.empty?

    found = part.index(sentence, cursor)
    next unless found

    masked << part[cursor...found] << '{}'
    cursor = found + sentence.length
  end

  masked << part[cursor..-1]
end
@@ -0,0 +1,65 @@
1
+ require 'sinatra'
2
+ require 'json'
3
+ require 'json-schema'
4
+ require_relative 'segment'
5
+
6
+
7
# POST /segment
#
# Expects a JSON object in the request body:
#   "texts"            (required) non-empty array of strings to segment
#   "lang"             (required) language code passed to the segmenter
#   "segmentByNewline" (optional) boolean, defaults to true
#
# Responds with a JSON array holding one object per input text, each with the
# keys "text", "segments" and "mask". Malformed JSON or a payload that does
# not satisfy the schema yields status 400 and a body of {"error": message}.
post '/segment' do
  content_type :json

  # Input: a body that is not valid JSON is a client error, not a server error.
  begin
    input = JSON.parse(request.body.read)
  rescue JSON::ParserError => e
    halt 400, { "error" => e.message }.to_json
  end

  schema = {
    "type" => "object",
    "required" => ["texts", "lang"],
    "properties" => {
      "texts" => {
        "type" => "array",
        # minItems constrains the array itself; inside "items" it would be
        # applied to each string element and have no effect.
        "minItems" => 1,
        "items" => {
          "type" => "string"
        }
      },
      "lang" => {
        "type" => "string"
      },
      "segmentByNewline" => {
        "type" => "boolean"
      }
    }
  }

  begin
    JSON::Validator.validate!(schema, input)
  rescue JSON::Schema::ValidationError => e
    halt 400, { "error" => e.message }.to_json
  end

  # Extract info
  lang = input["lang"]
  texts = input["texts"]
  segment_by_newline = input.fetch("segmentByNewline", true)

  # Segmentation
  results = texts.map do |text|
    segments, mask = segment(text, lang, segment_by_newline)
    { "text" => text, "segments" => segments, "mask" => mask }
  end

  # The block's return value becomes the response body; `puts` would only
  # write to the server's stdout and leave the response empty.
  results.to_json
end
metadata ADDED
@@ -0,0 +1,101 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: pragmatic_segmenter_server
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ platform: ruby
6
+ authors:
7
+ - Laurent Bié
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-01-07 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sinatra
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - '='
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.3
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - '='
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.3
27
+ - !ruby/object:Gem::Dependency
28
+ name: sinatra-contrib
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - '='
32
+ - !ruby/object:Gem::Version
33
+ version: 2.0.3
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - '='
39
+ - !ruby/object:Gem::Version
40
+ version: 2.0.3
41
+ - !ruby/object:Gem::Dependency
42
+ name: json-schema
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - '='
46
+ - !ruby/object:Gem::Version
47
+ version: 2.8.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - '='
53
+ - !ruby/object:Gem::Version
54
+ version: 2.8.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: pragmatic_segmenter
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - '='
60
+ - !ruby/object:Gem::Version
61
+ version: 0.3.18
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - '='
67
+ - !ruby/object:Gem::Version
68
+ version: 0.3.18
69
+ description: A HTTP server for pragmatic segmenter
70
+ email: l.bie@pangeanic.com
71
+ executables: []
72
+ extensions: []
73
+ extra_rdoc_files: []
74
+ files:
75
+ - lib/pragmatic_segmenter_server.rb
76
+ - lib/pragmatic_segmenter_server/segment.rb
77
+ homepage: ''
78
+ licenses:
79
+ - MIT
80
+ metadata: {}
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubyforge_project:
97
+ rubygems_version: 2.5.2.1
98
+ signing_key:
99
+ specification_version: 4
100
+ summary: A server for pragmatic segmenter
101
+ test_files: []