enju_accessor 0.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4d0b1471223e18688be92aa3b34ed6fb36d7522a
4
+ data.tar.gz: be48f7ad2f104c76c82ff130241a21d53dd24a53
5
+ SHA512:
6
+ metadata.gz: d273b60be7d369ecd5fa41cea9f24f1c622c5907c3fba338f110d95166cd65f0002c0b1e94fab65bc02e4650c4d383e4f211b076194ae3a1004a152a8f143f2a
7
+ data.tar.gz: ae09b24bfe748f1bd46a55759d6f736ce50666fc2dd6d83c917877bc66266cfcdd414c589db9f9b1eaeabbfb4dcea370e2bcebdd5d511d751d822525de100a4f
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# Command-line entry point: reads text from the files named on the command
# line (or from standard input when none are given, via ARGF), parses it
# through EnjuAccessor#parse_text and prints the annotation as JSON.
require 'enju_accessor'
require 'json'

# The Enju CGI endpoint is fixed to a local service.
accessor = EnjuAccessor.new("http://localhost:38401/cgi-lilfes/enju")

puts accessor.parse_text(ARGF.read).to_json
data/bin/enju_tag_text ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# Command-line entry point: reads text from the files named on the command
# line (or from standard input when none are given, via ARGF), tags it
# through EnjuAccessor#tag_text and prints the annotation as JSON.
require 'enju_accessor'
require 'json'

# The Enju CGI endpoint is fixed to a local service.
accessor = EnjuAccessor.new("http://localhost:38401/cgi-lilfes/enju")

puts accessor.tag_text(ARGF.read).to_json
@@ -0,0 +1,184 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rest-client'
3
+ require 'text_sentencer'
4
+ require 'nokogiri'
5
+
6
# Client for an Enju CGI service.
#
# An instance sends sentences to the service, reads the result returned in
# the 'so' format, and converts it into hashes of denotations (token spans)
# and relations (predicate-argument links between tokens).
class EnjuAccessor
  # POS values for which the token's base form is used as the label instead.
  PUNCTUATION_POS = [',', '.', ':', '(', ')', '``', '&apos;&apos;'].freeze

  # Token attributes that point at an argument (a token or constituent id).
  ARGUMENT_ATTRIBUTES = %w[arg1 arg2 arg3 mod].freeze

  # enju_cgi_url:: URL of the Enju CGI endpoint.
  def initialize(enju_cgi_url)
    @enju_cgi = RestClient::Resource.new(enju_cgi_url)
    @sentencer = TextSentencer.new
    @tid_base, @rid_base = 0, 0
    # Running id offset used by tag_sentence; initialised here so that a
    # first call in 'continue' mode does not operate on nil.
    @id_base = 0
  end

  # Sends one sentence to the Enju CGI and returns [toks, cons] as produced
  # by read_parse.
  #
  # Raises IOError when the request fails or the response code is not 200,
  # and RuntimeError ("Empty input.") when the service reports an empty line.
  def get_parse(sentence)
    begin
      response = @enju_cgi.get :params => {:sentence => sentence, :format => 'so'}
    rescue => e
      raise IOError, "Abnormal behavior of the Enju CGI server: #{e.message}."
    end

    raise IOError, "Abnormal response from the Enju CGI server." unless response.code == 200 # 200 means success
    raise "Empty input." if response =~ /^Empty line/

    r = response.encode("ASCII-8BIT").force_encoding("UTF-8").to_s
    read_parse(sentence, r)
  end

  # Reads a parsing result in SO format.
  #
  # sentence:: the sentence that was parsed; used to cut out each token's word.
  # r::        the result text, one "begin<TAB>end<TAB>attributes" entry per line.
  #
  # Returns [toks, cons]:
  # * toks maps a token id to
  #   {beg:, end:, word:, idx:, base:, pos:, cat:, args: {arg1:/arg2:/arg3:/mod:}}
  # * cons maps a constituent id to {head:, sem_head:, cat:}
  def read_parse(sentence, r)
    toks = {}
    cons = {}

    # Accumulated correction applied to the reported offsets: every non-ASCII
    # character of a base form shifts later offsets by (1 - its byte size).
    # NOTE(review): presumably the service reports byte offsets - confirm.
    adjustment = 0
    idx = 0

    r.split(/\r?\n/).each do |line| # for each line of analysis
      b, e, attr_str = line.split(/\t/)
      next if attr_str.nil? # blank or malformed line: no attributes to read

      # Each line is parsed once; token and constituent entries are both
      # collected in this single pass.
      node = Nokogiri::HTML.parse('<node ' + attr_str + '>')
      attrs = node.css('node').first.to_h

      if attrs['tok'] == ""
        b = b.to_i
        e = e.to_i
        base = attrs['base']

        b += adjustment
        base.each_char { |c| adjustment += (1 - c.bytesize) if c !~ /\p{ASCII}/ }
        e += adjustment

        id = attrs['id']
        pos = attrs['pos']
        pos = base if PUNCTUATION_POS.include?(pos)
        # Non-mutating on purpose: pos may be the same object as base here.
        pos = pos.sub('$', '-DOLLAR-')
        pos = '-COLON-' if pos == 'HYPH'

        args = {}
        ARGUMENT_ATTRIBUTES.each do |name|
          args[name.to_sym] = attrs[name] if attrs[name]
        end

        toks[id] = {beg: b, end: e, word: sentence[b...e], idx: idx, base: base, pos: pos, cat: attrs['cat'], args: args}
        idx += 1
      end

      if attrs['cons'] == ""
        cons[attrs['id']] = {head: attrs['head'], sem_head: attrs['sem_head'], cat: attrs['cat']}
      end
    end

    [toks, cons]
  end

  # Parses one sentence and returns {denotations:, relations:}.
  #
  # sentence::    the sentence text.
  # offset_base:: value added to every span offset.
  # mode::        'continue' keeps numbering token ('T') and relation ('R')
  #               ids after the previous call; anything else restarts at 0.
  def parse_sentence(sentence, offset_base = 0, mode = '')
    @tid_base, @rid_base = 0, 0 unless mode == 'continue'

    toks, cons = get_parse(sentence)

    denotations = []
    tid_mapping = {}
    idx_last = 0
    toks.each do |id, tok|
      tid = tid_mapping[id] = 'T' + (tok[:idx] + @tid_base).to_s
      denotations << {id: tid, span: {begin: tok[:beg] + offset_base, end: tok[:end] + offset_base}, obj: tok[:pos]}
      idx_last = tok[:idx]
    end

    # Resolve, for every constituent, the token that is its semantic head.
    cons.each_value do |con|
      con[:thead] = token_head(cons, con[:sem_head])
    end

    relations = []
    rid_num = @rid_base
    toks.each do |id, tok|
      tok[:args].each do |type, arg|
        if arg.start_with?('c')
          con = cons[arg]
          arg = con && con[:thead]
        end
        # Arguments that do not resolve to a known token are skipped.
        next if arg.nil? || tid_mapping[arg].nil?
        relations << {id: 'R' + rid_num.to_s, subj: tid_mapping[arg], obj: tid_mapping[id], pred: type.to_s.downcase + 'Of'}
        rid_num += 1
      end
    end

    @tid_base = @tid_base + idx_last + 1
    @rid_base = rid_num

    {:denotations => denotations, :relations => relations}
  end

  # Tags one sentence and returns {denotations:}, holding for every token a
  # 'P<n>' denotation labelled with its POS and a 'B<n>' denotation labelled
  # with its base form.
  #
  # sentence::    the sentence text.
  # offset_base:: value added to every span offset.
  # mode::        'continue' keeps numbering ids after the previous call;
  #               anything else restarts at 0.
  def tag_sentence(sentence, offset_base = 0, mode = '')
    @id_base = 0 unless mode == 'continue' && @id_base

    # Use the tokens returned by get_parse; no instance variable holds them.
    toks, _cons = get_parse(sentence)

    denotations = []
    idx_last = 0
    toks.each_value do |token|
      number = (token[:idx] + @id_base).to_s
      span_begin = token[:beg] + offset_base
      span_end = token[:end] + offset_base
      denotations << {id: 'P' + number, span: {begin: span_begin, end: span_end}, obj: token[:pos]}
      denotations << {id: 'B' + number, span: {begin: span_begin, end: span_end}, obj: token[:base]}
      idx_last = token[:idx]
    end

    @id_base = @id_base + idx_last + 1

    {:denotations => denotations}
  end

  # Splits text into sentences with the sentencer, parses each one, and
  # returns {text:, denotations:, relations:} with ids numbered continuously
  # across sentences and spans relative to the whole text.
  def parse_text(text)
    segments = @sentencer.segment(text)

    denotations, relations = [], []
    segments.each_with_index do |s, i|
      mode = i == 0 ? nil : 'continue'
      annotation = parse_sentence(text[s[0]...s[1]], s[0], mode)
      denotations.concat(annotation[:denotations])
      relations.concat(annotation[:relations])
    end

    {:text => text, :denotations => denotations, :relations => relations}
  end

  # Splits text into sentences with the sentencer, tags each one, and returns
  # {text:, denotations:} with ids numbered continuously across sentences and
  # spans relative to the whole text.
  def tag_text(text)
    segments = @sentencer.segment(text)

    denotations = []
    segments.each_with_index do |s, i|
      mode = i == 0 ? nil : 'continue'
      annotation = tag_sentence(text[s[0]...s[1]], s[0], mode)
      denotations.concat(annotation[:denotations])
    end

    {:text => text, :denotations => denotations}
  end

  private

  # Follows sem_head links from +head+ until an id starting with 't' is
  # reached. Returns that id, or nil when the chain is broken or cyclic.
  def token_head(cons, head)
    seen = {}
    until head.nil? || head.start_with?('t')
      return nil if seen[head] || cons[head].nil?
      seen[head] = true
      head = cons[head][:sem_head]
    end
    head
  end
end
@@ -0,0 +1 @@
1
+ require 'enju_accessor/enju_accessor'
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: enju_accessor
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.9'
5
+ platform: ruby
6
+ authors:
7
+ - Jin-Dong Kim
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-10-13 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A wrapper for Enju CGI service to convert the output to the PubAnnotation
14
+ JSON format.
15
+ email: jindong.kim@gmail.com
16
+ executables:
17
+ - enju_parse_text
18
+ - enju_tag_text
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - bin/enju_parse_text
23
+ - bin/enju_tag_text
24
+ - lib/enju_accessor.rb
25
+ - lib/enju_accessor/enju_accessor.rb
26
+ homepage: https://github.com/jdkim/enju_accessor
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubyforge_project:
46
+ rubygems_version: 2.6.11
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: A wrapper for Enju CGI service to convert the output to the PubAnnotation
50
+ JSON format.
51
+ test_files: []