enju_accessor 0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 4d0b1471223e18688be92aa3b34ed6fb36d7522a
4
+ data.tar.gz: be48f7ad2f104c76c82ff130241a21d53dd24a53
5
+ SHA512:
6
+ metadata.gz: d273b60be7d369ecd5fa41cea9f24f1c622c5907c3fba338f110d95166cd65f0002c0b1e94fab65bc02e4650c4d383e4f211b076194ae3a1004a152a8f143f2a
7
+ data.tar.gz: ae09b24bfe748f1bd46a55759d6f736ce50666fc2dd6d83c917877bc66266cfcdd414c589db9f9b1eaeabbfb4dcea370e2bcebdd5d511d751d822525de100a4f
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# Reads text from the files named on the command line (or from standard
# input when none are given), has it parsed through the Enju CGI service,
# and prints the resulting annotation as JSON.
require 'enju_accessor'
require 'json'

accessor = EnjuAccessor.new("http://localhost:38401/cgi-lilfes/enju")
puts accessor.parse_text(ARGF.read).to_json
data/bin/enju_tag_text ADDED
@@ -0,0 +1,9 @@
1
#!/usr/bin/env ruby
# Reads text from the files named on the command line (or from standard
# input when none are given), has it tagged through the Enju CGI service,
# and prints the resulting annotation as JSON.
require 'enju_accessor'
require 'json'

accessor = EnjuAccessor.new("http://localhost:38401/cgi-lilfes/enju")
puts accessor.tag_text(ARGF.read).to_json
@@ -0,0 +1,184 @@
1
+ #!/usr/bin/env ruby
2
+ require 'rest-client'
3
+ require 'text_sentencer'
4
+ require 'nokogiri'
5
+
6
# Accessor for an Enju CGI server: sends sentences to the server, reads the
# parse it returns (requested in 'so' format), and converts it into hashes of
# denotations (token spans) and relations (predicate-argument links).
class EnjuAccessor
  # POS values for which the token's base form is reported instead of the POS.
  BASE_AS_POS = [',', '.', ':', '(', ')', '``', '&apos;&apos;'].freeze

  # Attribute names copied into a token's :args hash, in output order.
  ARGUMENT_KEYS = %w[arg1 arg2 arg3 mod].freeze

  # @param enju_cgi_url [String] URL of the Enju CGI endpoint.
  def initialize(enju_cgi_url)
    @enju_cgi = RestClient::Resource.new(enju_cgi_url)
    @sentencer = TextSentencer.new
    @tid_base, @rid_base = 0, 0
    # Initialised here so that tag_sentence also works when its very first
    # call is made in 'continue' mode.
    @id_base = 0
  end

  # Sends one sentence to the Enju CGI server and returns its parse.
  #
  # @param sentence [String] the sentence to parse.
  # @return [Array(Hash, Hash)] tokens and constituents, as built by read_parse.
  # @raise [IOError] when the request fails or the response code is not 200.
  # @raise [RuntimeError] when the server reports an empty input line.
  def get_parse(sentence)
    begin
      response = @enju_cgi.get :params => { :sentence => sentence, :format => 'so' }
    rescue => e
      raise IOError, "Abnormal behavior of the Enju CGI server: #{e.message}."
    end

    raise IOError, "Abnormal response from the Enju CGI server." unless response.code == 200
    raise "Empty input." if response =~ /^Empty line/

    # Reinterpret the raw bytes as UTF-8. Transcoding with encode() would
    # raise on any non-ASCII character when the body is already tagged UTF-8.
    read_parse(sentence, response.to_s.dup.force_encoding("UTF-8"))
  end

  # Converts a parse in SO format into token and constituent tables.
  #
  # Each line of +r+ is "begin<TAB>end<TAB>attributes". Lines whose attributes
  # include an empty +tok+ become tokens; lines with an empty +cons+ become
  # constituents. Lines without an attribute field are skipped.
  #
  # @param sentence [String] the sentence the parse belongs to.
  # @param r [String] the parse in SO format.
  # @return [Array(Hash, Hash)] +[toks, cons]+, both keyed by node id.
  def read_parse(sentence, r)
    toks = {}
    cons = {}

    # Running correction applied to the reported offsets: every multi-byte
    # character seen so far shifts later positions by (1 - bytesize).
    # NOTE(review): this presumes Enju reports byte offsets -- confirm.
    adjustment = 0
    idx = 0

    r.split(/\r?\n/).each do |line|
      b, e, attr_str = line.split(/\t/)
      next if attr_str.nil? # blank or malformed line: nothing to read

      attrs = node_attributes(attr_str)

      if attrs['tok'] == ""
        base = attrs['base']

        b = b.to_i + adjustment
        adjustment += base.length - base.bytesize
        e = e.to_i + adjustment

        token = { beg: b, end: e, word: sentence[b...e], idx: idx, base: base,
                  pos: normalize_pos(attrs['pos'], base), cat: attrs['cat'], args: {} }
        ARGUMENT_KEYS.each { |key| token[:args][key.to_sym] = attrs[key] if attrs[key] }

        toks[attrs['id']] = token
        idx += 1
      end

      if attrs['cons'] == ""
        cons[attrs['id']] = { head: attrs['head'], sem_head: attrs['sem_head'], cat: attrs['cat'] }
      end
    end

    [toks, cons]
  end

  # Parses one sentence and returns its tokens as denotations and its
  # predicate-argument links as relations.
  #
  # @param sentence [String] the sentence to parse.
  # @param offset_base [Integer] value added to every span offset.
  # @param mode [String, nil] 'continue' keeps numbering ids after the
  #   previous call; any other value restarts the numbering from 0.
  # @return [Hash] +{:denotations => [...], :relations => [...]}+
  def parse_sentence(sentence, offset_base = 0, mode = '')
    @tid_base, @rid_base = 0, 0 unless mode == 'continue'

    toks, cons = get_parse(sentence)

    tid_mapping = {}
    denotations = toks.map do |id, tok|
      tid = tid_mapping[id] = "T#{tok[:idx] + @tid_base}"
      { id: tid, span: { begin: tok[:beg] + offset_base, end: tok[:end] + offset_base }, obj: tok[:pos] }
    end

    # Follow the semantic-head chain of each constituent down to a token id.
    cons.each_value do |con|
      thead = con[:sem_head]
      thead = cons[thead][:sem_head] until thead.start_with?('t')
      con[:thead] = thead
    end

    relations = []
    rid_num = @rid_base
    toks.each do |id, tok|
      tok[:args].each do |type, arg|
        if arg.start_with?('c')
          # An argument naming an unknown constituent is skipped like any
          # other argument that cannot be mapped to a token.
          con = cons[arg]
          arg = con && con[:thead]
        end
        next if tid_mapping[arg].nil?

        relations << { id: "R#{rid_num}", subj: tid_mapping[arg], obj: tid_mapping[id],
                       pred: "#{type.to_s.downcase}Of" }
        rid_num += 1
      end
    end

    # Advance the counters only by what was actually produced.
    @tid_base += toks.values.last[:idx] + 1 unless toks.empty?
    @rid_base = rid_num

    { :denotations => denotations, :relations => relations }
  end

  # Parses one sentence and returns, for every token, a POS denotation
  # (id prefix 'P') and a base-form denotation (id prefix 'B').
  #
  # @param sentence [String] the sentence to tag.
  # @param offset_base [Integer] value added to every span offset.
  # @param mode [String, nil] 'continue' keeps numbering ids after the
  #   previous call; any other value restarts the numbering from 0.
  # @return [Hash] +{:denotations => [...]}+
  def tag_sentence(sentence, offset_base = 0, mode = '')
    @id_base = 0 unless mode == 'continue'

    # Only the tokens of the parse are needed here.
    toks, = get_parse(sentence)

    denotations = []
    toks.each_value do |tok|
      num = tok[:idx] + @id_base
      first = tok[:beg] + offset_base
      last = tok[:end] + offset_base
      denotations << { id: "P#{num}", span: { begin: first, end: last }, obj: tok[:pos] }
      denotations << { id: "B#{num}", span: { begin: first, end: last }, obj: tok[:base] }
    end

    @id_base += toks.values.last[:idx] + 1 unless toks.empty?

    { :denotations => denotations }
  end

  # Splits +text+ into sentences and parses each one, numbering ids
  # continuously across the sentences.
  #
  # @param text [String] the text to parse.
  # @return [Hash] +{:text=>, :denotations=>, :relations=>}+
  def parse_text(text)
    denotations, relations = [], []

    @sentencer.segment(text).each_with_index do |s, i|
      mode = i.zero? ? nil : 'continue'
      annotation = parse_sentence(text[s[0]...s[1]], s[0], mode)
      denotations.concat(annotation[:denotations])
      relations.concat(annotation[:relations])
    end

    { :text => text, :denotations => denotations, :relations => relations }
  end

  # Splits +text+ into sentences and tags each one, numbering ids
  # continuously across the sentences.
  #
  # @param text [String] the text to tag.
  # @return [Hash] +{:text=>, :denotations=>}+
  def tag_text(text)
    denotations = []

    @sentencer.segment(text).each_with_index do |s, i|
      mode = i.zero? ? nil : 'continue'
      denotations.concat(tag_sentence(text[s[0]...s[1]], s[0], mode)[:denotations])
    end

    { :text => text, :denotations => denotations }
  end

  private

  # Reads the attribute list of one SO line into a Hash by wrapping it in a
  # dummy element and letting Nokogiri parse it.
  def node_attributes(attr_str)
    Nokogiri::HTML.parse('<node ' + attr_str + '>').css('node').first.to_h
  end

  # Maps a raw POS value to the label used in the output. Non-mutating on
  # purpose: +pos+ may be the very same String object as +base+.
  def normalize_pos(pos, base)
    pos = base if BASE_AS_POS.include?(pos)
    pos = pos.sub('$', '-DOLLAR-')
    pos == 'HYPH' ? '-COLON-' : pos
  end
end
@@ -0,0 +1 @@
1
+ require 'enju_accessor/enju_accessor'
metadata ADDED
@@ -0,0 +1,51 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: enju_accessor
3
+ version: !ruby/object:Gem::Version
4
+ version: '0.9'
5
+ platform: ruby
6
+ authors:
7
+ - Jin-Dong Kim
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-10-13 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A wrapper for Enju CGI service to convert the output to the PubAnnotation
14
+ JSON format.
15
+ email: jindong.kim@gmail.com
16
+ executables:
17
+ - enju_parse_text
18
+ - enju_tag_text
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - bin/enju_parse_text
23
+ - bin/enju_tag_text
24
+ - lib/enju_accessor.rb
25
+ - lib/enju_accessor/enju_accessor.rb
26
+ homepage: https://github.com/jdkim/enju_accessor
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubyforge_project:
46
+ rubygems_version: 2.6.11
47
+ signing_key:
48
+ specification_version: 4
49
+ summary: A wrapper for Enju CGI service to convert the output to the PubAnnotation
50
+ JSON format.
51
+ test_files: []