RubyGems - enju_accessor - Versions diffs - 0.9 - Mend

enju_accessor 0.9

Files changed (6) hide show

checksums.yaml +7 -0
data/bin/enju_parse_text +9 -0
data/bin/enju_tag_text +9 -0
data/lib/enju_accessor/enju_accessor.rb +184 -0
data/lib/enju_accessor.rb +1 -0
metadata +51 -0

checksums.yaml ADDED Viewed

@@ -0,0 +1,7 @@
+---
+SHA1:
+  metadata.gz: 4d0b1471223e18688be92aa3b34ed6fb36d7522a
+  data.tar.gz: be48f7ad2f104c76c82ff130241a21d53dd24a53
+SHA512:
+  metadata.gz: d273b60be7d369ecd5fa41cea9f24f1c622c5907c3fba338f110d95166cd65f0002c0b1e94fab65bc02e4650c4d383e4f211b076194ae3a1004a152a8f143f2a
+  data.tar.gz: ae09b24bfe748f1bd46a55759d6f736ce50666fc2dd6d83c917877bc66266cfcdd414c589db9f9b1eaeabbfb4dcea370e2bcebdd5d511d751d822525de100a4f

data/bin/enju_parse_text ADDED Viewed

@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+require 'enju_accessor'
+require 'json'
+# Read all input (files named in ARGV, or standard input), parse it through
+# the Enju CGI endpoint below, and print the resulting annotation as JSON.
+accessor = EnjuAccessor.new("http://localhost:38401/cgi-lilfes/enju")
+puts accessor.parse_text(ARGF.read).to_json

data/bin/enju_tag_text ADDED Viewed

@@ -0,0 +1,9 @@
+#!/usr/bin/env ruby
+require 'enju_accessor'
+require 'json'
+# Read all input (files named in ARGV, or standard input), tag it through
+# the Enju CGI endpoint below, and print the resulting annotation as JSON.
+accessor = EnjuAccessor.new("http://localhost:38401/cgi-lilfes/enju")
+puts accessor.tag_text(ARGF.read).to_json

data/lib/enju_accessor/enju_accessor.rb ADDED Viewed

@@ -0,0 +1,184 @@
+#!/usr/bin/env ruby
+require 'rest-client'
+require 'text_sentencer'
+require 'nokogiri'
+# An instance of this class holds the parsing result of a natural language query as analyzed by Enju.
+class EnjuAccessor
+  def initialize(enju_cgi_url)
+    @enju_cgi = RestClient::Resource.new(enju_cgi_url)
+    @sentencer = TextSentencer.new
+    @tid_base, @rid_base = 0, 0
+  end
+  def get_parse (sentence)
+    begin
+      response = @enju_cgi.get :params => {:sentence=>sentence, :format=>'so'}
+    rescue => e
+      raise IOError, "Abnormal behavior of the Enju CGI server: #{e.message}."
+    end
+    parse = case response.code
+    when 200             # 200 means success
+      raise "Empty input." if response =~/^Empty line/
+      r = response.encode("ASCII-8BIT").force_encoding("UTF-8").to_s
+      read_parse(sentence, r)
+    else
+      raise IOError, "Abnormal response from the Enju CGI server."
+    end
+    parse
+  end
+  def read_parse (sentence, r)
+    toks = {}
+    cons = {}
+    adjustment = 0
+    # r is a parsing result in SO format.
+    lines = r.split(/\r?\n/)
+    idx = 0
+    lines.each do |line|  # for each line of analysis
+      b, e, attr_str = line.split(/\t/)
+      b = b.to_i
+      e = e.to_i
+      node = Nokogiri::HTML.parse('<node ' + attr_str + '>')
+      attrs = node.css('node').first.to_h
+      if attrs['tok'] == ""
+        base = attrs['base']
+        b += adjustment
+        base.each_char{|c| adjustment += (1 - c.bytesize) if c !~ /\p{ASCII}/}
+        e += adjustment
+        id = attrs['id']
+        pos = attrs['pos']
+        pos = attrs['base'] if [',', '.', ':', '(', ')', '``', '&apos;&apos;'].include?(pos)
+        pos.sub!('$', '-DOLLAR-')
+        pos = '-COLON-' if pos == 'HYPH'
+        toks[id] = {beg: b, end:e, word:sentence[b ... e], idx:idx, base:base, pos:pos, cat:attrs['cat'], args:{}}
+        toks[id][:args][:arg1] = attrs['arg1'] if attrs['arg1']
+        toks[id][:args][:arg2] = attrs['arg2'] if attrs['arg2']
+        toks[id][:args][:arg3] = attrs['arg3'] if attrs['arg3']
+        toks[id][:args][:mod] = attrs['mod'] if attrs['mod']
+        idx += 1
+      end
+    end
+    lines.each do |line|  # for each line of analysis
+      b, e, attr_str = line.split(/\t/)
+      b = b.to_i
+      e = e.to_i
+      node = Nokogiri::HTML.parse('<node ' + attr_str + '>')
+      attrs = node.css('node').first.to_h
+      if attrs['cons'] == ""
+        id = attrs['id']
+        head = attrs['head']
+        sem_head = attrs['sem_head']
+        cat = attrs['cat']
+        cons[id] = {head:head, sem_head: sem_head, cat:cat}
+      end
+    end
+    # puts sentence
+    # puts toks.map{|t| t.to_s}.join("\n")
+    # puts cons.map{|c| c.to_s}.join("\n")
+    # puts "-----"
+    # exit
+    [toks, cons]
+  end
+  def parse_sentence (sentence, offset_base = 0, mode = '')
+    @tid_base, @rid_base = 0, 0 unless mode == 'continue'
+    toks, cons = get_parse(sentence)
+    denotations = []
+    tid_mapping = {}
+    idx_last = 0
+    toks.each do |id, tok|
+      id = tid_mapping[id] = 'T' + (tok[:idx] + @tid_base).to_s
+      denotations << {id:id, span:{begin: tok[:beg] + offset_base, end: tok[:end] + offset_base}, obj: tok[:pos]}
+      idx_last = tok[:idx]
+    end
+    # puts toks.map{|t| t.to_s}.join("\n")
+    cons.each do |id, con|
+      thead = con[:sem_head]
+      thead = cons[thead][:sem_head] until thead.start_with?('t')
+      con[:thead] = thead
+    end
+    relations = []
+    rid_num = @rid_base
+    toks.each do |id, tok|
+      unless tok[:args].empty?
+        tok[:args].each do |type, arg|
+          arg = cons[arg][:thead] if arg.start_with?('c')
+          next if tid_mapping[arg].nil?
+          relations << {id: 'R' + rid_num.to_s, subj: tid_mapping[arg], obj: tid_mapping[id], pred: type.to_s.downcase + 'Of'}
+          rid_num += 1
+        end
+      end
+    end
+    @tid_base = @tid_base + idx_last + 1
+    @rid_base = rid_num
+    {:denotations => denotations, :relations => relations}
+  end
+  def tag_sentence (sentence, offset_base = 0, mode = '')
+    @id_base = 0 unless mode == 'continue'
+    get_parse(sentence)
+    denotations = []
+    idx_last = 0
+    @tokens.each do |token|
+      denotations << {id: 'P' + (token[:idx] + @id_base).to_s, span: {begin: token[:beg] + offset_base, end: token[:end] + offset_base}, obj: token[:pos]}
+      denotations << {id: 'B' + (token[:idx] + @id_base).to_s, span: {begin: token[:beg] + offset_base, end: token[:end] + offset_base}, obj: token[:base]}
+      idx_last = token[:idx]
+    end
+    @id_base = @id_base + idx_last + 1
+    {:denotations => denotations}
+  end
+  def parse_text (text)
+    segments = @sentencer.segment(text)
+    denotations, relations = [], []
+    segments.each_with_index do |s, i|
+      mode = (i == 0)? nil : 'continue'
+      annotation = parse_sentence(text[s[0]...s[1]], s[0], mode)
+      denotations += annotation[:denotations]
+      relations += annotation[:relations]
+    end
+    {:text=> text, :denotations => denotations, :relations => relations}
+  end
+  def tag_text (text)
+    segments = @sentencer.segment(text)
+    denotations = []
+    segments.each_with_index do |s, i|
+      mode = (i == 0)? nil : 'continue'
+      annotation = tag_sentence(text[s[0]...s[1]], s[0], mode)
+      denotations += annotation[:denotations]
+    end
+    {:text=> text, :denotations => denotations}
+  end
+end

data/lib/enju_accessor.rb ADDED Viewed

	@@ -0,0 +1 @@
1	+ require 'enju_accessor/enju_accessor'

metadata ADDED Viewed

@@ -0,0 +1,51 @@
+--- !ruby/object:Gem::Specification
+name: enju_accessor
+version: !ruby/object:Gem::Version
+  version: '0.9'
+platform: ruby
+authors:
+- Jin-Dong Kim
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2017-10-13 00:00:00.000000000 Z
+dependencies: []
+description: A wrapper for Enju CGI service to convert the output to the PubAnnotation
+  JSON format.
+email: jindong.kim@gmail.com
+executables:
+- enju_parse_text
+- enju_tag_text
+extensions: []
+extra_rdoc_files: []
+files:
+- bin/enju_parse_text
+- bin/enju_tag_text
+- lib/enju_accessor.rb
+- lib/enju_accessor/enju_accessor.rb
+homepage: https://github.com/jdkim/enju_accessor
+licenses:
+- MIT
+metadata: {}
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: '0'
+requirements: []
+rubyforge_project:
+rubygems_version: 2.6.11
+signing_key:
+specification_version: 4
+summary: A wrapper for Enju CGI service to convert the output to the PubAnnotation
+  JSON format.
+test_files: []