RubyGems - wlapi - Versions diffs - 0.0.6 → 0.7.1 - Mend

wlapi 0.0.6 → 0.7.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (44) hide show

data/INSTALL +5 -0
data/README +11 -5
data/README.rdoc +22 -0
data/Rakefile +14 -0
data/lib/wlapi/api.rb +354 -0
data/lib/wlapi.rb +1 -267
data/test/test_api.rb +233 -0
data/wlapi.gemspec +9 -7
metadata +14 -47
data/doc/INSTALL.html +0 -90
data/doc/LICENSE.html +0 -115
data/doc/README.html +0 -119
data/doc/WLAPI/API.html +0 -1008
data/doc/WLAPI.html +0 -155
data/doc/created.rid +0 -6
data/doc/example/example_rb.html +0 -65
data/doc/images/brick.png +0 -0
data/doc/images/brick_link.png +0 -0
data/doc/images/bug.png +0 -0
data/doc/images/bullet_black.png +0 -0
data/doc/images/bullet_toggle_minus.png +0 -0
data/doc/images/bullet_toggle_plus.png +0 -0
data/doc/images/date.png +0 -0
data/doc/images/find.png +0 -0
data/doc/images/loadingAnimation.gif +0 -0
data/doc/images/macFFBgHack.png +0 -0
data/doc/images/package.png +0 -0
data/doc/images/page_green.png +0 -0
data/doc/images/page_white_text.png +0 -0
data/doc/images/page_white_width.png +0 -0
data/doc/images/plugin.png +0 -0
data/doc/images/ruby.png +0 -0
data/doc/images/tag_green.png +0 -0
data/doc/images/wrench.png +0 -0
data/doc/images/wrench_orange.png +0 -0
data/doc/images/zoom.png +0 -0
data/doc/index.html +0 -131
data/doc/js/darkfish.js +0 -116
data/doc/js/jquery.js +0 -32
data/doc/js/quicksearch.js +0 -114
data/doc/js/thickbox-compressed.js +0 -10
data/doc/lib/wlapi_rb.html +0 -59
data/doc/rdoc.css +0 -706
data/example/example.rb +0 -17

data/INSTALL CHANGED Viewed

@@ -0,0 +1,5 @@
+Install this library using:
+  gem install wlapi-x.y.z.gem
+You can find the appropriate .gem file in the ./pkg directory.

data/README CHANGED Viewed

@@ -1,16 +1,22 @@
-= WLAPI
+WLAPI
-* http://rubygems.org/gems/wlapi
-* http://www.uni-trier.de/index.php?id=24140
+RubyGems (http://rubygems.org/gems/wlapi)
+Developers Homepage (http://www.uni-trier.de/index.php?id=24140)
+WLAPI Project Page (http://wlapi.rubyforge.org/)
-== DESCRIPTION
+DESCRIPTION
 WLAPI is a simple API for Wortschatz Leipzig project.
+SYNOPSIS
   $ require 'wlapi'
   $ api = WLAPI::API.new
   $ api.synonyms('Haus', 15) # returns an array with string values (UTF8 encoded)
 See RDOC documentation for details on particular methods.
-You can find some invocation examples in example/example.rb
+LICENSE
+WLAPI is copyrighted software by Andrei Beliankou, 2010.
+You may use, redistribute and change it under the terms
+provided in the LICENSE file.

data/README.rdoc ADDED Viewed

@@ -0,0 +1,22 @@
+= WLAPI
+* {RubyGems}[http://rubygems.org/gems/wlapi]
+* Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
+* {WLAPI Project Page}[http://wlapi.rubyforge.org/]
+== DESCRIPTION
+WLAPI is a simple API for Wortschatz Leipzig project.
+== SYNOPSIS
+  $ require 'wlapi'
+  $ api = WLAPI::API.new
+  $ api.synonyms('Haus', 15) # returns an array with string values (UTF8 encoded)
+See RDOC documentation for details on particular methods.
+== LICENSE
+WLAPI is copyrighted software by Andrei Beliankou, 2010.
+You may use, redistribute and change it under the terms
+provided in the LICENSE file.

data/Rakefile ADDED Viewed

@@ -0,0 +1,14 @@
+require 'rubygems'
+require 'rake'
+# 'rake/clean' is required because it provides the 'clean' and 'clobber' tasks and the CLOBBER list used below
+require 'rake/clean'
+require 'rake/testtask'
+SRC = FileList['*.rb'] # Ruby files in the project root; not referenced again in this Rakefile
+CLOBBER.include('doc', '**/*.html', '*.gem') # generated docs, HTML files and built gems are added to the clobber list
+Rake::TestTask.new do |t| # test task configured with every Ruby file under test/
+  t.test_files = FileList['test/*.rb']
+end

data/lib/wlapi/api.rb ADDED Viewed

@@ -0,0 +1,354 @@
+# -*- coding: utf-8 -*-
+# 2010-, Andrei Beliankou
+# :title: Ruby based API for Wortschatz Leipzig web services
+# SAVON is a SOAP client.
+require 'savon'
+# REXML is fast enough for our task.
+require 'rexml/document'
+include REXML
+# Top level namespace wrapper for WLAPI
+module WLAPI
+  # This class represents an interface to the linguistic web services
+  # provided by the University of Leipzig.
+  #
+  # See the project 'Wortschatz Leipzig' for more details.
+  class API
+    # At the creation point clients for all services are being instantiated.
+    # You can also set the login and the password (it defaults to 'anonymous').
+    #   api = WLAPI::API.new
+    def initialize(login = 'anonymous', pass = 'anonymous')
+      # This hash contains the URLs to the single services.
+      endpoint = 'http://wortschatz.uni-leipzig.de/axis/services'
+      @services = {
+        'Thesaurus' => "#{endpoint}/Thesaurus",
+        'Baseform' => "#{endpoint}/Baseform",
+        'Similarity' => "#{endpoint}/Similarity",
+        'Synonyms' => "#{endpoint}/Synonyms",
+        'Sachgebiet' => "#{endpoint}/Sachgebiet",
+        'Frequencies' => "#{endpoint}/Frequencies",
+        'Kookurrenzschnitt' => "#{endpoint}/Kookkurrenzschnitt",
+        'ExperimentalSynonyms' => "#{endpoint}/ExperimentalSynonyms",
+        'RightCollocationFinder' => "#{endpoint}/RightCollocationFinder",
+        'LeftCollocationFinder' => "#{endpoint}/LeftCollocationFinder",
+        'Wordforms' => "#{endpoint}/Wordforms",
+        'CooccurrencesAll' => "#{endpoint}/CooccurrencesAll",
+        'LeftNeighbours' => "#{endpoint}/LeftNeighbours",
+        'RightNeighbours' => "#{endpoint}/RightNeighbours",
+        'Sentences' => "#{endpoint}/Sentences",
+        'Cooccurrences' => "#{endpoint}/Cooccurrences"
+        # no MARSService and Kreuzwortrraetsel
+      }
+      # cl short for client.
+      # Dynamically create all the clients and set access credentials.
+      # It can be a very bad idea to instantiate all the clients at once,
+      # we should investigate the typical user behaviour.
+      # If only one service is used in the separate session => rewrite the class!
+      @services.each do |key, val|
+        cl_name = '@cl_' + key
+        eval("#{cl_name} = Savon::Client.new(val)")
+        eval("#{cl_name}.request.basic_auth(login, pass)")
+      end
+      # Savon creates very verbose logs, switching off.
+      Savon::Request.log = false unless $DEBUG
+    end
+    # Main methods to access different services.
+    #
+    # You can define the limit for the result set, it defaults to 10.
+    # If you want to get all the results, you should provide a number,
+    # which would be greater than the result set since we cannot
+    # predict how many answers the server will give us. Just try it.
+    ##############################################################################
+    ## One parameter methods.
+    ##############################################################################
+    # Returns the frequency and frequency class of the input word.
+    # Frequency class is computed in relation to the most frequent word
+    # in the corpus. The higher the class, the rarer the word:
+    #   api.frequencies("Autos") => ["40614", "9"]
+    def frequencies(word)
+      arg1 = ['Wort', word]
+      answer = query(@cl_Frequencies, @services['Frequencies'], arg1)
+      return get_answer(answer)
+    end
+    # Gets the baseform (whatever it is :) not lemma).
+    # Returns the lemmatized (base) form of the input word
+    # and the POS tag in an array:
+    #   api.baseform("Auto") => ["Auto", "N"]
+    def baseform(word)
+      arg1 = ['Wort', word]
+      answer = query(@cl_Baseform, @services['Baseform'], arg1)
+      return get_answer(answer)
+    end
+    # Returns categories for a given input word as an array:
+    #   api.domain("Michael") => ["Vorname", "Nachname", "Männername"]
+    #--
+    # Is it a good name? all names are in English, but here..
+    # let's call it domain, not sachgebiet
+    def domain(word)
+      arg1 = ['Wort', word]
+      answer = query(@cl_Sachgebiet, @services['Sachgebiet'], arg1)
+      return get_answer(answer)
+    end
+    ## Two parameter methods.
+    ##############################################################################
+    # Returns all other word forms of the same lemma for a given word form.
+    #   api.wordforms("Auto") => ["Auto", "Autos"]
+    def wordforms(word, limit = 10)
+      # note, it is the only service which requires 'Word', not 'Wort'
+      arg1 = ['Word', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_Wordforms, @services['Wordforms'], arg1, arg2)
+      return get_answer(answer)
+    end
+    # As the Synonyms service returns synonyms of the given input word.
+    # However, this first builds a lemma of the input word
+    # and thus returns more synonyms:
+    #   api.thesaurus("Auto") => ["Auto", "Bahn", "Wagen", "Zug", "Schiff", ...]
+    def thesaurus(word, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_Thesaurus, @services['Thesaurus'], arg1, arg2)
+      return get_answer(answer)
+    end
+    # This method searches for synonyms.
+    # Returns synonyms of the input word. In other words, this is a thesaurus.
+    #   api.synonyms("Auto") => ["Kraftwagen", "Automobil", "Benzinkutsche", ...]
+    def synonyms(word, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_Synonyms, @services['Synonyms'], arg1, arg2)
+      # Synonym service provide multiple values, so we take only odd.
+      return get_answer(answer, '[position() mod 2 = 1 ]')
+    end
+    # Returns sample sentences containing the input word.
+    # The return value is an array:
+    #   api.sentences("Auto") => ["40808144", "Zweitens der freche, frische Klang der Hupe
+    #   und drittens die hinreißend gestylten 16-Zoll-Felgen,
+    #   die es leider nur für dieses Auto gibt.", ...]
+    #--
+    # ok, but results should be filtered
+    def sentences(word, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_Sentences, @services['Sentences'], arg1, arg2)
+      return get_answer(answer)
+    end
+    # For a given input word, returns statistically significant left neighbours
+    # (words co-occurring immediately to the left of the input word).
+    #   api.left_neighbours("Auto") => ["geparktes", "Auto", "561", ...]
+    #--
+    # ok, but results should be filtered
+    def left_neighbours(word, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_LeftNeighbours, @services['LeftNeighbours'], arg1, arg2)
+      return get_answer(answer)
+    end
+    # For a given input word, returns statistically significant right neighbours
+    # (words co-occurring immediately to the right of the input word).
+    #   api.right_neighbours("Auto") => ["Auto", "erfaßt", "575", ...]
+    #--
+    # ok, but results should be filtered
+    def right_neighbours(word, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_RightNeighbours, @services['RightNeighbours'], arg1, arg2)
+      return get_answer(answer)
+    end
+    # Returns automatically computed contextually similar words of the input word.
+    # Such similar words may be antonyms, hyperonyms, synonyms,
+    # cohyponyms or other.
+    # Note that due to the huge amount of data any query to this services
+    # may take a long time.
+    #   api.similarity("Auto") => ["Auto", "Wagen", "26", ...]
+    def similarity(word, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_Similarity, @services['Similarity'], arg1, arg2)
+      return get_answer(answer)
+    end
+    # This service delivers an experimental synonyms request for internal tests.
+    #--
+    # don't know, if we have to include this service...
+    def experimental_synonyms(word, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Limit', limit]
+      answer = query(@cl_ExperimentalSynonyms, @services['ExperimentalSynonyms'], arg1, arg2)
+      return get_answer(answer)
+    end
+    ## Three parameter methods.
+    ##############################################################################
+    # Attempts to find linguistic collocations that occur to the right
+    # of the given input word.
+    # The parameter 'Wortart' accepts four values 'A, V, N, S'
+    # which stand for adjective, verb, noun and stopword respectively.
+    # The parameter restricts the type of words found.
+    # It returns an array:
+    #   api.right_collocation_finder("Auto", "V", 10) => ["Auto", "abfackeln", "V", ...]
+    def right_collocation_finder(word, pos, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Wortart', pos]
+      arg3 = ['Limit', limit]
+      answer = query(@cl_RightCollocationFinder, @services['RightCollocationFinder'], arg1, arg2, arg3)
+      return get_answer(answer)
+    end
+    # Attempts to find linguistic collocations that occur to the left
+    # of the given input word.
+    # The parameter 'Wortart' accepts four values 'A, V, N, S'
+    # which stand for adjective, verb, noun and stopword respectively.
+    # The parameter restricts the type of words found.
+    # It returns an array:
+    #   api.left_collocation_finder("Stuhl", "A", 10) => ["apostolisch", "A", "Stuhl", ...]
+    def left_collocation_finder(word, pos, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Wortart', pos]
+      arg3 = ['Limit', limit]
+      answer = query(@cl_LeftCollocationFinder, @services['LeftCollocationFinder'], arg1, arg2, arg3)
+      return get_answer(answer)
+    end
+    # Returns statistically significant co-occurrences of the input word.
+    def cooccurrences(word, sign, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Mindestsignifikanz', sign]
+      arg3 = ['Limit', limit]
+      raise 'Not implemented yet!'
+    end
+    # Returns statistically significant co-occurrences of the input word.
+    # However, it searches in the unrestricted version of the co-occurrences table
+    # as in the Cooccurrences services,
+    # which means significantly longer wait times.
+    def cooccurrences_all(word, sign, limit = 10)
+      arg1 = ['Wort', word]
+      arg2 = ['Mindestsignifikanz', sign]
+      arg3 = ['Limit', limit]
+      raise 'Not implemented yet!'
+    end
+    # Returns the intersection of the co-occurrences of the two given words.
+    # The result set is ordered according to the sum of the significances
+    # in descending order. Note that due to the join involved,
+    # this make take some time.
+    #--
+    # let's call it intersection, not kookurrenzschnitt
+    # is being used INTERN, we need additional credentials
+    def intersection(word1, word2, limit = 10)
+      arg1 = ['Wort 1', word1]
+      arg2 = ['Wort 2', word2]
+      arg3 = ['Limit', limit]
+      # we are not going to implement it now
+      raise 'Will never be implemented!'
+    end
+    private
+    # Main query method, it invokes the soap engine.
+    # This method combines all the data to one SOAP request and gets the answer.
+    # It is not compatible with one and three parameter methods yet.
+    # args contains an array [[key1, value1], [key2, value2], [key3, value3]]
+    # with keys and values for the soap query
+    def query(cl, namespace, *args)
+      # Calling the action with ! (disables the wsdl query).
+      # wsdl is disabled since calling the server for wsdl can last too long.
+      resp = cl.execute! do |soap|
+        # adding a namespace, wsdl is disabled
+        soap.namespace = namespace
+        soap.namespaces['xmlns:soapenv'] = "http://schemas.xmlsoap.org/soap/envelope/"
+        # Every service has a different namespace.
+        soap.namespaces['xmlns:urn'] = "urn:#{namespace.sub(/.+ces\//, '')}"
+        soap.namespaces['xmlns:dat'] = "http://datatypes.webservice.wortschatz.uni_leipzig.de"
+        body = "<urn:objRequestParameters>"
+        body << "<urn:corpus>de</urn:corpus>"
+        body << "<urn:parameters>"
+        # setting the first argument (usually 'Wort')
+        if args[0]
+        body << "<urn:dataVectors><dat:dataRow>#{args[0][0]}</dat:dataRow><dat:dataRow>#{args[0][1]}</dat:dataRow></urn:dataVectors>"
+        end
+        # setting the second argument (usually 'Limit')
+        if args[1]
+          body << "<urn:dataVectors><dat:dataRow>#{args[1][0]}</dat:dataRow><dat:dataRow>#{args[1][1]}</dat:dataRow></urn:dataVectors>"
+        end
+        # setting setting the third argument (no common value)
+        if args[2]
+          body << "<urn:dataVectors><dat:dataRow>#{args[2][0]}</dat:dataRow><dat:dataRow>#{args[2][1]}</dat:dataRow></urn:dataVectors>"
+        end
+        body << "</urn:parameters>"
+        body << "</urn:objRequestParameters>"
+        soap.body = body
+        STDERR.puts soap.to_xml if $DEBUG
+      end
+      doc = Document.new(resp.to_xml)
+      STDERR.puts doc if $DEBUG
+      return doc
+    end
+    # This method extracts valuable data from the XML structure
+    # of the soap response. It returns an array with extracted xml text nodes
+    # or nil, if the service provided no answer.
+    # The same collection is printed to stderr in the DEBUG mode.
+    #--
+    # TODO: what if the answer is empty?
+    def get_answer(doc, mod='')
+      result = []
+      # The path seems to be weird, because the namespaces change incrementally
+      # in the output, so I don't want to treat it here.
+      # A modifier needed because synonyms service provides duplicate values.
+      XPath.each(doc, "//result/*/*#{mod}") {|el| STDERR.puts el.text} if $DEBUG
+      XPath.each(doc, "//result/*/*#{mod}") {|el| result << el.text}
+      if result.empty?
+        return nil
+      end
+      return result
+    end
+  end # class
+end # module