wlapi 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. data/INSTALL +0 -0
  2. data/LICENSE +19 -0
  3. data/README +5 -0
  4. data/doc/INSTALL.html +88 -0
  5. data/doc/LICENSE.html +113 -0
  6. data/doc/README.html +92 -0
  7. data/doc/WLAPI.html +1006 -0
  8. data/doc/bin/example_rb.html +54 -0
  9. data/doc/classes/WLAPI/API.html +463 -0
  10. data/doc/classes/WLAPI/API.src/M000001.html +51 -0
  11. data/doc/classes/WLAPI/API.src/M000002.html +18 -0
  12. data/doc/classes/WLAPI/API.src/M000003.html +18 -0
  13. data/doc/classes/WLAPI/API.src/M000004.html +18 -0
  14. data/doc/classes/WLAPI/API.src/M000005.html +20 -0
  15. data/doc/classes/WLAPI/API.src/M000006.html +19 -0
  16. data/doc/classes/WLAPI/API.src/M000007.html +20 -0
  17. data/doc/classes/WLAPI/API.src/M000008.html +19 -0
  18. data/doc/classes/WLAPI/API.src/M000009.html +19 -0
  19. data/doc/classes/WLAPI/API.src/M000010.html +19 -0
  20. data/doc/classes/WLAPI/API.src/M000011.html +18 -0
  21. data/doc/classes/WLAPI/API.src/M000012.html +18 -0
  22. data/doc/classes/WLAPI/API.src/M000013.html +18 -0
  23. data/doc/classes/WLAPI/API.src/M000014.html +18 -0
  24. data/doc/classes/WLAPI/API.src/M000015.html +18 -0
  25. data/doc/classes/WLAPI/API.src/M000016.html +18 -0
  26. data/doc/classes/WLAPI/API.src/M000017.html +18 -0
  27. data/doc/classes/WLAPI.html +121 -0
  28. data/doc/created.rid +1 -0
  29. data/doc/files/example/example_rb.html +119 -0
  30. data/doc/files/lib/wlapi/wlapi_rb.html +122 -0
  31. data/doc/fr_class_index.html +28 -0
  32. data/doc/fr_file_index.html +28 -0
  33. data/doc/fr_method_index.html +43 -0
  34. data/doc/index.html +24 -0
  35. data/doc/lib/wlapi_rb.html +59 -0
  36. data/doc/rdoc-style.css +208 -0
  37. data/doc/rdoc.css +706 -0
  38. data/example/example.rb +15 -0
  39. data/example/example.rb~ +2 -0
  40. data/lib/wlapi.rb +263 -0
  41. data/wlapi.gemspec +14 -0
  42. metadata +127 -0
data/lib/wlapi.rb ADDED
@@ -0,0 +1,263 @@
1
+ # 2010-, Andrei Beliankou
2
+ # Version: 0.3
3
+ # :title: Ruby based API for Wortschatz Leipzig web services
4
+
5
+
6
+ # SAVON is a SOAP client.
7
+ require 'savon'
8
+
9
+ # REXML is fast enough for our task.
10
+ require 'rexml/document'
11
+ include REXML
12
+
13
+ # This class represents an interface to the linguistic web services
14
+ # provided by the University of Leipzig.
15
+ #
16
+ # See the project 'Wortschatz Leipzig' for more details.
17
+ module WLAPI
18
+
19
+ class API
20
+
21
+ # At the creation point clients for all services are being instantiated.
22
+ # You can also set the login and the password (both default to 'anonymous').
23
+ def initialize(login = 'anonymous', pass = 'anonymous')
24
+ # This hash contains the URLs to the single services.
25
+ @services = {
26
+ 'Thesaurus' => 'http://wortschatz.uni-leipzig.de/axis/services/Thesaurus',
27
+ 'Baseform' => 'http://wortschatz.uni-leipzig.de/axis/services/Baseform',
28
+ 'Similarity' => 'http://wortschatz.uni-leipzig.de/axis/services/Similarity',
29
+ 'Synonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/Synonyms',
30
+ 'Sachgebiet' => 'http://wortschatz.uni-leipzig.de/axis/services/Sachgebiet',
31
+ 'Frequencies' => 'http://wortschatz.uni-leipzig.de/axis/services/Frequencies',
32
+ 'Kookurrenzschnitt' => 'http://wortschatz.uni-leipzig.de/axis/services/Kookkurrenzschnitt',
33
+ 'ExperimentalSynonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/ExperimentalSynonyms',
34
+ 'RightCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/RightCollocationFinder',
35
+ 'LeftCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftCollocationFinder',
36
+ 'Wordforms' => 'http://wortschatz.uni-leipzig.de/axis/services/Wordforms',
37
+ 'CooccurrencesAll' => 'http://wortschatz.uni-leipzig.de/axis/services/CooccurrencesAll',
38
+ 'LeftNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftNeighbours',
39
+ 'RightNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/RightNeighbours',
40
+ 'Sentences' => 'http://wortschatz.uni-leipzig.de/axis/services/Sentences',
41
+ 'Cooccurrences' => 'http://wortschatz.uni-leipzig.de/axis/services/Cooccurrences'
42
+ # no MARSService and Kreuzwortraetsel
43
+ }
44
+
45
+ # cl short for client.
46
+ # Dynamically create all the clients and set access credentials.
47
+ # It can be a very bad idea to instantiate all the clients at once,
48
+ # we should investigate the typical user behaviour.
49
+ # If only one service is used in the separate session => rewrite the class!
50
+ @services.each do |key, val|
51
+ cl_name = '@cl_' + key
52
+ eval("#{cl_name} = Savon::Client.new(val)")
53
+ eval("#{cl_name}.request.basic_auth(login, pass)")
54
+ end
55
+
56
+ # Savon creates very verbose logs, switching off.
57
+ Savon::Request.log = false unless $DEBUG
58
+ end
59
+
60
+ # Main methods to access different services.
61
+ #
62
+ # You can define the limit for the result set, it defaults to 10.
63
+ # If you want to get all the results, you should provide a number,
64
+ # which would be greater than the result set since we cannot
65
+ # predict how many answers the server will give us. Just try it.
66
+ ##############################################################################
67
+
68
+ ## One parameter methods.
69
+ ##############################################################################
70
+
71
+ # Returns the frequency and frequency class of the input word.
72
+ # Frequency class is computed in relation to the most frequent word
73
+ # in the corpus. The higher the class, the rarer the word.
74
+ def frequencies
75
+ raise 'Not implemented yet!'
76
+ end
77
+
78
+ # This method gets the baseform (whatever it is :) not lemma).
79
+ # Returns the lemmatized (base) form of the input word.
80
+ def baseform
81
+ raise 'Not implemented yet!'
82
+ end
83
+
84
+ # Returns categories for a given input word.
85
+ # --
86
+ # Is it a good name? all names are in English, but here...
87
+ def sachgebiet
88
+ raise 'Not implemented yet!'
89
+ end
90
+
91
+ ## Two parameter methods.
92
+ ##############################################################################
93
+
94
+ # Returns all other word forms of the same lemma for a given word form.
95
+ # --
96
+ # malformed soap, investigate!!!
97
+ # doesn't function
98
+ def wordforms(word, limit)
99
+ raise 'Not implemented yet!' # remove later
100
+ answer = query(word, limit, @cl_Wordforms, @services['Wordforms'])
101
+ return get_answer(answer)
102
+ end
103
+
104
+ # Like the Synonyms service, returns synonyms of the given input word.
105
+ # However, this first builds a lemma of the input word
106
+ # and thus returns more synonyms.
107
+ def thesaurus(word, limit = 10)
108
+ answer = query(word, limit, @cl_Thesaurus, @services['Thesaurus'])
109
+ return get_answer(answer)
110
+ end
111
+
112
+ # This method searches for synonyms.
113
+ # Returns synonyms of the input word. In other words, this is a thesaurus.
114
+ def synonyms(word, limit = 10)
115
+ answer = query(word, limit, @cl_Synonyms, @services['Synonyms'])
116
+ # The Synonyms service provides duplicate values, so we keep only the odd-positioned nodes.
117
+ return get_answer(answer, '[position() mod 2 = 1 ]')
118
+ end
119
+
120
+ # ok, but results should be filtered
121
+ # Returns sample sentences containing the input word.
122
+ def sentences(word, limit)
123
+ answer = query(word, limit, @cl_Sentences, @services['Sentences'])
124
+ return get_answer(answer)
125
+ end
126
+
127
+ # For a given input word, returns statistically significant left neighbours
128
+ # (words co-occurring immediately to the left of the input word).
129
+ # --
130
+ # ok, but results should be filtered
131
+ def left_neighbours(word, limit)
132
+ answer = query(word, limit, @cl_LeftNeighbours, @services['LeftNeighbours'])
133
+ return get_answer(answer)
134
+ end
135
+
136
+ # For a given input word, returns statistically significant right neighbours
137
+ # (words co-occurring immediately to the right of the input word).
138
+ # --
139
+ # ok, but results should be filtered
140
+ def right_neighbours(word, limit)
141
+ answer = query(word, limit, @cl_RightNeighbours, @services['RightNeighbours'])
142
+ return get_answer(answer)
143
+ end
144
+
145
+
146
+ # Returns automatically computed contextually similar words of the input word.
147
+ # Such similar words may be antonyms, hyperonyms, synonyms,
148
+ # cohyponyms or other.
149
+ # Note that due to the huge amount of data any query to this service
150
+ # may take a long time.
151
+ def similarity
152
+ raise 'Not implemented yet!'
153
+ end
154
+
155
+ # This service delivers an experimental synonyms request for internal tests.
156
+ # --
157
+ # don't know, if we have to include this service...
158
+ def experimental_synonyms
159
+ raise 'Not implemented yet!'
160
+ end
161
+
162
+ ## Three parameter methods.
163
+ ##############################################################################
164
+
165
+ # Attempts to find linguistic collocations that occur to the right
166
+ # of the given input word.
167
+ # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
168
+ # verb, noun and stopword, respectively.
169
+ # The parameter restricts the type of words found.
170
+ def right_collocation_finder
171
+ raise 'Not implemented yet!'
172
+ end
173
+
174
+ # Attempts to find linguistic collocations that occur to the left
175
+ # of the given input word.
176
+ # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
177
+ # verb, noun and stopword, respectively.
178
+ # The parameter restricts the type of words found.
179
+ def left_collocation_finder
180
+ raise 'Not implemented yet!'
181
+ end
182
+
183
+ # Returns statistically significant co-occurrences of the input word.
184
+ def cooccurrences
185
+ raise 'Not implemented yet!'
186
+ end
187
+
188
+ # Returns statistically significant co-occurrences of the input word.
189
+ # However, it searches in the unrestricted version of the co-occurrences table
190
+ # as in the Cooccurrences services,
191
+ # which means significantly longer wait times.
192
+ def cooccurrences_all
193
+ raise 'Not implemented yet!'
194
+ end
195
+
196
+ # Returns the intersection of the co-occurrences of the two given words.
197
+ # The result set is ordered according to the sum of the significances
198
+ # in descending order. Note that due to the join involved,
199
+ # this may take some time.
200
+ def kookurrenzschnitt
201
+ raise 'Not implemented yet!'
202
+ end
203
+
204
+ private
205
+
206
+ # Main query method, it invokes the soap engine.
207
+ # This method combines all the data to one SOAP request and gets the answer.
208
+ # It is not compatible with one and three parameter methods yet.
209
+ def query(word, limit, cl, namespace)
210
+ # Calling the action with ! (disables the wsdl query).
211
+ # wsdl is disabled since calling the server for wsdl can last too long.
212
+ resp = cl.execute! do |soap|
213
+
214
+ # adding a namespace, wsdl is disabled
215
+ soap.namespace = namespace
216
+
217
+ body = <<EOF
218
+ <urn:objRequestParameters>
219
+ <urn:corpus>de</urn:corpus>
220
+ <urn:parameters>
221
+ <urn:dataVectors>
222
+ <dat:dataRow>Wort</dat:dataRow>
223
+ <dat:dataRow>#{word}</dat:dataRow>
224
+ </urn:dataVectors>
225
+ <urn:dataVectors>
226
+ <dat:dataRow>Limit</dat:dataRow>
227
+ <dat:dataRow>#{limit}</dat:dataRow>
228
+ </urn:dataVectors>
229
+ </urn:parameters>
230
+ </urn:objRequestParameters>
231
+ EOF
232
+
233
+ soap.body = body
234
+
235
+ soap.namespaces['xmlns:soapenv'] = "http://schemas.xmlsoap.org/soap/envelope/"
236
+ # Every service has a different namespace, but it doesn't matter.
237
+ soap.namespaces['xmlns:urn'] = "urn:#{namespace.sub(/.+ces\//, '')}"
238
+
239
+ soap.namespaces['xmlns:dat'] = "http://datatypes.webservice.wortschatz.uni_leipzig.de"
240
+ end
241
+
242
+ doc = Document.new(resp.to_xml)
243
+
244
+ STDERR.puts doc if $DEBUG
245
+
246
+ return doc
247
+
248
+ end
249
+
250
+ # This method extracts valuable data from the XML structure
251
+ # of the SOAP response. It returns an array with extracted XML text nodes
252
+ # and prints out the same collection in the DEBUG mode.
253
+ def get_answer(doc, mod='')
254
+ result = []
255
+ # The path seems to be weird, but the namespaces change incrementally
256
+ # in the output, so I don't want to treat it here.
257
+ # A modifier is needed because the synonyms service provides duplicate values.
258
+ XPath.each(doc, "//result/*/*#{mod}") {|el| STDERR.puts el.text} if $DEBUG
259
+ XPath.each(doc, "//result/*/*#{mod}") {|el| result << el.text}
260
+ return result
261
+ end
262
+ end
263
+ end
data/wlapi.gemspec ADDED
@@ -0,0 +1,14 @@
1
+ Gem::Specification.new do |s|
2
+ s.name = "wlapi"
3
+ s.summary = "Use different web services by Wortschatz Lepzip Project."
4
+ s.description = File.read(File.join(File.dirname(__FILE__), 'README'))
5
+ s.requirements = [ 'Using this gem requires understanding of the linguistic background behind the WL project.' ]
6
+ s.version = "0.0.3"
7
+ s.author = "Andrei Beliankou"
8
+ s.email = "a.belenkow@uni-trier.de"
9
+ s.homepage = "http://www.uni-trier.de/index.php?id=34451"
10
+ s.add_dependency('savon')
11
+ s.platform = Gem::Platform::RUBY
12
+ s.required_ruby_version = '>=1.8'
13
+ s.files = Dir['**/**']
14
+ end
metadata ADDED
@@ -0,0 +1,127 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: wlapi
3
+ version: !ruby/object:Gem::Version
4
+ hash: 25
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 3
10
+ version: 0.0.3
11
+ platform: ruby
12
+ authors:
13
+ - Andrei Beliankou
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-11-14 00:00:00 +01:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: savon
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: |
36
+ == WLAPI is a simple API for Wortschatz Leipzig project.
37
+
38
+ See RDOC documentation for details.
39
+
40
+ You can find some invokation examples in example/example.rb
41
+
42
+ email: a.belenkow@uni-trier.de
43
+ executables: []
44
+
45
+ extensions: []
46
+
47
+ extra_rdoc_files: []
48
+
49
+ files:
50
+ - wlapi.gemspec
51
+ - doc/fr_class_index.html
52
+ - doc/classes/WLAPI.html
53
+ - doc/classes/WLAPI/API.src/M000007.html
54
+ - doc/classes/WLAPI/API.src/M000016.html
55
+ - doc/classes/WLAPI/API.src/M000006.html
56
+ - doc/classes/WLAPI/API.src/M000003.html
57
+ - doc/classes/WLAPI/API.src/M000011.html
58
+ - doc/classes/WLAPI/API.src/M000008.html
59
+ - doc/classes/WLAPI/API.src/M000017.html
60
+ - doc/classes/WLAPI/API.src/M000010.html
61
+ - doc/classes/WLAPI/API.src/M000012.html
62
+ - doc/classes/WLAPI/API.src/M000009.html
63
+ - doc/classes/WLAPI/API.src/M000013.html
64
+ - doc/classes/WLAPI/API.src/M000014.html
65
+ - doc/classes/WLAPI/API.src/M000005.html
66
+ - doc/classes/WLAPI/API.src/M000015.html
67
+ - doc/classes/WLAPI/API.src/M000004.html
68
+ - doc/classes/WLAPI/API.src/M000001.html
69
+ - doc/classes/WLAPI/API.src/M000002.html
70
+ - doc/classes/WLAPI/API.html
71
+ - doc/fr_method_index.html
72
+ - doc/rdoc.css
73
+ - doc/index.html
74
+ - doc/fr_file_index.html
75
+ - doc/README.html
76
+ - doc/created.rid
77
+ - doc/rdoc-style.css
78
+ - doc/WLAPI.html
79
+ - doc/INSTALL.html
80
+ - doc/bin/example_rb.html
81
+ - doc/files/example/example_rb.html
82
+ - doc/files/lib/wlapi/wlapi_rb.html
83
+ - doc/LICENSE.html
84
+ - doc/lib/wlapi_rb.html
85
+ - INSTALL
86
+ - LICENSE
87
+ - README
88
+ - example/example.rb~
89
+ - example/example.rb
90
+ - lib/wlapi.rb
91
+ has_rdoc: true
92
+ homepage: http://www.uni-trier.de/index.php?id=34451
93
+ licenses: []
94
+
95
+ post_install_message:
96
+ rdoc_options: []
97
+
98
+ require_paths:
99
+ - lib
100
+ required_ruby_version: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ">="
104
+ - !ruby/object:Gem::Version
105
+ hash: 31
106
+ segments:
107
+ - 1
108
+ - 8
109
+ version: "1.8"
110
+ required_rubygems_version: !ruby/object:Gem::Requirement
111
+ none: false
112
+ requirements:
113
+ - - ">="
114
+ - !ruby/object:Gem::Version
115
+ hash: 3
116
+ segments:
117
+ - 0
118
+ version: "0"
119
+ requirements:
120
+ - Using this gem requires understanding of the linguistic background behind the WL project.
121
+ rubyforge_project:
122
+ rubygems_version: 1.3.7
123
+ signing_key:
124
+ specification_version: 3
125
+ summary: Use different web services by Wortschatz Lepzip Project.
126
+ test_files: []
127
+