wlapi 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/INSTALL +0 -0
- data/LICENSE +19 -0
- data/README +5 -0
- data/doc/INSTALL.html +88 -0
- data/doc/LICENSE.html +113 -0
- data/doc/README.html +92 -0
- data/doc/WLAPI.html +1006 -0
- data/doc/bin/example_rb.html +54 -0
- data/doc/classes/WLAPI/API.html +463 -0
- data/doc/classes/WLAPI/API.src/M000001.html +51 -0
- data/doc/classes/WLAPI/API.src/M000002.html +18 -0
- data/doc/classes/WLAPI/API.src/M000003.html +18 -0
- data/doc/classes/WLAPI/API.src/M000004.html +18 -0
- data/doc/classes/WLAPI/API.src/M000005.html +20 -0
- data/doc/classes/WLAPI/API.src/M000006.html +19 -0
- data/doc/classes/WLAPI/API.src/M000007.html +20 -0
- data/doc/classes/WLAPI/API.src/M000008.html +19 -0
- data/doc/classes/WLAPI/API.src/M000009.html +19 -0
- data/doc/classes/WLAPI/API.src/M000010.html +19 -0
- data/doc/classes/WLAPI/API.src/M000011.html +18 -0
- data/doc/classes/WLAPI/API.src/M000012.html +18 -0
- data/doc/classes/WLAPI/API.src/M000013.html +18 -0
- data/doc/classes/WLAPI/API.src/M000014.html +18 -0
- data/doc/classes/WLAPI/API.src/M000015.html +18 -0
- data/doc/classes/WLAPI/API.src/M000016.html +18 -0
- data/doc/classes/WLAPI/API.src/M000017.html +18 -0
- data/doc/classes/WLAPI.html +121 -0
- data/doc/created.rid +1 -0
- data/doc/files/example/example_rb.html +119 -0
- data/doc/files/lib/wlapi/wlapi_rb.html +122 -0
- data/doc/fr_class_index.html +28 -0
- data/doc/fr_file_index.html +28 -0
- data/doc/fr_method_index.html +43 -0
- data/doc/index.html +24 -0
- data/doc/lib/wlapi_rb.html +59 -0
- data/doc/rdoc-style.css +208 -0
- data/doc/rdoc.css +706 -0
- data/example/example.rb +15 -0
- data/example/example.rb~ +2 -0
- data/lib/wlapi.rb +263 -0
- data/wlapi.gemspec +14 -0
- metadata +127 -0
data/lib/wlapi.rb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
# 2010-, Andrei Beliankou
|
2
|
+
# Version: 0.0.3
|
3
|
+
# :title: Ruby based API for Wortschatz Leipzig web services
|
4
|
+
|
5
|
+
|
6
|
+
# SAVON is a SOAP client.
|
7
|
+
require 'savon'
|
8
|
+
|
9
|
+
# REXML is fast enough for our task.
|
10
|
+
require 'rexml/document'
|
11
|
+
include REXML
|
12
|
+
|
13
|
+
# This class represents an interface to the linguistic web services
|
14
|
+
# provided by the University of Leipzig.
|
15
|
+
#
|
16
|
+
# See the project 'Wortschatz Leipzig' for more details.
|
17
|
+
module WLAPI

  # Client for the SOAP web services of the 'Wortschatz Leipzig' project.
  #
  # One Savon client per service is created in the constructor; the public
  # methods send a request to the matching service and return the text
  # nodes of the answer as an array of strings.
  class API

    # Characters that must be replaced by entities before a value can be
    # placed inside the XML request body.
    XML_ESCAPES = {
      '&' => '&amp;',
      '<' => '&lt;',
      '>' => '&gt;',
      '"' => '&quot;',
      "'" => '&apos;'
    }.freeze

    # At the creation point clients for all services are being instantiated.
    # You can also set the login and the password (both default to 'anonymous').
    def initialize(login = 'anonymous', pass = 'anonymous')
      # This hash contains the URLs to the single services.
      # There are no entries for MARSService and Kreuzwortraetsel.
      @services = {
        'Thesaurus' => 'http://wortschatz.uni-leipzig.de/axis/services/Thesaurus',
        'Baseform' => 'http://wortschatz.uni-leipzig.de/axis/services/Baseform',
        'Similarity' => 'http://wortschatz.uni-leipzig.de/axis/services/Similarity',
        'Synonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/Synonyms',
        'Sachgebiet' => 'http://wortschatz.uni-leipzig.de/axis/services/Sachgebiet',
        'Frequencies' => 'http://wortschatz.uni-leipzig.de/axis/services/Frequencies',
        'Kookurrenzschnitt' => 'http://wortschatz.uni-leipzig.de/axis/services/Kookkurrenzschnitt',
        'ExperimentalSynonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/ExperimentalSynonyms',
        'RightCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/RightCollocationFinder',
        'LeftCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftCollocationFinder',
        'Wordforms' => 'http://wortschatz.uni-leipzig.de/axis/services/Wordforms',
        'CooccurrencesAll' => 'http://wortschatz.uni-leipzig.de/axis/services/CooccurrencesAll',
        'LeftNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftNeighbours',
        'RightNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/RightNeighbours',
        'Sentences' => 'http://wortschatz.uni-leipzig.de/axis/services/Sentences',
        'Cooccurrences' => 'http://wortschatz.uni-leipzig.de/axis/services/Cooccurrences'
      }

      # Create one client per service, set the access credentials and store
      # it in an instance variable named '@cl_' + service name.
      # It can be a very bad idea to instantiate all the clients at once,
      # we should investigate the typical user behaviour.
      # If only one service is used in the separate session => rewrite the class!
      @services.each do |key, val|
        client = Savon::Client.new(val)
        client.request.basic_auth(login, pass)
        # instance_variable_set replaces the former string eval: same
        # variables, no code built from strings.
        instance_variable_set("@cl_#{key}", client)
      end

      # Savon creates very verbose logs, switching off.
      Savon::Request.log = false unless $DEBUG
    end

    # Main methods to access different services.
    #
    # You can define the limit for the result set, it defaults to 10.
    # If you want to get all the results, you should provide a number,
    # which would be greater than the result set since we cannot
    # predict how many answers the server will give us. Just try it.
    ##############################################################################

    ## One parameter methods.
    ##############################################################################

    # Returns the frequency and frequency class of the input word.
    # Frequency class is computed in relation to the most frequent word
    # in the corpus. The higher the class, the rarer the word.
    #
    # Not implemented yet: always raises a RuntimeError.
    def frequencies
      raise 'Not implemented yet!'
    end

    # This method gets the baseform (whatever it is :) not lemma).
    # Returns the lemmatized (base) form of the input word.
    #
    # Not implemented yet: always raises a RuntimeError.
    def baseform
      raise 'Not implemented yet!'
    end

    # Returns categories for a given input word.
    #
    # Not implemented yet: always raises a RuntimeError.
    #--
    # Is it a good name? all names are in English, but here...
    def sachgebiet
      raise 'Not implemented yet!'
    end

    ## Two parameter methods.
    ##############################################################################

    # Returns all other word forms of the same lemma for a given word form.
    #
    # Not implemented yet: always raises a RuntimeError.
    #--
    # The service answers with malformed SOAP, investigate!!!
    # Once that is solved, this method should do the same as the other two
    # parameter methods:
    #   get_answer(query(word, limit, @cl_Wordforms, @services['Wordforms']))
    def wordforms(word, limit = 10)
      raise 'Not implemented yet!'
    end

    # Like the Synonyms service, returns synonyms of the given input word.
    # However, this first builds a lemma of the input word
    # and thus returns more synonyms.
    #
    # word::  the word to look up.
    # limit:: maximal size of the result set.
    #
    # Returns an array of strings.
    def thesaurus(word, limit = 10)
      answer = query(word, limit, @cl_Thesaurus, @services['Thesaurus'])
      get_answer(answer)
    end

    # This method searches for synonyms.
    # Returns synonyms of the input word. In other words, this is a thesaurus.
    #
    # word::  the word to look up.
    # limit:: maximal size of the result set.
    #
    # Returns an array of strings.
    def synonyms(word, limit = 10)
      answer = query(word, limit, @cl_Synonyms, @services['Synonyms'])
      # The Synonyms service provides multiple values per entry,
      # so we take only the nodes at odd positions.
      get_answer(answer, '[position() mod 2 = 1 ]')
    end

    # Returns sample sentences containing the input word.
    #
    # word::  the word to look up.
    # limit:: maximal size of the result set.
    #
    # Returns an array of strings.
    #--
    # ok, but results should be filtered
    def sentences(word, limit = 10)
      answer = query(word, limit, @cl_Sentences, @services['Sentences'])
      get_answer(answer)
    end

    # For a given input word, returns statistically significant left neighbours
    # (words co-occurring immediately to the left of the input word).
    #
    # word::  the word to look up.
    # limit:: maximal size of the result set.
    #
    # Returns an array of strings.
    #--
    # ok, but results should be filtered
    def left_neighbours(word, limit = 10)
      answer = query(word, limit, @cl_LeftNeighbours, @services['LeftNeighbours'])
      get_answer(answer)
    end

    # For a given input word, returns statistically significant right neighbours
    # (words co-occurring immediately to the right of the input word).
    #
    # word::  the word to look up.
    # limit:: maximal size of the result set.
    #
    # Returns an array of strings.
    #--
    # ok, but results should be filtered
    def right_neighbours(word, limit = 10)
      answer = query(word, limit, @cl_RightNeighbours, @services['RightNeighbours'])
      get_answer(answer)
    end

    # Returns automatically computed contextually similar words of the input word.
    # Such similar words may be antonyms, hyperonyms, synonyms,
    # cohyponyms or other.
    # Note that due to the huge amount of data any query to this service
    # may take a long time.
    #
    # Not implemented yet: always raises a RuntimeError.
    def similarity
      raise 'Not implemented yet!'
    end

    # This service delivers an experimental synonyms request for internal tests.
    #
    # Not implemented yet: always raises a RuntimeError.
    #--
    # don't know, if we have to include this service...
    def experimental_synonyms
      raise 'Not implemented yet!'
    end

    ## Three parameter methods.
    ##############################################################################

    # Attempts to find linguistic collocations that occur to the right
    # of the given input word.
    # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
    # verb, noun and stopword, respectively.
    # The parameter restricts the type of words found.
    #
    # Not implemented yet: always raises a RuntimeError.
    def right_collocation_finder
      raise 'Not implemented yet!'
    end

    # Attempts to find linguistic collocations that occur to the left
    # of the given input word.
    # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
    # verb, noun and stopword, respectively.
    # The parameter restricts the type of words found.
    #
    # Not implemented yet: always raises a RuntimeError.
    def left_collocation_finder
      raise 'Not implemented yet!'
    end

    # Returns statistically significant co-occurrences of the input word.
    #
    # Not implemented yet: always raises a RuntimeError.
    def cooccurrences
      raise 'Not implemented yet!'
    end

    # Returns statistically significant co-occurrences of the input word.
    # However, it searches in the unrestricted version of the co-occurrences table
    # as in the Cooccurrences services,
    # which means significantly longer wait times.
    #
    # Not implemented yet: always raises a RuntimeError.
    def cooccurrences_all
      raise 'Not implemented yet!'
    end

    # Returns the intersection of the co-occurrences of the two given words.
    # The result set is ordered according to the sum of the significances
    # in descending order. Note that due to the join involved,
    # this may take some time.
    #
    # Not implemented yet: always raises a RuntimeError.
    def kookurrenzschnitt
      raise 'Not implemented yet!'
    end

    private

    # Main query method, it invokes the soap engine.
    # This method combines all the data to one SOAP request and gets the answer.
    # It is not compatible with one and three parameter methods yet.
    #
    # word::      the word to look up.
    # limit::     maximal size of the result set.
    # cl::        the client of the service, it has to respond to +execute!+.
    # namespace:: the URL of the service.
    #
    # Returns the answer as a REXML::Document.
    def query(word, limit, cl, namespace)
      # Calling the action with ! (disables the wsdl query).
      # wsdl is disabled since calling the server for wsdl can last too long.
      resp = cl.execute! do |soap|
        # adding a namespace, wsdl is disabled
        soap.namespace = namespace
        soap.body = request_body(word, limit)

        soap.namespaces['xmlns:soapenv'] = "http://schemas.xmlsoap.org/soap/envelope/"
        # Every service has a different namespace: 'urn:' followed by the
        # part of the service URL after 'services/'.
        soap.namespaces['xmlns:urn'] = "urn:#{namespace.sub(/.+ces\//, '')}"
        soap.namespaces['xmlns:dat'] = "http://datatypes.webservice.wortschatz.uni_leipzig.de"
      end

      doc = REXML::Document.new(resp.to_xml)
      STDERR.puts doc if $DEBUG
      doc
    end

    # Builds the XML body of a two parameter request.
    # Both values are escaped, so that characters like '&' or '<'
    # cannot break the request.
    def request_body(word, limit)
      <<EOF
<urn:objRequestParameters>
<urn:corpus>de</urn:corpus>
<urn:parameters>
<urn:dataVectors>
<dat:dataRow>Wort</dat:dataRow>
<dat:dataRow>#{escape_xml(word)}</dat:dataRow>
</urn:dataVectors>
<urn:dataVectors>
<dat:dataRow>Limit</dat:dataRow>
<dat:dataRow>#{escape_xml(limit)}</dat:dataRow>
</urn:dataVectors>
</urn:parameters>
</urn:objRequestParameters>
EOF
    end

    # Replaces the characters & < > " ' in the string form of +value+
    # by the corresponding XML entities.
    def escape_xml(value)
      value.to_s.gsub(/[&<>"']/) { |char| XML_ESCAPES[char] }
    end

    # This method extracts valuable data from the XML structure
    # of the SOAP response. It returns an array with extracted xml text nodes
    # and prints out the same collection in the DEBUG mode.
    #
    # doc:: the parsed answer of the service.
    # mod:: an optional XPath predicate appended to the path, it is needed
    #       because the Synonyms service provides duplicate values.
    def get_answer(doc, mod = '')
      # The path seems to be weird, but the namespaces change incrementally
      # in the output, so I don't want to treat it here.
      # REXML is addressed by its full name, so this method does not depend
      # on REXML being included into the global namespace.
      texts = REXML::XPath.match(doc, "//result/*/*#{mod}").map { |el| el.text }
      texts.each { |text| STDERR.puts text } if $DEBUG
      texts
    end
  end
end
|
data/wlapi.gemspec
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Gem specification for wlapi.
Gem::Specification.new do |s|
  s.name = "wlapi"
  s.summary = "Use different web services by Wortschatz Leipzig Project."
  # The long description is taken from the README file next to this gemspec.
  s.description = File.read(File.join(File.dirname(__FILE__), 'README'))
  s.requirements = [ 'Using this gem requires understanding of the linguistic background behind the WL project.' ]
  s.version = "0.0.3"
  s.author = "Andrei Beliankou"
  s.email = "a.belenkow@uni-trier.de"
  s.homepage = "http://www.uni-trier.de/index.php?id=34451"
  s.add_dependency('savon')
  s.platform = Gem::Platform::RUBY
  s.required_ruby_version = '>=1.8'
  # Package everything below the current directory except editor backup
  # files (names ending in '~'), which do not belong into a release.
  s.files = Dir['**/**'].reject { |path| path =~ /~\z/ }
end
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wlapi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Andrei Beliankou
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-14 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: savon
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: |
|
36
|
+
== WLAPI is a simple API for Wortschatz Leipzig project.
|
37
|
+
|
38
|
+
See RDOC documentation for details.
|
39
|
+
|
40
|
+
You can find some invokation examples in example/example.rb
|
41
|
+
|
42
|
+
email: a.belenkow@uni-trier.de
|
43
|
+
executables: []
|
44
|
+
|
45
|
+
extensions: []
|
46
|
+
|
47
|
+
extra_rdoc_files: []
|
48
|
+
|
49
|
+
files:
|
50
|
+
- wlapi.gemspec
|
51
|
+
- doc/fr_class_index.html
|
52
|
+
- doc/classes/WLAPI.html
|
53
|
+
- doc/classes/WLAPI/API.src/M000007.html
|
54
|
+
- doc/classes/WLAPI/API.src/M000016.html
|
55
|
+
- doc/classes/WLAPI/API.src/M000006.html
|
56
|
+
- doc/classes/WLAPI/API.src/M000003.html
|
57
|
+
- doc/classes/WLAPI/API.src/M000011.html
|
58
|
+
- doc/classes/WLAPI/API.src/M000008.html
|
59
|
+
- doc/classes/WLAPI/API.src/M000017.html
|
60
|
+
- doc/classes/WLAPI/API.src/M000010.html
|
61
|
+
- doc/classes/WLAPI/API.src/M000012.html
|
62
|
+
- doc/classes/WLAPI/API.src/M000009.html
|
63
|
+
- doc/classes/WLAPI/API.src/M000013.html
|
64
|
+
- doc/classes/WLAPI/API.src/M000014.html
|
65
|
+
- doc/classes/WLAPI/API.src/M000005.html
|
66
|
+
- doc/classes/WLAPI/API.src/M000015.html
|
67
|
+
- doc/classes/WLAPI/API.src/M000004.html
|
68
|
+
- doc/classes/WLAPI/API.src/M000001.html
|
69
|
+
- doc/classes/WLAPI/API.src/M000002.html
|
70
|
+
- doc/classes/WLAPI/API.html
|
71
|
+
- doc/fr_method_index.html
|
72
|
+
- doc/rdoc.css
|
73
|
+
- doc/index.html
|
74
|
+
- doc/fr_file_index.html
|
75
|
+
- doc/README.html
|
76
|
+
- doc/created.rid
|
77
|
+
- doc/rdoc-style.css
|
78
|
+
- doc/WLAPI.html
|
79
|
+
- doc/INSTALL.html
|
80
|
+
- doc/bin/example_rb.html
|
81
|
+
- doc/files/example/example_rb.html
|
82
|
+
- doc/files/lib/wlapi/wlapi_rb.html
|
83
|
+
- doc/LICENSE.html
|
84
|
+
- doc/lib/wlapi_rb.html
|
85
|
+
- INSTALL
|
86
|
+
- LICENSE
|
87
|
+
- README
|
88
|
+
- example/example.rb~
|
89
|
+
- example/example.rb
|
90
|
+
- lib/wlapi.rb
|
91
|
+
has_rdoc: true
|
92
|
+
homepage: http://www.uni-trier.de/index.php?id=34451
|
93
|
+
licenses: []
|
94
|
+
|
95
|
+
post_install_message:
|
96
|
+
rdoc_options: []
|
97
|
+
|
98
|
+
require_paths:
|
99
|
+
- lib
|
100
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
hash: 31
|
106
|
+
segments:
|
107
|
+
- 1
|
108
|
+
- 8
|
109
|
+
version: "1.8"
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
hash: 3
|
116
|
+
segments:
|
117
|
+
- 0
|
118
|
+
version: "0"
|
119
|
+
requirements:
|
120
|
+
- Using this gem requires understanding of the linguistic background behind the WL project.
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 1.3.7
|
123
|
+
signing_key:
|
124
|
+
specification_version: 3
|
125
|
+
summary: Use different web services by Wortschatz Lepzip Project.
|
126
|
+
test_files: []
|
127
|
+
|