wlapi 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/INSTALL +0 -0
- data/LICENSE +19 -0
- data/README +5 -0
- data/doc/INSTALL.html +88 -0
- data/doc/LICENSE.html +113 -0
- data/doc/README.html +92 -0
- data/doc/WLAPI.html +1006 -0
- data/doc/bin/example_rb.html +54 -0
- data/doc/classes/WLAPI/API.html +463 -0
- data/doc/classes/WLAPI/API.src/M000001.html +51 -0
- data/doc/classes/WLAPI/API.src/M000002.html +18 -0
- data/doc/classes/WLAPI/API.src/M000003.html +18 -0
- data/doc/classes/WLAPI/API.src/M000004.html +18 -0
- data/doc/classes/WLAPI/API.src/M000005.html +20 -0
- data/doc/classes/WLAPI/API.src/M000006.html +19 -0
- data/doc/classes/WLAPI/API.src/M000007.html +20 -0
- data/doc/classes/WLAPI/API.src/M000008.html +19 -0
- data/doc/classes/WLAPI/API.src/M000009.html +19 -0
- data/doc/classes/WLAPI/API.src/M000010.html +19 -0
- data/doc/classes/WLAPI/API.src/M000011.html +18 -0
- data/doc/classes/WLAPI/API.src/M000012.html +18 -0
- data/doc/classes/WLAPI/API.src/M000013.html +18 -0
- data/doc/classes/WLAPI/API.src/M000014.html +18 -0
- data/doc/classes/WLAPI/API.src/M000015.html +18 -0
- data/doc/classes/WLAPI/API.src/M000016.html +18 -0
- data/doc/classes/WLAPI/API.src/M000017.html +18 -0
- data/doc/classes/WLAPI.html +121 -0
- data/doc/created.rid +1 -0
- data/doc/files/example/example_rb.html +119 -0
- data/doc/files/lib/wlapi/wlapi_rb.html +122 -0
- data/doc/fr_class_index.html +28 -0
- data/doc/fr_file_index.html +28 -0
- data/doc/fr_method_index.html +43 -0
- data/doc/index.html +24 -0
- data/doc/lib/wlapi_rb.html +59 -0
- data/doc/rdoc-style.css +208 -0
- data/doc/rdoc.css +706 -0
- data/example/example.rb +15 -0
- data/example/example.rb~ +2 -0
- data/lib/wlapi.rb +263 -0
- data/wlapi.gemspec +14 -0
- metadata +127 -0
data/lib/wlapi.rb
ADDED
@@ -0,0 +1,263 @@
|
|
1
|
+
# 2010-, Andrei Beliankou
|
2
|
+
# Version: 0.0.3
|
3
|
+
# :title: Ruby based API for Wortschatz Leipzig web services
|
4
|
+
|
5
|
+
|
6
|
+
# SAVON is a SOAP client.
|
7
|
+
require 'savon'
|
8
|
+
|
9
|
+
# REXML is fast enough for our task.
|
10
|
+
require 'rexml/document'
|
11
|
+
include REXML
|
12
|
+
|
13
|
+
# This class represents an interface to the linguistic web services
|
14
|
+
# provided by the University of Leipzig.
|
15
|
+
#
|
16
|
+
# See the project 'Wortschatz Leipzig' for more details.
|
17
|
+
module WLAPI

  # Client for the linguistic web services of the 'Wortschatz Leipzig'
  # project (University of Leipzig).
  #
  # One SOAP client per service is created when the object is instantiated.
  # Only the two-parameter services (word + limit) are implemented so far;
  # all other methods raise a RuntimeError with 'Not implemented yet!'.
  class API

    # At the creation point clients for all services are being instantiated.
    # You can also set the login and the password (both default to 'anonymous').
    #
    # login:: user name used for HTTP basic authentication
    # pass::  password used for HTTP basic authentication
    def initialize(login = 'anonymous', pass = 'anonymous')
      # This hash maps the service names to the URLs of the single services.
      @services = {
        'Thesaurus' => 'http://wortschatz.uni-leipzig.de/axis/services/Thesaurus',
        'Baseform' => 'http://wortschatz.uni-leipzig.de/axis/services/Baseform',
        'Similarity' => 'http://wortschatz.uni-leipzig.de/axis/services/Similarity',
        'Synonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/Synonyms',
        'Sachgebiet' => 'http://wortschatz.uni-leipzig.de/axis/services/Sachgebiet',
        'Frequencies' => 'http://wortschatz.uni-leipzig.de/axis/services/Frequencies',
        'Kookurrenzschnitt' => 'http://wortschatz.uni-leipzig.de/axis/services/Kookkurrenzschnitt',
        'ExperimentalSynonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/ExperimentalSynonyms',
        'RightCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/RightCollocationFinder',
        'LeftCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftCollocationFinder',
        'Wordforms' => 'http://wortschatz.uni-leipzig.de/axis/services/Wordforms',
        'CooccurrencesAll' => 'http://wortschatz.uni-leipzig.de/axis/services/CooccurrencesAll',
        'LeftNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftNeighbours',
        'RightNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/RightNeighbours',
        'Sentences' => 'http://wortschatz.uni-leipzig.de/axis/services/Sentences',
        'Cooccurrences' => 'http://wortschatz.uni-leipzig.de/axis/services/Cooccurrences'
        # no MARSService and Kreuzwortraetsel
      }

      # Create one client per service, set the access credentials and store
      # it in the instance variable @cl_<ServiceName> (e.g. @cl_Thesaurus).
      # It can be a very bad idea to instantiate all the clients at once,
      # we should investigate the typical user behaviour.
      # If only one service is used in the separate session => rewrite the class!
      @services.each do |name, url|
        client = Savon::Client.new(url)
        client.request.basic_auth(login, pass)
        # instance_variable_set replaces the former string eval.
        instance_variable_set("@cl_#{name}", client)
      end

      # Savon creates very verbose logs, switching off.
      Savon::Request.log = false unless $DEBUG
    end

    # Main methods to access different services.
    #
    # You can define the limit for the result set, it defaults to 10.
    # If you want to get all the results, you should provide a number,
    # which would be greater than the result set since we cannot
    # predict how many answers the server will give us. Just try it.
    ##############################################################################

    ## One parameter methods.
    ##############################################################################

    # Returns the frequency and frequency class of the input word.
    # Frequency class is computed in relation to the most frequent word
    # in the corpus. The higher the class, the rarer the word.
    #
    # Not implemented yet, always raises a RuntimeError.
    def frequencies
      raise 'Not implemented yet!'
    end

    # This method gets the baseform (whatever it is :) not lemma).
    # Returns the lemmatized (base) form of the input word.
    #
    # Not implemented yet, always raises a RuntimeError.
    def baseform
      raise 'Not implemented yet!'
    end

    # Returns categories for a given input word.
    #
    # Not implemented yet, always raises a RuntimeError.
    # --
    # Is it a good name? all names are in English, but here...
    def sachgebiet
      raise 'Not implemented yet!'
    end

    ## Two parameter methods.
    ##############################################################################

    # Returns all other word forms of the same lemma for a given word form.
    #
    # Currently disabled, always raises a RuntimeError.
    # --
    # malformed soap, investigate!!!
    # doesn't function
    def wordforms(word, limit = 10)
      raise 'Not implemented yet!' # remove later
      # Intended implementation, unreachable until the raise above is removed.
      answer = query(word, limit, @cl_Wordforms, @services['Wordforms'])
      return get_answer(answer)
    end

    # Like the Synonyms service, returns synonyms of the given input word.
    # However, this first builds a lemma of the input word
    # and thus returns more synonyms.
    #
    # Returns an array of strings.
    def thesaurus(word, limit = 10)
      answer = query(word, limit, @cl_Thesaurus, @services['Thesaurus'])
      get_answer(answer)
    end

    # This method searches for synonyms.
    # Returns synonyms of the input word. In other words, this is a thesaurus.
    #
    # Returns an array of strings.
    def synonyms(word, limit = 10)
      answer = query(word, limit, @cl_Synonyms, @services['Synonyms'])
      # Synonym service provides multiple values, so we take only the odd ones.
      get_answer(answer, '[position() mod 2 = 1 ]')
    end

    # Returns sample sentences containing the input word.
    #
    # Returns an array of strings.
    # --
    # ok, but results should be filtered
    def sentences(word, limit = 10)
      answer = query(word, limit, @cl_Sentences, @services['Sentences'])
      get_answer(answer)
    end

    # For a given input word, returns statistically significant left neighbours
    # (words co-occurring immediately to the left of the input word).
    #
    # Returns an array of strings.
    # --
    # ok, but results should be filtered
    def left_neighbours(word, limit = 10)
      answer = query(word, limit, @cl_LeftNeighbours, @services['LeftNeighbours'])
      get_answer(answer)
    end

    # For a given input word, returns statistically significant right neighbours
    # (words co-occurring immediately to the right of the input word).
    #
    # Returns an array of strings.
    # --
    # ok, but results should be filtered
    def right_neighbours(word, limit = 10)
      answer = query(word, limit, @cl_RightNeighbours, @services['RightNeighbours'])
      get_answer(answer)
    end

    # Returns automatically computed contextually similar words of the input word.
    # Such similar words may be antonyms, hyperonyms, synonyms,
    # cohyponyms or other.
    # Note that due to the huge amount of data any query to this service
    # may take a long time.
    #
    # Not implemented yet, always raises a RuntimeError.
    def similarity
      raise 'Not implemented yet!'
    end

    # This service delivers an experimental synonyms request for internal tests.
    #
    # Not implemented yet, always raises a RuntimeError.
    # --
    # don't know, if we have to include this service...
    def experimental_synonyms
      raise 'Not implemented yet!'
    end

    ## Three parameter methods.
    ##############################################################################

    # Attempts to find linguistic collocations that occur to the right
    # of the given input word.
    # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
    # verb, noun and stopword, respectively.
    # The parameter restricts the type of words found.
    #
    # Not implemented yet, always raises a RuntimeError.
    def right_collocation_finder
      raise 'Not implemented yet!'
    end

    # Attempts to find linguistic collocations that occur to the left
    # of the given input word.
    # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
    # verb, noun and stopword, respectively.
    # The parameter restricts the type of words found.
    #
    # Not implemented yet, always raises a RuntimeError.
    def left_collocation_finder
      raise 'Not implemented yet!'
    end

    # Returns statistically significant co-occurrences of the input word.
    #
    # Not implemented yet, always raises a RuntimeError.
    def cooccurrences
      raise 'Not implemented yet!'
    end

    # Returns statistically significant co-occurrences of the input word.
    # However, it searches in the unrestricted version of the co-occurrences table
    # as in the Cooccurrences services,
    # which means significantly longer wait times.
    #
    # Not implemented yet, always raises a RuntimeError.
    def cooccurrences_all
      raise 'Not implemented yet!'
    end

    # Returns the intersection of the co-occurrences of the two given words.
    # The result set is ordered according to the sum of the significances
    # in descending order. Note that due to the join involved,
    # this may take some time.
    #
    # Not implemented yet, always raises a RuntimeError.
    def kookurrenzschnitt
      raise 'Not implemented yet!'
    end

    private

    # Main query method, it invokes the soap engine.
    # This method combines all the data to one SOAP request and gets the answer.
    # It is not compatible with one and three parameter methods yet.
    #
    # word::      the input word, sent as the 'Wort' parameter
    # limit::     maximal size of the result set, sent as the 'Limit' parameter
    # cl::        the client object of the service to be called
    # namespace:: the URL of the service, also used as the SOAP namespace
    #
    # Returns the response parsed into a REXML::Document.
    def query(word, limit, cl, namespace)
      # The values end up inside XML markup, so characters like '&' and '<'
      # must be escaped, otherwise the request would be malformed.
      safe_word  = REXML::Text.normalize(word.to_s)
      safe_limit = REXML::Text.normalize(limit.to_s)

      # Calling the action with ! (disables the wsdl query).
      # wsdl is disabled since calling the server for wsdl can last too long.
      resp = cl.execute! do |soap|

        # adding a namespace, wsdl is disabled
        soap.namespace = namespace

        soap.body = <<-EOF
<urn:objRequestParameters>
  <urn:corpus>de</urn:corpus>
  <urn:parameters>
    <urn:dataVectors>
      <dat:dataRow>Wort</dat:dataRow>
      <dat:dataRow>#{safe_word}</dat:dataRow>
    </urn:dataVectors>
    <urn:dataVectors>
      <dat:dataRow>Limit</dat:dataRow>
      <dat:dataRow>#{safe_limit}</dat:dataRow>
    </urn:dataVectors>
  </urn:parameters>
</urn:objRequestParameters>
        EOF

        soap.namespaces['xmlns:soapenv'] = "http://schemas.xmlsoap.org/soap/envelope/"
        # Every service has a different namespace, it is derived from the
        # last segment of the service URL (e.g. 'urn:Thesaurus').
        soap.namespaces['xmlns:urn'] = "urn:#{namespace.sub(/.+ces\//, '')}"

        soap.namespaces['xmlns:dat'] = "http://datatypes.webservice.wortschatz.uni_leipzig.de"
      end

      doc = REXML::Document.new(resp.to_xml)

      STDERR.puts doc if $DEBUG

      doc
    end

    # This method extracts valuable data from the XML structure
    # of the soap response. It returns an array with the texts of the
    # extracted xml nodes and prints out the same collection in the DEBUG mode.
    #
    # doc:: the parsed response document
    # mod:: optional XPath predicate appended to the node selection
    def get_answer(doc, mod='')
      result = []
      # The path seems to be weird, but the namespaces change incrementally
      # in the output, so I don't want to treat it here.
      # A modifier is needed because the synonyms service provides duplicate values.
      REXML::XPath.each(doc, "//result/*/*#{mod}") { |el| result << el.text }
      result.each { |text| STDERR.puts text } if $DEBUG
      result
    end
  end
end
|
data/wlapi.gemspec
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Gem specification for wlapi.
Gem::Specification.new do |s|
  s.name = "wlapi"
  s.summary = "Use different web services by Wortschatz Leipzig Project."
  # The long description is taken verbatim from the README next to this file.
  s.description = File.read(File.join(File.dirname(__FILE__), 'README'))
  s.requirements = [ 'Using this gem requires understanding of the linguistic background behind the WL project.' ]
  s.version = "0.0.3"
  s.author = "Andrei Beliankou"
  s.email = "a.belenkow@uni-trier.de"
  s.homepage = "http://www.uni-trier.de/index.php?id=34451"
  s.add_dependency('savon')
  s.platform = Gem::Platform::RUBY
  s.required_ruby_version = '>=1.8'
  # Package regular files only and leave out editor backups (e.g. example.rb~).
  s.files = Dir['**/*'].select { |path| File.file?(path) && path !~ /~\z/ }
end
|
metadata
ADDED
@@ -0,0 +1,127 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wlapi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 25
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 3
|
10
|
+
version: 0.0.3
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- Andrei Beliankou
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-11-14 00:00:00 +01:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: savon
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: |
|
36
|
+
== WLAPI is a simple API for Wortschatz Leipzig project.
|
37
|
+
|
38
|
+
See RDOC documentation for details.
|
39
|
+
|
40
|
+
You can find some invokation examples in example/example.rb
|
41
|
+
|
42
|
+
email: a.belenkow@uni-trier.de
|
43
|
+
executables: []
|
44
|
+
|
45
|
+
extensions: []
|
46
|
+
|
47
|
+
extra_rdoc_files: []
|
48
|
+
|
49
|
+
files:
|
50
|
+
- wlapi.gemspec
|
51
|
+
- doc/fr_class_index.html
|
52
|
+
- doc/classes/WLAPI.html
|
53
|
+
- doc/classes/WLAPI/API.src/M000007.html
|
54
|
+
- doc/classes/WLAPI/API.src/M000016.html
|
55
|
+
- doc/classes/WLAPI/API.src/M000006.html
|
56
|
+
- doc/classes/WLAPI/API.src/M000003.html
|
57
|
+
- doc/classes/WLAPI/API.src/M000011.html
|
58
|
+
- doc/classes/WLAPI/API.src/M000008.html
|
59
|
+
- doc/classes/WLAPI/API.src/M000017.html
|
60
|
+
- doc/classes/WLAPI/API.src/M000010.html
|
61
|
+
- doc/classes/WLAPI/API.src/M000012.html
|
62
|
+
- doc/classes/WLAPI/API.src/M000009.html
|
63
|
+
- doc/classes/WLAPI/API.src/M000013.html
|
64
|
+
- doc/classes/WLAPI/API.src/M000014.html
|
65
|
+
- doc/classes/WLAPI/API.src/M000005.html
|
66
|
+
- doc/classes/WLAPI/API.src/M000015.html
|
67
|
+
- doc/classes/WLAPI/API.src/M000004.html
|
68
|
+
- doc/classes/WLAPI/API.src/M000001.html
|
69
|
+
- doc/classes/WLAPI/API.src/M000002.html
|
70
|
+
- doc/classes/WLAPI/API.html
|
71
|
+
- doc/fr_method_index.html
|
72
|
+
- doc/rdoc.css
|
73
|
+
- doc/index.html
|
74
|
+
- doc/fr_file_index.html
|
75
|
+
- doc/README.html
|
76
|
+
- doc/created.rid
|
77
|
+
- doc/rdoc-style.css
|
78
|
+
- doc/WLAPI.html
|
79
|
+
- doc/INSTALL.html
|
80
|
+
- doc/bin/example_rb.html
|
81
|
+
- doc/files/example/example_rb.html
|
82
|
+
- doc/files/lib/wlapi/wlapi_rb.html
|
83
|
+
- doc/LICENSE.html
|
84
|
+
- doc/lib/wlapi_rb.html
|
85
|
+
- INSTALL
|
86
|
+
- LICENSE
|
87
|
+
- README
|
88
|
+
- example/example.rb~
|
89
|
+
- example/example.rb
|
90
|
+
- lib/wlapi.rb
|
91
|
+
has_rdoc: true
|
92
|
+
homepage: http://www.uni-trier.de/index.php?id=34451
|
93
|
+
licenses: []
|
94
|
+
|
95
|
+
post_install_message:
|
96
|
+
rdoc_options: []
|
97
|
+
|
98
|
+
require_paths:
|
99
|
+
- lib
|
100
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
101
|
+
none: false
|
102
|
+
requirements:
|
103
|
+
- - ">="
|
104
|
+
- !ruby/object:Gem::Version
|
105
|
+
hash: 31
|
106
|
+
segments:
|
107
|
+
- 1
|
108
|
+
- 8
|
109
|
+
version: "1.8"
|
110
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
111
|
+
none: false
|
112
|
+
requirements:
|
113
|
+
- - ">="
|
114
|
+
- !ruby/object:Gem::Version
|
115
|
+
hash: 3
|
116
|
+
segments:
|
117
|
+
- 0
|
118
|
+
version: "0"
|
119
|
+
requirements:
|
120
|
+
- Using this gem requires understanding of the linguistic background behind the WL project.
|
121
|
+
rubyforge_project:
|
122
|
+
rubygems_version: 1.3.7
|
123
|
+
signing_key:
|
124
|
+
specification_version: 3
|
125
|
+
summary: Use different web services by Wortschatz Lepzip Project.
|
126
|
+
test_files: []
|
127
|
+
|