wlapi 0.0.6 → 0.7.1

Sign up to get free protection for your applications and to get access to all the features.
data/lib/wlapi.rb CHANGED
@@ -1,267 +1 @@
1
- # 2010-, Andrei Beliankou
2
- # Version: 0.0.4
3
- # :title: Ruby based API for Wortschatz Leipzig web services
4
-
5
-
6
- # SAVON is a SOAP client.
7
- require 'savon'
8
-
9
- # REXML is fast enough for our task.
10
- require 'rexml/document'
11
- include REXML
12
-
13
- # Top level namespace wrapper for WLAPI
14
- module WLAPI
15
-
16
- # This class represents an interface to the linguistic web services
17
- # provided by the University of Leipzig.
18
- #
19
- # See the project 'Wortschatz Leipzig' for more details.
20
- class API
21
-
22
- # At the creation point clients for all services are being instantiated.
23
- # You can also set the login and the password (it defaults to 'anonymous').
24
- def initialize(login = 'anonymous', pass = 'anonymous')
25
- # This hash contains the URLs to the single services.
26
- @services = {
27
- 'Thesaurus' => 'http://wortschatz.uni-leipzig.de/axis/services/Thesaurus',
28
- 'Baseform' => 'http://wortschatz.uni-leipzig.de/axis/services/Baseform',
29
- 'Similarity' => 'http://wortschatz.uni-leipzig.de/axis/services/Similarity',
30
- 'Synonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/Synonyms',
31
- 'Sachgebiet' => 'http://wortschatz.uni-leipzig.de/axis/services/Sachgebiet',
32
- 'Frequencies' => 'http://wortschatz.uni-leipzig.de/axis/services/Frequencies',
33
- 'Kookurrenzschnitt' => 'http://wortschatz.uni-leipzig.de/axis/services/Kookkurrenzschnitt',
34
- 'ExperimentalSynonyms' => 'http://wortschatz.uni-leipzig.de/axis/services/ExperimentalSynonyms',
35
- 'RightCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/RightCollocationFinder',
36
- 'LeftCollocationFinder' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftCollocationFinder',
37
- 'Wordforms' => 'http://wortschatz.uni-leipzig.de/axis/services/Wordforms',
38
- 'CooccurrencesAll' => 'http://wortschatz.uni-leipzig.de/axis/services/CooccurrencesAll',
39
- 'LeftNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/LeftNeighbours',
40
- 'RightNeighbours' => 'http://wortschatz.uni-leipzig.de/axis/services/RightNeighbours',
41
- 'Sentences' => 'http://wortschatz.uni-leipzig.de/axis/services/Sentences',
42
- 'Cooccurrences' => 'http://wortschatz.uni-leipzig.de/axis/services/Cooccurrences'
43
- # no MARSService and Kreuzwortraetsel
44
- }
45
-
46
- # cl short for client.
47
- # Dynamically create all the clients and set access credentials.
48
- # It can be a very bad idea to instantiate all the clients at once,
49
- # we should investigate the typical user behaviour.
50
- # If only one service is used in the separate session => rewrite the class!
51
- @services.each do |key, val|
52
- cl_name = '@cl_' + key
53
- eval("#{cl_name} = Savon::Client.new(val)")
54
- eval("#{cl_name}.request.basic_auth(login, pass)")
55
- end
56
-
57
- # Savon creates very verbose logs, switching off.
58
- Savon::Request.log = false unless $DEBUG
59
- end
60
-
61
- # Main methods to access different services.
62
- #
63
- # You can define the limit for the result set, it defaults to 10.
64
- # If you want to get all the results, you should provide a number,
65
- # which would be greater than the result set since we cannot
66
- # predict how many answers the server will give us. Just try it.
67
- ##############################################################################
68
-
69
- ## One parameter methods.
70
- ##############################################################################
71
-
72
- # Returns the frequency and frequency class of the input word.
73
- # Frequency class is computed in relation to the most frequent word
74
- # in the corpus. The higher the class, the rarer the word.
75
- def frequencies
76
- raise 'Not implemented yet!'
77
- end
78
-
79
- # This method gets the baseform (whatever it is :) not lemma).
80
- # Returns the lemmatized (base) form of the input word.
81
- def baseform
82
- raise 'Not implemented yet!'
83
- end
84
-
85
- # Returns categories for a given input word.
86
- # --
87
- # Is it a good name? all names are in English, but here..
88
- # let's call it domain, not sachgebiet
89
- def domain
90
- raise 'Not implemented yet!'
91
- end
92
-
93
- ## Two parameter methods.
94
- ##############################################################################
95
-
96
- # Returns all other word forms of the same lemma for a given word form.
97
- # --
98
- # malformed soap, investigate!!!
99
- # doesn't function
100
- def wordforms(word, limit)
101
- raise 'Not implemented yet!' # remove later
102
- answer = query(word, limit, @cl_Wordforms, @services['Wordforms'])
103
- return get_answer(answer)
104
- end
105
-
106
- # As the Synonyms service returns synonyms of the given input word.
107
- # However, this first builds a lemma of the input word
108
- # and thus returns more synonyms.
109
- def thesaurus(word, limit = 10)
110
- answer = query(word, limit, @cl_Thesaurus, @services['Thesaurus'])
111
- return get_answer(answer)
112
- end
113
-
114
- # This method searches for synonyms.
115
- # Returns synonyms of the input word. In other words, this is a thesaurus.
116
- def synonyms(word, limit = 10)
117
- answer = query(word, limit, @cl_Synonyms, @services['Synonyms'])
118
- # The Synonyms service returns duplicate values, so we keep only the odd-positioned ones.
119
- return get_answer(answer, '[position() mod 2 = 1 ]')
120
- end
121
-
122
- # ok, but results should be filtered
123
- # Returns sample sentences containing the input word.
124
- def sentences(word, limit)
125
- answer = query(word, limit, @cl_Sentences, @services['Sentences'])
126
- return get_answer(answer)
127
- end
128
-
129
- # For a given input word, returns statistically significant left neighbours
130
- # (words co-occurring immediately to the left of the input word).
131
- # --
132
- # ok, but results should be filtered
133
- def left_neighbours(word, limit)
134
- answer = query(word, limit, @cl_LeftNeighbours, @services['LeftNeighbours'])
135
- return get_answer(answer)
136
- end
137
-
138
- # For a given input word, returns statistically significant right neighbours
139
- # (words co-occurring immediately to the right of the input word).
140
- # --
141
- # ok, but results should be filtered
142
- def right_neighbours(word, limit)
143
- answer = query(word, limit, @cl_RightNeighbours, @services['RightNeighbours'])
144
- return get_answer(answer)
145
- end
146
-
147
-
148
- # Returns automatically computed contextually similar words of the input word.
149
- # Such similar words may be antonyms, hyperonyms, synonyms,
150
- # cohyponyms or other.
151
- # Note that due to the huge amount of data any query to this service
152
- # may take a long time.
153
- def similarity
154
- raise 'Not implemented yet!'
155
- end
156
-
157
- # This service delivers an experimental synonyms request for internal tests.
158
- # --
159
- # don't know, if we have to include this service...
160
- def experimental_synonyms
161
- raise 'Not implemented yet!'
162
- end
163
-
164
- ## Three parameter methods.
165
- ##############################################################################
166
-
167
- # Attempts to find linguistic collocations that occur to the right
168
- # of the given input word.
169
- # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
170
- # verb, noun and stopword, respectively.
171
- # The parameter restricts the type of words found.
172
- def right_collocation_finder
173
- raise 'Not implemented yet!'
174
- end
175
-
176
- # Attempts to find linguistic collocations that occur to the left
177
- # of the given input word.
178
- # The parameter Wortart accepts four values A,V,N,S which stand for adjective,
179
- # verb, noun and stopword, respectively.
180
- # The parameter restricts the type of words found.
181
- def left_collocation_finder
182
- raise 'Not implemented yet!'
183
- end
184
-
185
- # Returns statistically significant co-occurrences of the input word.
186
- def cooccurrences
187
- raise 'Not implemented yet!'
188
- end
189
-
190
- # Returns statistically significant co-occurrences of the input word.
191
- # However, it searches in the unrestricted version of the co-occurrences table
192
- # as in the Cooccurrences services,
193
- # which means significantly longer wait times.
194
- def cooccurrences_all
195
- raise 'Not implemented yet!'
196
- end
197
-
198
- # Returns the intersection of the co-occurrences of the two given words.
199
- # The result set is ordered according to the sum of the significances
200
- # in descending order. Note that due to the join involved,
201
- # this may take some time.
202
- # --
203
- # let's call it intersection, not kookurrenzschnitt
204
- def intersection
205
- raise 'Not implemented yet!'
206
- end
207
-
208
- private
209
-
210
- # Main query method, it invokes the soap engine.
211
- # This method combines all the data to one SOAP request and gets the answer.
212
- # It is not compatible with one and three parameter methods yet.
213
- def query(word, limit, cl, namespace)
214
- # Calling the action with ! (disables the wsdl query).
215
- # wsdl is disabled since calling the server for wsdl can last too long.
216
- resp = cl.execute! do |soap|
217
-
218
- # adding a namespace, wsdl is disabled
219
- soap.namespace = namespace
220
-
221
- body = <<EOF
222
- <urn:objRequestParameters>
223
- <urn:corpus>de</urn:corpus>
224
- <urn:parameters>
225
- <urn:dataVectors>
226
- <dat:dataRow>Wort</dat:dataRow>
227
- <dat:dataRow>#{word}</dat:dataRow>
228
- </urn:dataVectors>
229
- <urn:dataVectors>
230
- <dat:dataRow>Limit</dat:dataRow>
231
- <dat:dataRow>#{limit}</dat:dataRow>
232
- </urn:dataVectors>
233
- </urn:parameters>
234
- </urn:objRequestParameters>
235
- EOF
236
-
237
- soap.body = body
238
-
239
- soap.namespaces['xmlns:soapenv'] = "http://schemas.xmlsoap.org/soap/envelope/"
240
- # Every service has a different namespace, but it doesn't matter.
241
- soap.namespaces['xmlns:urn'] = "urn:#{namespace.sub(/.+ces\//, '')}"
242
-
243
- soap.namespaces['xmlns:dat'] = "http://datatypes.webservice.wortschatz.uni_leipzig.de"
244
- end
245
-
246
- doc = Document.new(resp.to_xml)
247
-
248
- STDERR.puts doc if $DEBUG
249
-
250
- return doc
251
-
252
- end
253
-
254
- # This method extracts valuable data from the XML structure
255
- # of the soap response. It returns an array with extracted xml text nodes
256
- # and prints out the same collection in the DEBUG mode.
257
- def get_answer(doc, mod='')
258
- result = []
259
- # The path seems to be weird, but the namespaces change incrementally
260
- # in the output, so I don't want to treat it here.
261
- # A modifier needed because synonyms service provides duplicate values.
262
- XPath.each(doc, "//result/*/*#{mod}") {|el| STDERR.puts el.text} if $DEBUG
263
- XPath.each(doc, "//result/*/*#{mod}") {|el| result << el.text}
264
- return result
265
- end
266
- end
267
- end
1
+ require 'wlapi/api'
data/test/test_api.rb ADDED
@@ -0,0 +1,233 @@
1
+ # -*- coding: utf-8 -*-
2
+ require 'rubygems'
3
+ require 'test/unit'
4
+ require 'wlapi'
5
+
6
+
7
+ class TestApi < Test::Unit::TestCase
8
+
9
+ def setup
10
+ @api = WLAPI::API.new
11
+ @word = 'Stuhl'
12
+ end
13
+
14
+ def teardown
15
+ end
16
+
17
+ # one parameter
18
+ def test_frequencies
19
+ assert_respond_to(@api, :frequencies)
20
+ assert_raise(ArgumentError) do
21
+ @api.frequencies(@word, 5)
22
+ end
23
+ assert_raise(ArgumentError) do
24
+ @api.frequencies
25
+ end
26
+ response = @api.frequencies(@word)
27
+ assert_not_nil(response)
28
+ assert_instance_of(Array, response)
29
+ assert(!response.empty?)
30
+ assert_equal(2, response.size)
31
+ assert_match(/\d+/, response[0])
32
+ assert_match(/\d+/, response[1])
33
+ end
34
+
35
+ def test_baseform
36
+ assert_respond_to(@api, :baseform)
37
+ assert_raise(ArgumentError) do
38
+ @api.baseform(@word, 5)
39
+ end
40
+ assert_raise(ArgumentError) do
41
+ @api.baseform
42
+ end
43
+ response = @api.baseform(@word)
44
+ assert_not_nil(response)
45
+ assert_instance_of(Array, response)
46
+ assert(!response.empty?)
47
+ assert_equal(2, response.size)
48
+ assert_match(/\w+/, response[0])
49
+ assert_match(/[AVN]/, response[1])
50
+ end
51
+
52
+ def test_domain
53
+ assert_respond_to(@api, :domain)
54
+ assert_raise(ArgumentError) do
55
+ @api.domain(@word, 5)
56
+ end
57
+ assert_raise(ArgumentError) do
58
+ @api.domain
59
+ end
60
+ response = @api.domain(@word)
61
+ assert_not_nil(response)
62
+ assert_instance_of(Array, response)
63
+ assert(!response.empty?)
64
+ # we cannot predict the minimal structure of the response
65
+ end
66
+
67
+ # two parameters
68
+ def test_wordforms
69
+ assert_respond_to(@api, :wordforms)
70
+ assert_raise(ArgumentError) do
71
+ @api.wordforms
72
+ end
73
+ assert_raise(ArgumentError) do
74
+ @api.wordforms(1, 2, 3)
75
+ end
76
+ response = @api.wordforms(@word)
77
+ assert_not_nil(response)
78
+ assert_instance_of(Array, response)
79
+ assert(!response.empty?)
80
+ end
81
+
82
+ def test_thesaurus
83
+ assert_respond_to(@api, :thesaurus)
84
+ assert_raise(ArgumentError) do
85
+ @api.thesaurus
86
+ end
87
+ assert_raise(ArgumentError) do
88
+ @api.thesaurus(1, 2, 3)
89
+ end
90
+ response = @api.thesaurus(@word)
91
+ assert_not_nil(response)
92
+ assert_instance_of(Array, response)
93
+ assert(!response.empty?)
94
+ end
95
+
96
+ def test_synonyms
97
+ assert_respond_to(@api, :synonyms)
98
+ assert_raise(ArgumentError) do
99
+ @api.synonyms
100
+ end
101
+ assert_raise(ArgumentError) do
102
+ @api.synonyms(1, 2, 3)
103
+ end
104
+ response = @api.synonyms(@word)
105
+ assert_not_nil(response)
106
+ assert_instance_of(Array, response)
107
+ assert(!response.empty?)
108
+ end
109
+
110
+ def test_sentences
111
+ assert_respond_to(@api, :sentences)
112
+ assert_raise(ArgumentError) do
113
+ @api.sentences
114
+ end
115
+ assert_raise(ArgumentError) do
116
+ @api.sentences(1, 2, 3)
117
+ end
118
+ response = @api.sentences(@word)
119
+ assert_not_nil(response)
120
+ assert_instance_of(Array, response)
121
+ assert(!response.empty?)
122
+ end
123
+
124
+ def test_left_neighbours
125
+ assert_respond_to(@api, :left_neighbours)
126
+ assert_raise(ArgumentError) do
127
+ @api.left_neighbours
128
+ end
129
+ assert_raise(ArgumentError) do
130
+ @api.left_neighbours(1, 2, 3)
131
+ end
132
+ response = @api.left_neighbours(@word)
133
+ assert_not_nil(response)
134
+ assert_instance_of(Array, response)
135
+ assert(!response.empty?)
136
+ end
137
+
138
+ def test_right_neighbours
139
+ assert_respond_to(@api, :right_neighbours)
140
+ assert_raise(ArgumentError) do
141
+ @api.right_neighbours
142
+ end
143
+ assert_raise(ArgumentError) do
144
+ @api.right_neighbours(1, 2, 3)
145
+ end
146
+ response = @api.right_neighbours(@word)
147
+ assert_not_nil(response)
148
+ assert_instance_of(Array, response)
149
+ assert(!response.empty?)
150
+ end
151
+
152
+ def test_similarity
153
+ assert_respond_to(@api, :similarity)
154
+ assert_raise(ArgumentError) do
155
+ @api.similarity
156
+ end
157
+ assert_raise(ArgumentError) do
158
+ @api.similarity(1, 2, 3)
159
+ end
160
+ response = @api.similarity(@word)
161
+ assert_not_nil(response)
162
+ assert_instance_of(Array, response)
163
+ assert(!response.empty?)
164
+ end
165
+
166
+ def test_experimental_synonyms
167
+ assert_respond_to(@api, :experimental_synonyms)
168
+ assert_raise(ArgumentError) do
169
+ @api.experimental_synonyms
170
+ end
171
+ assert_raise(ArgumentError) do
172
+ @api.experimental_synonyms(1, 2, 3)
173
+ end
174
+ response = @api.experimental_synonyms(@word)
175
+ assert_not_nil(response)
176
+ assert_instance_of(Array, response)
177
+ assert(!response.empty?)
178
+ end
179
+
180
+ # three parameters
181
+ def test_right_collocation_finder
182
+ assert_respond_to(@api, :right_collocation_finder)
183
+ assert_raise(ArgumentError) do
184
+ @api.right_collocation_finder
185
+ end
186
+ assert_raise(ArgumentError) do
187
+ @api.right_collocation_finder(1, 2, 3, 4)
188
+ end
189
+ response = @api.right_collocation_finder(@word, 'V')
190
+ assert_not_nil(response)
191
+ assert_instance_of(Array, response)
192
+ assert(!response.empty?)
193
+ end
194
+
195
+ def test_left_collocation_finder
196
+ assert_respond_to(@api, :left_collocation_finder)
197
+ assert_raise(ArgumentError) do
198
+ @api.left_collocation_finder
199
+ end
200
+ assert_raise(ArgumentError) do
201
+ @api.left_collocation_finder(1, 2, 3, 4)
202
+ end
203
+ response = @api.left_collocation_finder(@word, 'A')
204
+ assert_not_nil(response)
205
+ assert_instance_of(Array, response)
206
+ assert(!response.empty?)
207
+ end
208
+
209
+ def test_cooccurrences
210
+ assert_raise(RuntimeError) do
211
+ @api.cooccurrences(@word, 1, 10)
212
+ end
213
+ end
214
+
215
+ def test_cooccurrences_all
216
+ assert_raise(RuntimeError) do
217
+ @api.cooccurrences_all(@word, 1, 10)
218
+ end
219
+ end
220
+
221
+ def test_intersection
222
+ assert_raise(RuntimeError) do
223
+ @api.intersection(@word, @word, 10)
224
+ end
225
+ end
226
+
227
+ # private methods
228
+ def test_query
229
+ end
230
+
231
+ def test_get_answer
232
+ end
233
+ end
data/wlapi.gemspec CHANGED
@@ -1,17 +1,19 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "wlapi"
3
- s.summary = "Use different web services provided by the Project Wortschatz."
4
- s.description = "WLAPI is a programmatic API for web services provided by the project Wortschatz, University of Leipzig."
3
+ s.summary = "WLAPI is a programmatic API for web services provided by the project Wortschatz, University of Leipzig. Use different linguistic services such as synonym and collocation search." # it is the description for 'gem list -d'
4
+ s.description = "WLAPI is a programmatic API for web services provided by the project Wortschatz, University of Leipzig." # it appears on the RubyGems page
5
5
  s.rubyforge_project = "wlapi"
6
- s.requirements = [ 'Using this gem requires understanding of the linguistic background behind the WL project.' ]
7
- s.version = "0.0.6"
6
+ s.requirements = [ 'Using this gem requires understanding of the linguistic background behind the WL project.' ] # it isn't currently used
7
+ s.version = "0.7.1"
8
8
  s.author = "Andrei Beliankou"
9
9
  s.email = "a.belenkow@uni-trier.de"
10
10
  s.homepage = "http://www.uni-trier.de/index.php?id=34451"
11
+ # s.require_paths = 'lib' # this is the default value, so why set it explicitly?
11
12
  s.add_dependency('savon')
12
- s.rdoc_options = ["-m", "README"]
13
- s.extra_rdoc_files = ["README", "example/example.rb", "LICENSE"]
13
+ s.rdoc_options = ["-m", "README.rdoc"]
14
+ s.extra_rdoc_files = ["README.rdoc", "LICENSE"]
14
15
  s.platform = Gem::Platform::RUBY
15
16
  s.required_ruby_version = '>=1.8.5'
16
- s.files = Dir['**/**']
17
+ s.files = Dir['**/*']
18
+ s.test_files = ["test/test_api.rb"]
17
19
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: wlapi
3
3
  version: !ruby/object:Gem::Version
4
- hash: 19
4
+ hash: 1
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 0
9
- - 6
10
- version: 0.0.6
8
+ - 7
9
+ - 1
10
+ version: 0.7.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Andrei Beliankou
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-11-16 00:00:00 +01:00
18
+ date: 2010-11-21 00:00:00 +01:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -39,50 +39,17 @@ executables: []
39
39
  extensions: []
40
40
 
41
41
  extra_rdoc_files:
42
- - README
43
- - example/example.rb
42
+ - README.rdoc
44
43
  - LICENSE
45
44
  files:
46
- - wlapi-0.0.6.gem
45
+ - README.rdoc
47
46
  - wlapi.gemspec
48
- - doc/images/loadingAnimation.gif
49
- - doc/images/wrench.png
50
- - doc/images/brick.png
51
- - doc/images/page_white_text.png
52
- - doc/images/ruby.png
53
- - doc/images/plugin.png
54
- - doc/images/bug.png
55
- - doc/images/bullet_toggle_minus.png
56
- - doc/images/find.png
57
- - doc/images/zoom.png
58
- - doc/images/bullet_toggle_plus.png
59
- - doc/images/date.png
60
- - doc/images/page_white_width.png
61
- - doc/images/tag_green.png
62
- - doc/images/wrench_orange.png
63
- - doc/images/macFFBgHack.png
64
- - doc/images/package.png
65
- - doc/images/page_green.png
66
- - doc/images/brick_link.png
67
- - doc/images/bullet_black.png
68
- - doc/js/darkfish.js
69
- - doc/js/jquery.js
70
- - doc/js/quicksearch.js
71
- - doc/js/thickbox-compressed.js
72
- - doc/rdoc.css
73
- - doc/index.html
74
- - doc/README.html
75
- - doc/created.rid
76
- - doc/WLAPI.html
77
- - doc/INSTALL.html
78
- - doc/WLAPI/API.html
79
- - doc/example/example_rb.html
80
- - doc/LICENSE.html
81
- - doc/lib/wlapi_rb.html
82
47
  - INSTALL
48
+ - test/test_api.rb
83
49
  - LICENSE
50
+ - Rakefile
84
51
  - README
85
- - example/example.rb
52
+ - lib/wlapi/api.rb
86
53
  - lib/wlapi.rb
87
54
  has_rdoc: true
88
55
  homepage: http://www.uni-trier.de/index.php?id=34451
@@ -91,7 +58,7 @@ licenses: []
91
58
  post_install_message:
92
59
  rdoc_options:
93
60
  - -m
94
- - README
61
+ - README.rdoc
95
62
  require_paths:
96
63
  - lib
97
64
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -120,6 +87,6 @@ rubyforge_project: wlapi
120
87
  rubygems_version: 1.3.7
121
88
  signing_key:
122
89
  specification_version: 3
123
- summary: Use different web services provided by the Project Wortschatz.
124
- test_files: []
125
-
90
+ summary: WLAPI is a programmatic API for web services provided by the project Wortschatz, University of Leipzig. Use different linguistic services such as synonym and collocation search.
91
+ test_files:
92
+ - test/test_api.rb