yanser 0.0.3 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,6 @@
1
+ --private
2
+ --protected
3
+ --title 'YANSER - Yahoo! ANSwers harvestER'
4
+ -
5
+ CHANGELOG
6
+ LICENSE
@@ -0,0 +1,24 @@
1
+ === COMPLETED
2
+ ==== 0.1.0
3
+ Yanser now supports YANAPI 0.3.1 and later.
4
+ ==== 0.0.3
5
+ Yanser depends on the old interface of YANAPI. No functional improvements.
6
+ ==== 0.0.2
7
+ Small changes in the documentation.
8
+ ==== 0.0.1
9
+ Initial release of Yanser.
10
+
11
+
12
+ === PLANNED
13
+
14
+ ==== 0.0.2
15
+ ==== 0.0.3
16
+ ==== 0.4.0
17
+ ==== 0.5.0
18
+ ==== 0.6.0
19
+ ==== 0.7.0
20
+ ==== 0.8.0
21
+ ==== 0.9.0
22
+ ==== 1.0.0
23
+
24
+
data/README CHANGED
@@ -2,20 +2,36 @@
2
2
 
3
3
  * {RubyGems}[http://rubygems.org/gems/yanser]
4
4
  * Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
5
- * {YANAPI Project Page}[http://yanser.rubyforge.org/]
5
+ * {YANSER Project Page}[http://yanser.rubyforge.org/]
6
6
 
7
7
  == DESCRIPTION
8
8
 
9
- YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
9
+ YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access
10
+ to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI
11
+ and helps to search for Questions and Answers which contain a set of key words,
12
+ belong to a specific semantic domain or are posted by a certain user.
13
+
14
+ Yanser is a research tool in the field of Computational Linguistics.
10
15
 
11
16
  == SYNOPSIS
12
17
  $ yanser --help
13
18
 
19
+ == FEATURE LIST
20
+
21
+ == INSTALLATION
22
+
23
+ == USAGE
24
+
25
+ == CHANGELOG
26
+
27
+ See CHANGELOG.
28
+
29
+ == CONTRIBUTORS
14
30
 
15
31
 
16
32
  == LICENSE
17
33
 
18
34
  YANSER is a copyrighted software by Andrei Beliankou, 2011.
35
+
19
36
  You may use, redistribute and change it under the terms
20
37
  provided in the LICENSE file.
21
-
data/bin/yanser CHANGED
@@ -1,19 +1,36 @@
1
1
  # -*- coding: utf-8; mode: ruby -*-
2
-
3
- # this is the wrong solution; in fact I should not directly specify
4
- # where the files are located, but let it stay this way for now
5
- # it is required only for .tar.gz distribution
6
- #lib_path = File.expand_path('../../lib', __FILE__)
7
- #$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
8
-
9
- # this approach is the right one; only this way will the Yanser and OptionParser classes stay independent
10
2
  require 'yanser/yanser'
11
3
  require 'yanser/opt_parser'
4
+ require 'yanser/store'
12
5
 
13
- # class method, why not?
14
- options = Yanser::OptParser.parse(ARGV)
15
-
6
+ begin
7
+ options = Yanser::OptParser.parse(ARGV)
8
+ rescue
9
+ raise
10
+ end
16
11
 
17
- yanser = Yanser::Yanser.new(options)
12
+ # Handle here exceptions from YANAPI, use messages for it.
13
+ # Add "See <yanser --help>." to it.
14
+ begin
15
+ yanser = Yanser::Yanser.new(options)
16
+ result = yanser.start
17
+ rescue YANAPI::UserError
18
+ # Give up with an error message.
19
+ # The user must change the input.
20
+ raise
21
+ rescue YANAPI::ContentError
22
+ # Yahoo! says something.
23
+ raise
24
+ rescue YANAPI::ExternalError
25
+ # Give up with an error message.
26
+ # It is a persistent external error.
27
+ # We already retried three times in Yanser, but the error persists.
28
+ raise
29
+ end
18
30
 
19
- yanser.start
31
+ begin
32
+ storage = Yanser::Store.new(options)
33
+ storage.store(result)
34
+ rescue
35
+ raise
36
+ end
@@ -1,11 +1,12 @@
1
- # -*- conding: utf-8 -*-
1
+ # -*- coding: utf-8 -*-
2
2
 
3
3
  module Yanser
4
- class OptParserError < RuntimeError
4
+ class Error < RuntimeError; end
5
+ class OptParserError < Error
5
6
  # empty for now
6
7
  end
7
8
 
8
- class YanserError < RuntimeError
9
+ class YanserError < Error
9
10
  # empty for now
10
11
  end
11
12
  end
@@ -7,41 +7,54 @@ require 'yanser/error'
7
7
 
8
8
  module Yanser
9
9
  class OptParser
10
- # OP expects cmd_args to be an array like ARGV
11
- # dummy output for temporary usage
10
+
11
+ # Different possible values accepted by Yahoo!.
12
+ REGIONS = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
13
+ 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
14
+ TYPES = ['all', 'resolved', 'open', 'undecided']
15
+ SORT_TYPES = ['relevance', 'date_desc', 'date_asc']
16
+ DATE_RANGES = ['all', '7', '7-30', '30-60', '60-90', 'more90']
17
+ SEARCH_LOCATIONS = ['all', 'question', 'best_answer']
18
+ OUTPUT_TYPES = ['xml', 'json', 'php', 'rss']
19
+ FILTERS = ['question', 'best_answer']
20
+
21
+ # Main class method.
22
+ # OP expects cmd_args to be an array like ARGV.
12
23
  def self.parse(cmd_args)
13
24
  @@options = {}
14
25
  @@options[:query_params] = {}
15
26
 
16
27
  parser = create_parser
17
28
 
18
- # if no options provided print the help
29
+ # If no options provided print the help.
19
30
  if cmd_args.empty?
20
- msg = "You have to provide some options. " +
21
- "yanser --help provides more information."
22
- raise OptParserError, msg
23
- # $stderr.printf "You have to provide some options.\n\n"
24
- # puts parser
25
- # exit 1
31
+ puts('You have to provide some options.',
32
+ 'Please start with <yanser --help>.')
33
+ exit(1)
26
34
  end
27
35
 
28
36
  # Parse ARGV and provide the options hash.
29
37
  # Check if everything is correct and handle exceptions
30
38
  begin
31
- parser.parse!(cmd_args)
32
- # rescue all exceptions from OptionParser
33
- rescue => e
34
- raise OptParserError, e.message
35
- # $stderr.printf "#{e.message.capitalize}\n\n"
36
- # puts parser
37
- # exit 1
39
+ parser.parse(cmd_args)
40
+ rescue OptionParser::InvalidArgument => e
41
+ arg = e.message.split.last
42
+ puts "The provided argument #{arg} is currently not supported by Yahoo!"
43
+ puts 'Please colsult <yanser --help>.'
44
+ exit(1)
45
+ rescue OptionParser::InvalidOption => e
46
+ puts "You have provided an #{e.message}."
47
+ puts 'Please colsult <yanser --help>.'
48
+ exit(1)
49
+ rescue
50
+ raise
38
51
  end
39
52
 
40
53
  # Check to see if we got the required arguments needed.
41
- check_required_options(@@options)
54
+ # check_required_options(@@options)
42
55
 
43
56
  # Set the search method.
44
- @@options[:query_type] = set_query_type(@@options[:query_params])
57
+ @@options[:method] = set_query_type(@@options[:query_params])
45
58
 
46
59
  # Set some defaults such as :region and :search_in
47
60
  @@options = set_defaults(@@options)
@@ -50,34 +63,23 @@ module Yanser
50
63
  end # parse
51
64
 
52
65
  private
53
-
54
- # Check if the value of given region is correct.
55
- # Now 14 regions are supported by Yahoo! Answers.
56
- def self.prove_region(region)
57
- regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
58
- 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
59
-
60
- unless regions.include?(region)
61
- msg = "The provided search region #{region} is currently not supported by Yahoo!"
62
- raise OptParserError, msg
63
- end
64
- end
65
-
66
+
66
67
  # define one of the following query types: TermQuery|CategoryQuery|
67
68
  # QuestionQuery|UserQuery
68
69
  def self.set_query_type(params)
69
70
  case
70
71
  when (params[:category_id] || params[:category_name]) && ! params[:query]
71
- 'CategoryQuery'
72
+ 'getByCategory'
72
73
  when params[:query]
73
- 'TermQuery'
74
+ 'questionSearch'
74
75
  when params[:user_id]
75
- 'UserQuery'
76
+ 'getByUser'
76
77
  when params[:question_id]
77
- 'QuestionQuery'
78
+ 'getQuestion'
78
79
  end
79
80
  end
80
-
81
+
82
+ =begin
81
83
  def self.check_required_options(options)
82
84
  required_opts = [:appid]
83
85
 
@@ -85,31 +87,19 @@ module Yanser
85
87
  if options[:query_params].has_key?(opt)
86
88
  next
87
89
  else
88
- msg = "A required option --#{opt} is missing."
89
- raise OptParserError, msg
90
+ puts "The required option --#{opt} is missing."
91
+ puts 'Please colsult <yanser --help>.'
92
+ exit(1)
90
93
  end
91
94
  end
92
95
  end
93
-
94
- def self.provide_dir(dir)
95
- dir = File.expand_path(dir)
96
- #check for existens
97
- if File.directory?(dir)
98
- if File.writable?(dir)
99
- return dir
100
- else
101
- $stderr.puts 'The directory you have provided is not writable!'
102
- exit 1
103
- end
104
- else
105
- FileUtils.mkdir_p(dir)
106
- return dir
107
- end
108
- end # provide_dir
96
+ =end
97
+
109
98
 
110
99
  def self.set_defaults(opts)
111
100
  # fake method for now
112
- return opts
101
+
102
+ opts
113
103
  end # set_defaults
114
104
 
115
105
  def self.create_parser
@@ -133,7 +123,9 @@ module Yanser
133
123
  opts.separator ' Mandatory search arguments:'
134
124
 
135
125
  opts.on('-k', '--key-word KEYWORD',
136
- 'Provide a single keyword or a boolean expression.'
126
+ 'Provide a single keyword or a boolean expression.',
127
+ 'You might combine this option with an explicit',
128
+ '<category> name or id to restrict your search.'
137
129
  ) do |keyword|
138
130
  @@options[:query_params][:query] = keyword
139
131
  # not a solution!!!
@@ -141,10 +133,12 @@ module Yanser
141
133
 
142
134
  end
143
135
 
144
- opts.separator ''
136
+ opts.separator ' and/or'
145
137
 
146
138
  opts.on('-c', '--category CATEGORY',
147
- 'Provide a category name or ID.'
139
+ 'Provide a category name or ID. This is useful',
140
+ 'while searching for categories or in the combination',
141
+ 'with some key words.'
148
142
  ) do |category|
149
143
 
150
144
  if category =~ /^[[:digit:]]+$/
@@ -155,7 +149,7 @@ module Yanser
155
149
 
156
150
  end
157
151
 
158
- opts.separator ''
152
+ opts.separator ' or'
159
153
 
160
154
  opts.on('--user-id ID',
161
155
  'Provide an user ID of questions you search for.',
@@ -164,7 +158,7 @@ module Yanser
164
158
  @@options[:query_params][:user_id] = user_id
165
159
  end
166
160
 
167
- opts.separator ''
161
+ opts.separator ' or'
168
162
 
169
163
  opts.on('--question-id ID',
170
164
  'Provide a question ID of the question you search for.',
@@ -172,32 +166,33 @@ module Yanser
172
166
  ) do |question_id|
173
167
  @@options[:query_params][:question_id] = question_id
174
168
  end
175
-
169
+
170
+ #################
176
171
  opts.separator ''
177
172
  opts.separator ' Optional search arguments:'
178
-
179
- opts.on('-r', '--region REGION',
180
- 'Provide a geographic region to search in for terms.',
181
- 'Possible values are: de, us, uk, ca, au, in, es, br,',
182
- ' ar, mx, e1, it, fr, sg.',
183
- 'This defaults to en.'
184
- ) do |region|
185
- # todo
186
- prove_region(region)
187
- @@options[:query_params][:region] = region
173
+ opts.on('--date-range DATE', DATE_RANGES,
174
+ 'Provide some date range for the search. It defaults to <all>.',
175
+ "Possible values are: <#{DATE_RANGES.join('>, <')}>."
176
+ ) do |date|
177
+ @@options[:query_params][:date_range] = date
188
178
  end
189
-
179
+
190
180
  opts.separator ''
191
-
192
- opts.on('-o', '--output-dir DIR',
193
- 'Provide an output folder.',
194
- 'This directory will be created if it does not exist yet.'
195
- ) do |output_dir|
196
- @@options[:output_dir] = provide_dir(output_dir)
181
+ opts.on('-f', '--output-format FORMAT', OUTPUT_TYPES,
182
+ "Provide an output format: <#{OUTPUT_TYPES.join('>, <')}>.",
183
+ 'It defaults to <xml>, you may simply ommit this option.'
184
+ ) do |f|
185
+ @@options[:query_params][:output] = f
197
186
  end
198
187
 
199
188
  opts.separator ''
189
+ opts.on('--filter TYPE', FILTERS,
190
+ "Possible values are: <#{FILTERS.join('>, <')}>."
191
+ ) do |type|
192
+ @@options[:query_params][:filter] = type
193
+ end
200
194
 
195
+ opts.separator ''
201
196
  opts.on('-l', '--limit NUMBER', Integer,
202
197
  'Provide a number of answers you want to get from Yahoo.',
203
198
  'This argument is not mandatory, if you want to get',
@@ -206,12 +201,12 @@ module Yanser
206
201
  @@options[:limit] = limit
207
202
  end
208
203
 
209
- opts.separator ''
210
- opts.on('-f', '--output-format FORMAT',
211
- 'Provide an output format: xml, json, rss, php.',
212
- 'It defaults to xml, and you can simply ommit this option.'
213
- ) do |f|
214
- raise NotImplementedError, 'Only default output format is implemented'
204
+ opts.separator ''
205
+ opts.on('-o', '--output-dir DIR',
206
+ 'Provide an output folder.',
207
+ 'This directory will be created if it does not exist yet.'
208
+ ) do |output_dir|
209
+ @@options[:output_dir] = output_dir
215
210
  end
216
211
 
217
212
  opts.separator ''
@@ -222,23 +217,92 @@ module Yanser
222
217
  'It can be useful if you want to put many query results',
223
218
  'in the same output folder.'
224
219
  ) do |pref|
225
- raise NotImplementedError, 'No prefixes implemeted.'
220
+ @@options[:prefix] = pref
226
221
  end
227
222
 
228
223
  opts.separator ''
229
- opts.on('--key-word-list FILE',
230
- 'Provide a file with key words or phrases (boolean syntax',
231
- 'allowed) one search token per line.'
232
- ) do |file_name|
233
- raise NotImplementedError
224
+ opts.separator ''
225
+ opts.on('-r', '--region REGION', REGIONS,
226
+ 'Provide a geographic region to search in for terms.',
227
+ 'Possible values are: de, us, uk, ca, au, in, es, br,',
228
+ ' ar, mx, e1, it, fr, sg.',
229
+ 'This defaults to <en>.'
230
+ ) do |region|
231
+
232
+ @@options[:query_params][:region] = region
234
233
  end
235
234
 
236
235
  opts.separator ''
236
+ opts.on('--search-in PLACE', SEARCH_LOCATIONS,
237
+ "Possible values are: <#{SEARCH_LOCATIONS.join('>, <')}>."
238
+ ) do |place|
239
+ @@options[:query_params][:search_in] = place
240
+ end
241
+
242
+ opts.separator ''
243
+ opts.on('--start POSITION',
244
+ 'Provide the position of the question in the search list',
245
+ 'you want start with.'
246
+ ) do |pos|
247
+ @@options[:query_params][:start] = pos
248
+ end
249
+
250
+ opts.separator ''
251
+ opts.on('--sort ORDER', SORT_TYPES,
252
+ "Possible values are: <#{SORT_TYPES.join('>, <')}>."
253
+ ) do |order|
254
+ @@options[:query_params][:sort] = order
255
+ end
256
+
257
+ opts.separator ''
258
+ opts.on('--type TYPE', TYPES,
259
+ "Possible values are: <#{TYPES.join('>, <')}>."
260
+ ) do |type|
261
+ @@options[:query_params][:type] = type
262
+ end
263
+
264
+
265
+ #################
266
+ opts.separator ""
267
+ opts.separator ' Experimental options. They may not be implemented yet.'
237
268
  opts.on('--exp-file FILE',
238
269
  'Provide a formal description of the experiment.'
239
270
  ) do |file_name|
240
- raise NotImplementedError
271
+ warn 'Not yet implemented!'
272
+ end
273
+
274
+ opts.separator ''
275
+ opts.on('--key-word-list FILE',
276
+ 'Provide a file with key words or phrases',
277
+ '(boolean syntax allowed), one search token per line.'
278
+ ) do |file_name|
279
+ warn 'Not yet implemented!'
280
+ end
281
+
282
+ opts.separator ''
283
+ opts.on('--log [LOGFILE]',
284
+ 'SOME DESCRIPTION'
285
+ ) do |file|
286
+ warn 'Add description!'
287
+ warn 'Not yet implemented!'
288
+ end
289
+
290
+ opts.separator ''
291
+ opts.on('--extract',
292
+ 'SOME DESCRIPTION'
293
+ ) do
294
+ # some non xml representation in a human readable format
295
+ warn 'Add description!'
296
+ warn 'Not yet implemented!'
241
297
  end
298
+ opts.separator ''
299
+ opts.on('-t', '--time-interval SECONDS', Integer,
300
+ 'SOME DESCRIPTION'
301
+ ) do |t|
302
+ @@options[:interval] = t
303
+ warn 'Add description!'
304
+ end
305
+
242
306
  opts.separator ""
243
307
  opts.separator "Common options:"
244
308
 
@@ -261,20 +325,24 @@ __END__
261
325
  # List of options planned for implementation.
262
326
  --appid
263
327
  -k, --key-word
264
- -c, --category-id # Term & CategorySearch
328
+ -c, --category # Term & CategorySearch
265
329
  --user-id
266
330
  --question-id # QuestionSearch
267
331
  -r, --region
268
332
  -f, --output-format
269
- -l, --limit
270
333
  --date-range
271
334
  --sort
335
+ --start
272
336
  --filter
337
+ --callback # bad idea
338
+ --type
339
+ --search-in
273
340
 
274
341
 
342
+ -l, --limit
275
343
  --prefix #prefix for output files
276
344
  --key-word-list
277
- --exp-file
345
+ --exp-file # experiment description, yaml
278
346
  -o, --output-dir
279
347
  -h, --help
280
348
  -v, --version
@@ -306,8 +374,9 @@ instance interface
306
374
  :type => "all" | "resolved" | "open" | "undecided", # default 'all'
307
375
  :user_id => '123456'
308
376
  },
309
- :query_type => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
377
+ :interval => Integer, # in seconds
378
+ :limit => Integer,
379
+ :method => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
310
380
  :output_dir => 'some path',
311
- :prefix => 'some prefix', # prefix for output files
312
- :limit => Integer
381
+ :prefix => 'some prefix' # prefix for output files
313
382
  }
@@ -0,0 +1,48 @@
1
+ require 'fileutils'
2
+
3
+ module Yanser
4
+ class Store
5
+ def initialize(params)
6
+ @params = params
7
+ end
8
+
9
+ def store(result)
10
+ if dir = @params[:output_dir]
11
+ provide_dir(dir)
12
+ save(result)
13
+ else
14
+ puts result
15
+ end
16
+ end
17
+
18
+ private
19
+
20
+ # rework!
21
+ def provide_dir(dir)
22
+ dir = File.expand_path(dir)
23
+ # check for existence
24
+ if File.directory?(dir)
25
+ unless File.writable?(dir)
26
+ warn 'The directory you have provided is not writable!'
27
+ exit(1)
28
+ end
29
+ else
30
+ FileUtils.mkdir_p(dir)
31
+ end
32
+ end
33
+
34
+ # save results to a dir
35
+ # not a good implementation
36
+ # interface (filename, data)
37
+ def save(result)
38
+ filename = File.join(@params[:output_dir],
39
+ "yanser_output.xml")
40
+ File.open(filename, 'w') do |file|
41
+ file.puts result
42
+ end
43
+ end
44
+ end # Store
45
+ end
46
+
47
+
48
+
@@ -1,3 +1,3 @@
1
1
  module Yanser
2
- VERSION = '0.0.3'
2
+ VERSION = '0.1.0'
3
3
  end
@@ -1,29 +1,10 @@
1
- # this solution is not ideal since I force people to use 'rubygems'
2
- # possible it would be a better solution to prompt at this point
3
- # to install the lib in the way the user is accustomed to
4
- =begin
5
- begin
6
- require 'yanapi'
7
- rescue LoadError
8
- require 'rubygems'
9
- require 'yanapi'
10
- end
11
- =end
12
1
  require 'yanapi'
13
2
 
14
3
  # :title: YANSER, Yahoo! ANSwers harvestER
15
- # :main: Yanser
4
+ # :main: README
5
+
16
6
  # Main processing class.
17
- # Yanser encapsulates the main routine and instantiates
18
- # all other classes.
19
- #--
20
- # Yanser takes the users input and validates it.
21
- # It decides which search method to choose.
22
- # Then it collects all parameters and useful default values,
23
- # creates an XyzQuery with the starting point of 0 and gets the first results.
24
- # If more results were requested, Yanser creates a similar XyzQuery and gets
25
- # the next result set until the result limitation set by the user is met.
26
- #
7
+ # Yanser encapsulates the main routine and instantiates all other classes.
27
8
  module Yanser
28
9
 
29
10
  class Yanser
@@ -38,98 +19,85 @@ module Yanser
38
19
  # Yahoo! Answers returns maximum 50 results.
39
20
  MAX_RESULTS = 50
40
21
 
41
- def initialize(opts)
42
-
43
- # the minimal output of an OptionParser
44
- # {:query_type=>u|q|c|w, :query_params=>{appid, start, results, query}}
45
- # opts come from the OptionParser
46
- # they are supposed to be correct, no validation here
47
- @options = opts
48
-
49
- @options[:limit] = @options[:limit] || START_LIMIT + MAX_RESULTS
22
+ def initialize(params)
23
+ # Libs are not allowed to mutate the input.
24
+ @params = params.clone
50
25
 
51
- @query_params = @options[:query_params]
26
+ @params[:limit] = @params[:limit] || START_LIMIT + MAX_RESULTS
52
27
 
53
- @output_type = @query_params[:output] || 'xml'
28
+ @query_params = @params[:query_params]
54
29
 
30
+ @output = @query_params[:output] || 'xml'
31
+
32
+ # Yahoo counts results beginning with 0.
33
+ # <:results=50> means all questins from 0 to 49.
55
34
  @query_params[:start] = @query_params[:start] || 0
56
35
  end
57
36
 
58
- # TODO: implement some logging
37
+ # It returns the accumulated string or <nil>, if nothing found.
59
38
  def start
60
-
61
- if @options[:query_type] == 'QuestionQuery'
62
- query(@query_params)
63
- elsif @options[:limit] < MAX_RESULTS
64
- @query_params[:results] = @options[:limit]
65
- query(@query_params)
39
+ if @params[:method] == 'getQuestion'
40
+ response = get_response(@params)
66
41
  else
67
- @query_params[:results] = MAX_RESULTS
42
+ response = ''
68
43
 
69
- while query(@query_params) do
70
- # we get the next start point here
71
- @query_params[:start] += MAX_RESULTS
44
+ while @query_params[:start] < @params[:limit]
45
+ results_left = @params[:limit] - @query_params[:start]
72
46
 
73
- results_left = @options[:limit] - @query_params[:start]
74
- if results_left == 0
75
- break
76
- elsif results_left < MAX_RESULTS
47
+ if results_left < MAX_RESULTS
77
48
  @query_params[:results] = results_left
49
+ else
50
+ @query_params[:results] = MAX_RESULTS
51
+ end
52
+
53
+ r = get_response(@params)
54
+ if r
55
+ response << r
56
+ else
57
+ if response.empty?
58
+ response = nil
59
+ end
78
60
  break
79
61
  end
80
- end # while
81
-
82
- end # if
83
-
62
+ @query_params[:start] += @query_params[:results]
63
+ sleep(QUERY_INTERVAL)
64
+ end
65
+ end
66
+
67
+ response
84
68
  end # start
85
69
 
86
70
  private
87
-
88
- def query(params)
89
- q = create_query(params)
90
- tries = 0
71
+ # It returns a result string or <nil>.
72
+ # It tries three times in case of YANAPI::ExternalError,
73
+ # it gives up if a YANAPI::ContentError occurs (returns <nil>).
74
+ def get_response(params)
75
+ query = create_query(params)
76
+ t = 0
91
77
  begin
92
- tries += 1
93
- result = q.get
94
- output(result)
95
- sleep(2)
96
- rescue YANAPI::EmptyResponse => e
97
- $sderr.puts e
98
- return false # do not iterate futher
99
- rescue => e # some errors to retry
100
- if (tries < 4)
101
- sleep(QUERY_INTERVAL**tries)
78
+ t += 1
79
+ response = query.get
80
+ rescue YANAPI::ExternalError => e
81
+ warn "External error: #{e}!"
82
+ if t <= 3
83
+ sleep(QUERY_INTERVAL**t)
102
84
  retry
103
85
  else
104
- $stderr.puts e
105
- return false # do not iterate futher
86
+ raise
87
+ end
106
88
  end
89
+
90
+ response
107
91
  end
108
- return true # we may iterate futher
109
- end
110
92
 
93
+ # It creates a new api object or raises an exception.
94
+ # YANAPI::UserError should not be rescued, it indicates that the user
95
+ # must correct the input hash.
111
96
  def create_query(params)
112
- eval("YANAPI::#{@options[:query_type]}.new(params)")
113
- end
114
-
115
- def output(result)
116
- if @options[:output_dir]
117
- save(result)
118
- else
119
- puts result
120
- end
121
- end
122
-
123
- # save results to a dir
124
- # this dir exists since has been proved by OptionParser
125
- # not a good implementation
126
- # interface (filename, data)
127
- def save(result)
128
- filename = File.join(@options[:output_dir], "#{@query_params[:start]}.#{@output_type}")
129
- file = File.new(filename, 'w')
130
- file.puts result
131
- file.close
97
+ YANAPI::API.new(params)
132
98
  end
133
99
 
134
100
  end # class Yanser
135
101
  end # module Yanser
102
+
103
+ __END__
@@ -1,138 +1,264 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require 'test/unit'
3
3
  require 'yanser/opt_parser'
4
+ require 'stringio' # for helper methods
5
+
4
6
  include Yanser
7
+
5
8
  class TestOptionParser < Test::Unit::TestCase
6
- def setup
7
- @input = ['--appid', 'YahooDemo', '-r',
8
- 'de', '-l', '10', '-o', '/tmp',
9
- '-k' 'Haus AND grün']
10
- @output = {
11
- :query_params => {
12
- :appid => 'YahooDemo',
13
- :query => 'Haus AND grün',
14
- :region => 'de',
15
- :search_in => 'question'
16
- },
17
- :query_type => 'TermQuery',
18
- :output_dir => '/tmp',
19
- :limit => 10
20
- }
21
9
 
10
+ def setup
22
11
  @cmd_args = ['--appid', 'YahooDemo']
23
- @minimal_input = ['--appid', 'YahooDemo',
24
- '-k', 'Haus']
12
+ @minimal_input = ['--appid', 'YahooDemo', '-k', 'Haus']
13
+ @valid_opts = ['--appid', 'MyID',
14
+ '--category', '12345',
15
+ '--date-range', 'all',
16
+ '--exp-file', 'exp_file.yml',
17
+ '--filter', 'question',
18
+ '--help',
19
+ '--key-word', 'Haus',
20
+ '--key-word-list', 'some_file.txt',
21
+ '--limit', '1',
22
+ '--output-dir', '/tmp',
23
+ '--output-format', 'xml',
24
+ '--prefix', 'file1',
25
+ '--question-id', '12345',
26
+ '--region', 'de',
27
+ '--search-in', 'question',
28
+ '--sort', 'relevance',
29
+ '--start', '10',
30
+ '--time-interval', '10',
31
+ '--type', 'resolved',
32
+ '--user-id', '12345',
33
+ '--version'
34
+ ]
25
35
  end
26
36
 
27
37
  def teardown
28
38
  end
29
39
 
30
- # it should have a public class method 'parse'
40
+ # It should have a public class method 'parse'.
31
41
  def test_public_methods
32
42
  assert_respond_to(Yanser::OptParser, :parse)
33
43
  end
34
44
 
35
- # it should return a non empty parameter hash
45
+ # It should return a non empty parameter hash.
36
46
  def test_parse_method
37
- return_value = Yanser::OptParser.parse(@cmd_args)
38
- assert(return_value.instance_of? Hash)
39
- assert(! return_value.empty?)
47
+ return_value = Yanser::OptParser.parse(@minimal_input)
48
+ assert(return_value.instance_of?(Hash))
49
+ assert_equal(false, return_value.empty?)
40
50
  end
41
51
 
42
- # it should return a right complex output
43
- def test_output
44
- cmd_args = ['--appid', 'YahooDemo', '-r',
45
- 'de', '-l', '10', '-o', '/tmp',
46
- '-k' 'Haus AND grün'
47
- ]
48
- assert_equal(@output, Yanser::OptParser.parse(cmd_args))
52
+ # It should reject the empty input and exit.
53
+ def test_empty_input
54
+ out, err = intercept_output do
55
+ assert_raises(SystemExit) { OptParser.parse([]) }
56
+ end
57
+ assert_match(/You have to provide some options./, out)
49
58
  end
50
59
 
60
+ # It should accept correct options.
61
+ # Invalid options are a matter for OptionParser itself,
62
+ # do not test it here.
63
+ # We test only, that OP exits and does not raise an exception.
64
+ def test_accept_correct_options
65
+ # these options should be treated separately
66
+ @valid_opts.delete('--help')
67
+ @valid_opts.delete('--version')
68
+ assert_nothing_raised { OptParser.parse(@valid_opts) }
69
+
70
+ stdout, stderr = intercept_output do
71
+ assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
72
+ end
73
+
74
+ assert_match(/You have provided an invalid option:/, stdout)
75
+ end
76
+
77
+ # It should successfully exit with some options.
78
+ def test_successful_exit
79
+ quietly do
80
+ success_args = ['-h', '--help', '-v', '--version']
81
+ success_args.each do |arg|
82
+ assert_raises(SystemExit) { OptParser.parse(arg.split) }
83
+ end
84
+ end
85
+ end
86
+
87
+ =begin
88
+ # It is done in YANAPI.
89
+ # It should require mandatory cmd arguments.
90
+ def test_mandatory_args
91
+ flunk('Rewrite!')
92
+ args = ['-k', 'Haus']
93
+ assert_raises(OptParserError) {OptParser.parse args}
94
+ end
95
+ =end
96
+
97
+ # It should accept only valid date arguments.
98
+ def test_date_arguments
99
+ validate(OptParser::DATE_RANGES, '--date-range', :date_range)
100
+ end
101
+
102
+ # It should accept a file and parse it.
103
+ def test_exp_file
104
+ flunk('Not implemented! It should accept a file and parse it.')
105
+ end
106
+
107
+ # It should accept only valid filter arguments.
108
+ def test_filter_arguments
109
+ validate(OptParser::FILTERS, '--filter', :filter)
110
+ end
111
+
112
+ # It should accept only numeric parameters for limits.
113
+ def test_limit_arguments
114
+ flunk('Not implemented! It should accept only numeric paramters for limits.')
115
+ end
51
116
 
52
- # it should set up the right method
53
- def test_method
117
+ # It should accept only valid output formats.
118
+ def test_output_arguments
119
+ validate(OptParser::OUTPUT_TYPES, '--output-format', :output)
120
+ end
121
+
122
+ # It should accept only valid regions.
123
+ def test_region_arguments
124
+ validate(OptParser::REGIONS, '-r', :region)
125
+ end
126
+
127
+ # It should accept only valid search locations.
128
+ def test_search_locations
129
+ validate(OptParser::SEARCH_LOCATIONS, '--search-in', :search_in)
130
+ end
131
+
132
+ # It should accept only valid sort types.
133
+ def test_sort_types
134
+ validate(OptParser::SORT_TYPES, '--sort', :sort)
135
+ end
136
+
137
+ # It should accept only valid types.
138
+ def test_types
139
+ validate(OptParser::TYPES, '--type', :type)
140
+ end
141
+
142
+ # It should set up the right search method.
143
+ def test_correct_setting_of_the_method
54
144
 
55
145
  cmd_args = ['-k', 'Haus'] | @cmd_args
56
- assert_equal('TermQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
146
+ assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
57
147
 
58
148
  cmd_args = ['-k', 'Haus', '-c', '09876543'] | @cmd_args
59
- assert_equal('TermQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
149
+ assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
60
150
 
61
151
  cmd_args = ['-c', '09876543'] | @cmd_args
62
- assert_equal('CategoryQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
152
+ assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
63
153
 
64
154
  cmd_args = ['-c', 'Ausgehen'] | @cmd_args
65
- assert_equal('CategoryQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
155
+ assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
66
156
 
67
157
  cmd_args = ['--user-id', '12345'] | @cmd_args
68
- assert_equal('UserQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
158
+ assert_equal('getByUser', OptParser.parse(cmd_args)[:method])
69
159
 
70
160
  cmd_args = ['--question-id', '12345'] | @cmd_args
71
- assert_equal('QuestionQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
161
+ assert_equal('getQuestion', OptParser.parse(cmd_args)[:method])
72
162
  end
73
163
 
74
- # it should set either :category_id or :category_name
164
+ # It should set either :category_id or :category_name.
75
165
  def test_category_identifier
76
166
  args = ['-k', 'Haus', '-c', '123'] | @cmd_args
77
167
  opts = Yanser::OptParser.parse(args)
78
168
  assert_equal('123', opts[:query_params][:category_id])
79
- assert(! opts[:query_params].has_key?(:category_name))
169
+ assert_equal(false, opts[:query_params].has_key?(:category_name))
80
170
 
81
171
  args = ['-k', 'Haus', '-c', 'Haushalt'] | @cmd_args
82
172
  opts = Yanser::OptParser.parse(args)
83
173
  assert_equal('Haushalt', opts[:query_params][:category_name])
84
- assert(! opts[:query_params].has_key?(:category_id))
85
-
174
+ assert_equal(false, opts[:query_params].has_key?(:category_id))
86
175
  end
87
176
 
88
- def test_output_dir
177
+ # It should set reasonable defaults.
178
+ # Yanser may set defaults, Yanapi provides minimal output, i.e. if Yahoo
179
+ # changes defaults, every user can be affected. That's whe we offer some
180
+ # reasonable defaults for our experiments.
181
+ # :region => 'de'
182
+ # :date_range => 'all'
183
+ def test_default_values
184
+ flunk('Not implemented! It should set reasonable defaults.')
185
+
89
186
  end
90
187
 
91
- def test_query
188
+ # It should return a right complex output.
189
+ def test_output
190
+ cmd_args = ['--appid', 'YahooDemo', '-r',
191
+ 'de', '-l', '10', '-o', '/tmp',
192
+ '-k' 'Haus AND grün'
193
+ ]
194
+ output = {
195
+ :query_params => {
196
+ :appid => 'YahooDemo',
197
+ :query => 'Haus AND grün',
198
+ :region => 'de',
199
+ :search_in => 'question'
200
+ },
201
+ :method => 'questionSearch',
202
+ :output_dir => '/tmp',
203
+ :limit => 10
204
+ }
205
+ assert_equal(output, OptParser.parse(cmd_args))
92
206
  end
93
207
 
94
- # it should reject the empty input
95
- def test_empty_input
96
- assert_raises(OptParserError) {OptParser.parse []}
208
+ # It should set correct parameters for logging.
209
+ def test_log_arguments
210
+ flunk('Not implemented. It should set correct parameters for logging.')
97
211
  end
98
212
 
99
- # it should accept only valid regions
100
- def test_region_validity
101
- regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
102
- 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
103
- regions.each do |r|
104
- input = @minimal_input | ['-r', r]
105
- assert_nothing_raised(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
213
+ # It should set correct parameters for data extraction.
214
+ def test_extraction_arguments
215
+ flunk('Not implemented. It should set correct parameters for data extraction.')
216
+ end
217
+ ################################################################################
218
+ # It is a helper method, many testable units provide some verbose output
219
+ # to stderr and/or stdout. It is useful to suppress any kind of verbosity.
220
+ def quietly(&b)
221
+ begin
222
+ orig_stderr = $stderr.clone
223
+ orig_stdout = $stdout.clone
224
+ $stderr.reopen(File.new('/dev/null', 'w'))
225
+ $stdout.reopen(File.new('/dev/null', 'w'))
226
+ b.call
227
+ ensure
228
+ $stderr.reopen(orig_stderr)
229
+ $stdout.reopen(orig_stdout)
106
230
  end
107
- input = @minimal_input | ['-r', 'abc']
108
- assert_raises(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
109
231
  end
110
232
 
111
- # it should set reasonable defaults
112
- def test_default_values
233
+ # It is a helper method for validation of used parameter values.
234
+ def validate(params, option, key)
235
+ params.each do |p|
236
+ input = @minimal_input | [option, p]
237
+ params = {}
238
+ assert_nothing_raised(SystemExit) { params = OptParser.parse(input) }
239
+ assert_equal(p, params[:query_params][key])
240
+ end
241
+
242
+ input = @minimal_input | [option, 'invalid']
243
+ stdout, stderr = intercept_output do
244
+ assert_raises(SystemExit) { OptParser.parse(input) }
245
+ end
246
+ assert_match(/The provided .+ is currently not supported by Yahoo!/,
247
+ stdout)
113
248
  end
114
249
 
115
- # it should accept only valid query type combinations
116
- def test_qtype_combinations
117
- end
250
+ # It is a helper method for handling stdout and stderr as strings.
251
+ def intercept_output
252
+ orig_stdout = $stdout
253
+ orig_stderr = $stderr
254
+ $stdout = StringIO.new
255
+ $stderr = StringIO.new
118
256
 
119
- # it should require mandatory cmd arguments
120
- def test_mandatory_args
121
- args = ['-k', 'Haus']
122
- assert_raises(OptParserError) {OptParser.parse args}
123
- end
257
+ yield
124
258
 
125
- # it should successfully exit with some options
126
- def test_successful_exit
127
- orig_stderr = $stderr.clone
128
- orig_stdout = $stdout.clone
129
- $stderr.reopen(File.new('/dev/null', 'w'))
130
- $stdout.reopen(File.new('/dev/null', 'w'))
131
- success_args = ['-h', '--help', '-v', '--version']
132
- success_args.each do |arg|
133
- assert_raises(SystemExit) {OptParser.parse(arg.to_a)}
134
- end
135
- $stderr.reopen(orig_stderr)
136
- $stdout.reopen(orig_stdout)
259
+ return $stdout.string, $stderr.string
260
+ ensure
261
+ $stdout = orig_stdout
262
+ $stderr = orig_stderr
137
263
  end
138
264
  end
@@ -10,6 +10,7 @@ class TestYanser < Test::Unit::TestCase
10
10
  def teardown
11
11
  end
12
12
 
13
+ # It should have a public method <start>.
13
14
  def test_public_methods
14
15
  yanser = Yanser::Yanser.new(
15
16
  :query_params => {
@@ -26,10 +27,15 @@ class TestYanser < Test::Unit::TestCase
26
27
  assert_respond_to(yanser, :start)
27
28
  end
28
29
 
30
+ # It should have a constant <VERSION>.
29
31
  def test_constants
30
32
  assert(Yanser::VERSION.instance_of?(String))
31
33
  end
32
- def test_mandatory_options
34
+
35
+ # It should accept input according to the defined interface.
36
+ def test_input
37
+ flunk('Not implemented. It should accept input according to the defined interface.')
33
38
  end
39
+
34
40
 
35
41
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yanser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 3
10
- version: 0.0.3
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Andrei Beliankou
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-08-06 00:00:00 Z
18
+ date: 2011-08-12 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: yanapi
@@ -23,17 +23,17 @@ dependencies:
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
24
  none: false
25
25
  requirements:
26
- - - "="
26
+ - - ">="
27
27
  - !ruby/object:Gem::Version
28
- hash: 25
28
+ hash: 17
29
29
  segments:
30
30
  - 0
31
+ - 3
31
32
  - 1
32
- - 1
33
- version: 0.1.1
33
+ version: 0.3.1
34
34
  type: :runtime
35
35
  version_requirements: *id001
36
- description: YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
36
+ description: YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics, e.g. for Question Answering.
37
37
  email: a.belenkow@uni-trier.de
38
38
  executables:
39
39
  - yanser
@@ -42,25 +42,27 @@ extensions: []
42
42
  extra_rdoc_files:
43
43
  - README
44
44
  - LICENSE
45
+ - CHANGELOG
45
46
  files:
46
- - lib/tester.rb
47
47
  - lib/yanser/error.rb
48
+ - lib/yanser/store.rb
48
49
  - lib/yanser/yanser.rb
49
50
  - lib/yanser/opt_parser.rb
50
51
  - lib/yanser/version.rb
51
52
  - bin/yanser
52
- - LICENSE
53
- - Rakefile
54
53
  - README
54
+ - LICENSE
55
+ - CHANGELOG
55
56
  - test/test_opt_parser.rb
56
57
  - test/test_yanser.rb
58
+ - .yardopts
57
59
  homepage: http://www.uni-trier.de/index.php?id=34451
58
60
  licenses: []
59
61
 
60
62
  post_install_message:
61
63
  rdoc_options:
62
64
  - -m
63
- - README.rdoc
65
+ - README
64
66
  require_paths:
65
67
  - lib
66
68
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -86,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
88
  requirements: []
87
89
 
88
90
  rubyforge_project: yanser
89
- rubygems_version: 1.7.2
91
+ rubygems_version: 1.8.7
90
92
  signing_key:
91
93
  specification_version: 3
92
94
  summary: Yanser is a convinient search tool providing access to the Yahoo! Answers Q/A collection.
data/Rakefile DELETED
@@ -1,41 +0,0 @@
1
- # We need rake to use FileLists.
2
- require 'rake'
3
- # We can require 'rake/clean' to add 'clobber' and 'clean' tasks.
4
- require 'rake/clean'
5
-
6
-
7
- # I am not sure how to use this constant.
8
- SRC = FileList['**/*.rb']
9
-
10
- CLOBBER.include('doc', '**/*.html', '**/*.gem')
11
-
12
- # testing
13
- require 'rake/testtask'
14
- Rake::TestTask.new do |t|
15
- t.test_files = FileList.new('test/**/*.rb').to_a
16
- # rake starts another ruby process with a new set of options.
17
- # ruby --some-option -S rake is not sufficient to propagate
18
- # the option "--some-option".
19
- t.ruby_opts = ['-rubygems']
20
- end
21
-
22
- # Build the gem package
23
- load 'yanser.gemspec'
24
- require 'rubygems/package_task'
25
- Gem::PackageTask.new(GEMSPEC).define
26
-
27
- # Generate documentation
28
- require 'rdoc/task'
29
- RDoc::Task.new do |rdoc|
30
- rdoc.rdoc_files.include('README', 'LICENSE', 'lib/**/*')
31
- end
32
-
33
- desc "Open an irb session preloaded with this library."
34
- task :console do
35
- sh "irb -rubygems -I lib -r yanser/opt_parser -r yanser/yanser"
36
- end
37
-
38
- desc 'Run the main executable file of the project.'
39
- task :start do
40
- sh "ruby -w -rubygems -I lib bin/yanser #{ENV['cmd']}"
41
- end
@@ -1,18 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'yanser'
4
-
5
- params = {
6
- :query_method => 'TermQuery',
7
- :query_params => {
8
- :appid => 'YahooDemo',
9
- :query => 'Köln',
10
- :region => 'de',
11
- :results => 5,
12
- :start => 0
13
- }
14
- }
15
-
16
- y = Yanser.new(params)
17
-
18
- y.start