yanser 0.0.3 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,6 @@
1
+ --private
2
+ --protected
3
+ --title 'YANSER - Yahoo! ANSwers harvestER'
4
+ -
5
+ CHANGELOG
6
+ LICENSE
@@ -0,0 +1,24 @@
1
+ === COMPLETED
2
+ ==== 0.1.0
3
+ Yanser now supports YANAPI 0.3.1 and later.
4
+ ==== 0.0.3
5
+ Yanser depends on the old interface of YANAPI. No functional improvements.
6
+ ==== 0.0.2
7
+ Small changes in the documentation.
8
+ ==== 0.0.1
9
+ Initial release of Yanser.
10
+
11
+
12
+ === PLANNED
13
+
14
+ ==== 0.0.2
15
+ ==== 0.0.3
16
+ ==== 0.4.0
17
+ ==== 0.5.0
18
+ ==== 0.6.0
19
+ ==== 0.7.0
20
+ ==== 0.8.0
21
+ ==== 0.9.0
22
+ ==== 1.0.0
23
+
24
+
data/README CHANGED
@@ -2,20 +2,36 @@
2
2
 
3
3
  * {RubyGems}[http://rubygems.org/gems/yanser]
4
4
  * Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
5
- * {YANAPI Project Page}[http://yanser.rubyforge.org/]
5
+ * {YANSER Project Page}[http://yanser.rubyforge.org/]
6
6
 
7
7
  == DESCRIPTION
8
8
 
9
- YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
9
+ YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access
10
+ to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI
11
+ and helps to search for Questions and Answers which contain a set of key words,
12
+ belong to a specific semantic domain or are posted by a certain user.
13
+
14
+ Yanser is a research tool in the field of Computational Linguistics.
10
15
 
11
16
  == SYNOPSIS
12
17
  $ yanser --help
13
18
 
19
+ == FEATURE LIST
20
+
21
+ == INSTALLATION
22
+
23
+ == USAGE
24
+
25
+ == CHANGELOG
26
+
27
+ See CHANGELOG.
28
+
29
+ == CONTRIBUTORS
14
30
 
15
31
 
16
32
  == LICENSE
17
33
 
18
34
  YANSER is a copyrighted software by Andrei Beliankou, 2011.
35
+
19
36
  You may use, redistribute and change it under the terms
20
37
  provided in the LICENSE file.
21
-
data/bin/yanser CHANGED
@@ -1,19 +1,36 @@
1
1
  # -*- coding: utf-8; mode: ruby -*-
2
-
3
- # это неверное решение, на самом деле я не должен напрямую указывать,
4
- # где находятся файлы, но пока пусть будет так
5
- # it is required only for .tar.gz distribution
6
- #lib_path = File.expand_path('../../lib', __FILE__)
7
- #$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
8
-
9
- # подобный способ является правильным, только так классы Yanser и OptionParser будут независимы
10
2
  require 'yanser/yanser'
11
3
  require 'yanser/opt_parser'
4
+ require 'yanser/store'
12
5
 
13
- # class method, why not?
14
- options = Yanser::OptParser.parse(ARGV)
15
-
6
+ begin
7
+ options = Yanser::OptParser.parse(ARGV)
8
+ rescue
9
+ raise
10
+ end
16
11
 
17
- yanser = Yanser::Yanser.new(options)
12
+ # Handle here exceptions from YANAPI, use messages for it.
13
+ # Add "See <yanser --help>." to it.
14
+ begin
15
+ yanser = Yanser::Yanser.new(options)
16
+ result = yanser.start
17
+ rescue YANAPI::UserError
18
+ # Give up with an error message.
19
+ # The user must change the input.
20
+ raise
21
+ rescue YANAPI::ContentError
22
+ # Yahoo! says something.
23
+ raise
24
+ rescue YANAPI::ExternalError
25
+ # Give up with an error message.
26
+ # It is a persistent external error.
27
+ # We tried three times in Yanser, but the error seems to persist.
28
+ raise
29
+ end
18
30
 
19
- yanser.start
31
+ begin
32
+ storage = Yanser::Store.new(options)
33
+ storage.store(result)
34
+ rescue
35
+ raise
36
+ end
@@ -1,11 +1,12 @@
1
- # -*- conding: utf-8 -*-
1
+ # -*- coding: utf-8 -*-
2
2
 
3
3
  module Yanser
4
- class OptParserError < RuntimeError
4
+ class Error < RuntimeError; end
5
+ class OptParserError < Error
5
6
  # empty for now
6
7
  end
7
8
 
8
- class YanserError < RuntimeError
9
+ class YanserError < Error
9
10
  # empty for now
10
11
  end
11
12
  end
@@ -7,41 +7,54 @@ require 'yanser/error'
7
7
 
8
8
  module Yanser
9
9
  class OptParser
10
- # OP expects cmd_args to be an array like ARGV
11
- # dummy output for temporary usage
10
+
11
+ # Different possible values accepted by Yahoo!.
12
+ REGIONS = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
13
+ 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
14
+ TYPES = ['all', 'resolved', 'open', 'undecided']
15
+ SORT_TYPES = ['relevance', 'date_desc', 'date_asc']
16
+ DATE_RANGES = ['all', '7', '7-30', '30-60', '60-90', 'more90']
17
+ SEARCH_LOCATIONS = ['all', 'question', 'best_answer']
18
+ OUTPUT_TYPES = ['xml', 'json', 'php', 'rss']
19
+ FILTERS = ['question', 'best_answer']
20
+
21
+ # Main class method.
22
+ # OP expects cmd_args to be an array like ARGV.
12
23
  def self.parse(cmd_args)
13
24
  @@options = {}
14
25
  @@options[:query_params] = {}
15
26
 
16
27
  parser = create_parser
17
28
 
18
- # if no options provided print the help
29
+ # If no options provided print the help.
19
30
  if cmd_args.empty?
20
- msg = "You have to provide some options. " +
21
- "yanser --help provides more information."
22
- raise OptParserError, msg
23
- # $stderr.printf "You have to provide some options.\n\n"
24
- # puts parser
25
- # exit 1
31
+ puts('You have to provide some options.',
32
+ 'Please start with <yanser --help>.')
33
+ exit(1)
26
34
  end
27
35
 
28
36
  # Parse ARGV and provide the options hash.
29
37
  # Check if everything is correct and handle exceptions
30
38
  begin
31
- parser.parse!(cmd_args)
32
- # rescue all exceptions from OptionParser
33
- rescue => e
34
- raise OptParserError, e.message
35
- # $stderr.printf "#{e.message.capitalize}\n\n"
36
- # puts parser
37
- # exit 1
39
+ parser.parse(cmd_args)
40
+ rescue OptionParser::InvalidArgument => e
41
+ arg = e.message.split.last
42
+ puts "The provided argument #{arg} is currently not supported by Yahoo!"
43
+ puts 'Please colsult <yanser --help>.'
44
+ exit(1)
45
+ rescue OptionParser::InvalidOption => e
46
+ puts "You have provided an #{e.message}."
47
+ puts 'Please colsult <yanser --help>.'
48
+ exit(1)
49
+ rescue
50
+ raise
38
51
  end
39
52
 
40
53
  # Check to see if we got the required arguments needed.
41
- check_required_options(@@options)
54
+ # check_required_options(@@options)
42
55
 
43
56
  # Set the search method.
44
- @@options[:query_type] = set_query_type(@@options[:query_params])
57
+ @@options[:method] = set_query_type(@@options[:query_params])
45
58
 
46
59
  # Set some defaults such as :region and :search_in
47
60
  @@options = set_defaults(@@options)
@@ -50,34 +63,23 @@ module Yanser
50
63
  end # parse
51
64
 
52
65
  private
53
-
54
- # Check if the value of given region is correct.
55
- # Now 14 regions are supported by Yahoo! Answers.
56
- def self.prove_region(region)
57
- regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
58
- 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
59
-
60
- unless regions.include?(region)
61
- msg = "The provided search region #{region} is currently not supported by Yahoo!"
62
- raise OptParserError, msg
63
- end
64
- end
65
-
66
+
66
67
  # define one of the following query types: TermQuery|CategoryQuery|
67
68
  # QuestionQuery|UserQuery
68
69
  def self.set_query_type(params)
69
70
  case
70
71
  when (params[:category_id] || params[:category_name]) && ! params[:query]
71
- 'CategoryQuery'
72
+ 'getByCategory'
72
73
  when params[:query]
73
- 'TermQuery'
74
+ 'questionSearch'
74
75
  when params[:user_id]
75
- 'UserQuery'
76
+ 'getByUser'
76
77
  when params[:question_id]
77
- 'QuestionQuery'
78
+ 'getQuestion'
78
79
  end
79
80
  end
80
-
81
+
82
+ =begin
81
83
  def self.check_required_options(options)
82
84
  required_opts = [:appid]
83
85
 
@@ -85,31 +87,19 @@ module Yanser
85
87
  if options[:query_params].has_key?(opt)
86
88
  next
87
89
  else
88
- msg = "A required option --#{opt} is missing."
89
- raise OptParserError, msg
90
+ puts "The required option --#{opt} is missing."
91
+ puts 'Please colsult <yanser --help>.'
92
+ exit(1)
90
93
  end
91
94
  end
92
95
  end
93
-
94
- def self.provide_dir(dir)
95
- dir = File.expand_path(dir)
96
- #check for existens
97
- if File.directory?(dir)
98
- if File.writable?(dir)
99
- return dir
100
- else
101
- $stderr.puts 'The directory you have provided is not writable!'
102
- exit 1
103
- end
104
- else
105
- FileUtils.mkdir_p(dir)
106
- return dir
107
- end
108
- end # provide_dir
96
+ =end
97
+
109
98
 
110
99
  def self.set_defaults(opts)
111
100
  # fake method for now
112
- return opts
101
+
102
+ opts
113
103
  end # set_defaults
114
104
 
115
105
  def self.create_parser
@@ -133,7 +123,9 @@ module Yanser
133
123
  opts.separator ' Mandatory search arguments:'
134
124
 
135
125
  opts.on('-k', '--key-word KEYWORD',
136
- 'Provide a single keyword or a boolean expression.'
126
+ 'Provide a single keyword or a boolean expression.',
127
+ 'You might combine this option with an explicit',
128
+ '<category> name or id to restrict your search.'
137
129
  ) do |keyword|
138
130
  @@options[:query_params][:query] = keyword
139
131
  # not a solution!!!
@@ -141,10 +133,12 @@ module Yanser
141
133
 
142
134
  end
143
135
 
144
- opts.separator ''
136
+ opts.separator ' and/or'
145
137
 
146
138
  opts.on('-c', '--category CATEGORY',
147
- 'Provide a category name or ID.'
139
+ 'Provide a category name or ID. This is useful',
140
+ 'while searching for categories or in the combination',
141
+ 'with some key words.'
148
142
  ) do |category|
149
143
 
150
144
  if category =~ /^[[:digit:]]+$/
@@ -155,7 +149,7 @@ module Yanser
155
149
 
156
150
  end
157
151
 
158
- opts.separator ''
152
+ opts.separator ' or'
159
153
 
160
154
  opts.on('--user-id ID',
161
155
  'Provide an user ID of questions you search for.',
@@ -164,7 +158,7 @@ module Yanser
164
158
  @@options[:query_params][:user_id] = user_id
165
159
  end
166
160
 
167
- opts.separator ''
161
+ opts.separator ' or'
168
162
 
169
163
  opts.on('--question-id ID',
170
164
  'Provide a question ID of the question you search for.',
@@ -172,32 +166,33 @@ module Yanser
172
166
  ) do |question_id|
173
167
  @@options[:query_params][:question_id] = question_id
174
168
  end
175
-
169
+
170
+ #################
176
171
  opts.separator ''
177
172
  opts.separator ' Optional search arguments:'
178
-
179
- opts.on('-r', '--region REGION',
180
- 'Provide a geographic region to search in for terms.',
181
- 'Possible values are: de, us, uk, ca, au, in, es, br,',
182
- ' ar, mx, e1, it, fr, sg.',
183
- 'This defaults to en.'
184
- ) do |region|
185
- # todo
186
- prove_region(region)
187
- @@options[:query_params][:region] = region
173
+ opts.on('--date-range DATE', DATE_RANGES,
174
+ 'Provide some date range for the search. It defaults to <all>.',
175
+ "Possible values are: <#{DATE_RANGES.join('>, <')}>."
176
+ ) do |date|
177
+ @@options[:query_params][:date_range] = date
188
178
  end
189
-
179
+
190
180
  opts.separator ''
191
-
192
- opts.on('-o', '--output-dir DIR',
193
- 'Provide an output folder.',
194
- 'This directory will be created if it does not exist yet.'
195
- ) do |output_dir|
196
- @@options[:output_dir] = provide_dir(output_dir)
181
+ opts.on('-f', '--output-format FORMAT', OUTPUT_TYPES,
182
+ "Provide an output format: <#{OUTPUT_TYPES.join('>, <')}>.",
183
+ 'It defaults to <xml>, you may simply ommit this option.'
184
+ ) do |f|
185
+ @@options[:query_params][:output] = f
197
186
  end
198
187
 
199
188
  opts.separator ''
189
+ opts.on('--filter TYPE', FILTERS,
190
+ "Possible values are: <#{FILTERS.join('>, <')}>."
191
+ ) do |type|
192
+ @@options[:query_params][:filter] = type
193
+ end
200
194
 
195
+ opts.separator ''
201
196
  opts.on('-l', '--limit NUMBER', Integer,
202
197
  'Provide a number of answers you want to get from Yahoo.',
203
198
  'This argument is not mandatory, if you want to get',
@@ -206,12 +201,12 @@ module Yanser
206
201
  @@options[:limit] = limit
207
202
  end
208
203
 
209
- opts.separator ''
210
- opts.on('-f', '--output-format FORMAT',
211
- 'Provide an output format: xml, json, rss, php.',
212
- 'It defaults to xml, and you can simply ommit this option.'
213
- ) do |f|
214
- raise NotImplementedError, 'Only default output format is implemented'
204
+ opts.separator ''
205
+ opts.on('-o', '--output-dir DIR',
206
+ 'Provide an output folder.',
207
+ 'This directory will be created if it does not exist yet.'
208
+ ) do |output_dir|
209
+ @@options[:output_dir] = output_dir
215
210
  end
216
211
 
217
212
  opts.separator ''
@@ -222,23 +217,92 @@ module Yanser
222
217
  'It can be useful if you want to put many query results',
223
218
  'in the same output folder.'
224
219
  ) do |pref|
225
- raise NotImplementedError, 'No prefixes implemeted.'
220
+ @@options[:prefix] = pref
226
221
  end
227
222
 
228
223
  opts.separator ''
229
- opts.on('--key-word-list FILE',
230
- 'Provide a file with key words or phrases (boolean syntax',
231
- 'allowed) one search token per line.'
232
- ) do |file_name|
233
- raise NotImplementedError
224
+ opts.separator ''
225
+ opts.on('-r', '--region REGION', REGIONS,
226
+ 'Provide a geographic region to search in for terms.',
227
+ 'Possible values are: de, us, uk, ca, au, in, es, br,',
228
+ ' ar, mx, e1, it, fr, sg.',
229
+ 'This defaults to <en>.'
230
+ ) do |region|
231
+
232
+ @@options[:query_params][:region] = region
234
233
  end
235
234
 
236
235
  opts.separator ''
236
+ opts.on('--search-in PLACE', SEARCH_LOCATIONS,
237
+ "Possible values are: <#{SEARCH_LOCATIONS.join('>, <')}>."
238
+ ) do |place|
239
+ @@options[:query_params][:search_in] = place
240
+ end
241
+
242
+ opts.separator ''
243
+ opts.on('--start POSITION',
244
+ 'Provide the position of the question in the search list',
245
+ 'you want start with.'
246
+ ) do |pos|
247
+ @@options[:query_params][:start] = pos
248
+ end
249
+
250
+ opts.separator ''
251
+ opts.on('--sort ORDER', SORT_TYPES,
252
+ "Possible values are: <#{SORT_TYPES.join('>, <')}>."
253
+ ) do |order|
254
+ @@options[:query_params][:sort] = order
255
+ end
256
+
257
+ opts.separator ''
258
+ opts.on('--type TYPE', TYPES,
259
+ "Possible values are: <#{TYPES.join('>, <')}>."
260
+ ) do |type|
261
+ @@options[:query_params][:type] = type
262
+ end
263
+
264
+
265
+ #################
266
+ opts.separator ""
267
+ opts.separator ' Experimental options. They may not be implemented yet.'
237
268
  opts.on('--exp-file FILE',
238
269
  'Provide a formal description of the experiment.'
239
270
  ) do |file_name|
240
- raise NotImplementedError
271
+ warn 'Not yet implemented!'
272
+ end
273
+
274
+ opts.separator ''
275
+ opts.on('--key-word-list FILE',
276
+ 'Provide a file with key words or phrases',
277
+ '(boolean syntax allowed), one search token per line.'
278
+ ) do |file_name|
279
+ warn 'Not yet implemented!'
280
+ end
281
+
282
+ opts.separator ''
283
+ opts.on('--log [LOGFILE]',
284
+ 'SOME DESCRIPTION'
285
+ ) do |file|
286
+ warn 'Add description!'
287
+ warn 'Not yet implemented!'
288
+ end
289
+
290
+ opts.separator ''
291
+ opts.on('--extract',
292
+ 'SOME DESCRIPTION'
293
+ ) do
294
+ # some non xml representation in a human readable format
295
+ warn 'Add description!'
296
+ warn 'Not yet implemented!'
241
297
  end
298
+ opts.separator ''
299
+ opts.on('-t', '--time-interval SECONDS', Integer,
300
+ 'SOME DESCRIPTION'
301
+ ) do |t|
302
+ @@options[:interval] = t
303
+ warn 'Add description!'
304
+ end
305
+
242
306
  opts.separator ""
243
307
  opts.separator "Common options:"
244
308
 
@@ -261,20 +325,24 @@ __END__
261
325
  # List of options planned for implementation.
262
326
  --appid
263
327
  -k, --key-word
264
- -c, --category-id # Term & CategorySearch
328
+ -c, --category # Term & CategorySearch
265
329
  --user-id
266
330
  --question-id # QuestionSearch
267
331
  -r, --region
268
332
  -f, --output-format
269
- -l, --limit
270
333
  --date-range
271
334
  --sort
335
+ --start
272
336
  --filter
337
+ --callback # bad idea
338
+ --type
339
+ --search-in
273
340
 
274
341
 
342
+ -l, --limit
275
343
  --prefix #prefix for output files
276
344
  --key-word-list
277
- --exp-file
345
+ --exp-file # experiment description, yaml
278
346
  -o, --output-dir
279
347
  -h, --help
280
348
  -v, --version
@@ -306,8 +374,9 @@ instance interface
306
374
  :type => "all" | "resolved" | "open" | "undecided", # default 'all'
307
375
  :user_id => '123456'
308
376
  },
309
- :query_type => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
377
+ :interval => Integer, # in seconds
378
+ :limit => Integer,
379
+ :method => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
310
380
  :output_dir => 'some path',
311
- :prefix => 'some prefix', # prefix for output files
312
- :limit => Integer
381
+ :prefix => 'some prefix' # prefix for output files
313
382
  }
@@ -0,0 +1,48 @@
1
+ require 'fileutils'
2
+
3
+ module Yanser
4
+ class Store
5
+ def initialize(params)
6
+ @params = params
7
+ end
8
+
9
+ def store(result)
10
+ if dir = @params[:output_dir]
11
+ provide_dir(dir)
12
+ save(result)
13
+ else
14
+ puts result
15
+ end
16
+ end
17
+
18
+ private
19
+
20
+ # rework!
21
+ def provide_dir(dir)
22
+ dir = File.expand_path(dir)
23
+ # check for existence
24
+ if File.directory?(dir)
25
+ unless File.writable?(dir)
26
+ warn 'The directory you have provided is not writable!'
27
+ exit(1)
28
+ end
29
+ else
30
+ FileUtils.mkdir_p(dir)
31
+ end
32
+ end
33
+
34
+ # save results to a dir
35
+ # not a good implementation
36
+ # interface (filename, data)
37
+ def save(result)
38
+ filename = File.join(@params[:output_dir],
39
+ "yanser_output.xml")
40
+ File.open(filename, 'w') do |file|
41
+ file.puts result
42
+ end
43
+ end
44
+ end # Store
45
+ end
46
+
47
+
48
+
@@ -1,3 +1,3 @@
1
1
  module Yanser
2
- VERSION = '0.0.3'
2
+ VERSION = '0.1.0'
3
3
  end
@@ -1,29 +1,10 @@
1
- # this solution is not ideal since I force people to use 'rubygems'
2
- # possible it would be a better solution to prompt at this point
3
- # to install the lib in the way the user is accustomed to
4
- =begin
5
- begin
6
- require 'yanapi'
7
- rescue LoadError
8
- require 'rubygems'
9
- require 'yanapi'
10
- end
11
- =end
12
1
  require 'yanapi'
13
2
 
14
3
  # :title: YANSER, Yahoo! ANSwers harvestER
15
- # :main: Yanser
4
+ # :main: README
5
+
16
6
  # Main processing class.
17
- # Yanser encapsulates the main routine and instantiates
18
- # all other classes.
19
- #--
20
- # Yanser takes the users input and validates it.
21
- # It decides which search method to choose.
22
- # Then it collects all parameters and useful default values,
23
- # creates an XyzQuery with the starting point of 0 and gets the first results.
24
- # If more results were requested, Yanser creates a similar XyzQuery and gets
25
- # the next result set until the result limitation set by the user is met.
26
- #
7
+ # Yanser encapsulates the main routine and instantiates all other classes.
27
8
  module Yanser
28
9
 
29
10
  class Yanser
@@ -38,98 +19,85 @@ module Yanser
38
19
  # Yahoo! Answers returns maximum 50 results.
39
20
  MAX_RESULTS = 50
40
21
 
41
- def initialize(opts)
42
-
43
- # the minimal output of an OptionParser
44
- # {:query_type=>u|q|c|w, :query_params=>{appid, start, results, query}}
45
- # opts come from the OptionParser
46
- # they are supposed to be correct, no validation here
47
- @options = opts
48
-
49
- @options[:limit] = @options[:limit] || START_LIMIT + MAX_RESULTS
22
+ def initialize(params)
23
+ # Libs are not allowed to mutate the input.
24
+ @params = params.clone
50
25
 
51
- @query_params = @options[:query_params]
26
+ @params[:limit] = @params[:limit] || START_LIMIT + MAX_RESULTS
52
27
 
53
- @output_type = @query_params[:output] || 'xml'
28
+ @query_params = @params[:query_params]
54
29
 
30
+ @output = @query_params[:output] || 'xml'
31
+
32
+ # Yahoo counts results beginning with 0.
33
+ # <:results=50> means all questions from 0 to 49.
55
34
  @query_params[:start] = @query_params[:start] || 0
56
35
  end
57
36
 
58
- # TODO: implement some logging
37
+ # It returns the accumulated string or <nil>, if nothing found.
59
38
  def start
60
-
61
- if @options[:query_type] == 'QuestionQuery'
62
- query(@query_params)
63
- elsif @options[:limit] < MAX_RESULTS
64
- @query_params[:results] = @options[:limit]
65
- query(@query_params)
39
+ if @params[:method] == 'getQuestion'
40
+ response = get_response(@params)
66
41
  else
67
- @query_params[:results] = MAX_RESULTS
42
+ response = ''
68
43
 
69
- while query(@query_params) do
70
- # we get the next start point here
71
- @query_params[:start] += MAX_RESULTS
44
+ while @query_params[:start] < @params[:limit]
45
+ results_left = @params[:limit] - @query_params[:start]
72
46
 
73
- results_left = @options[:limit] - @query_params[:start]
74
- if results_left == 0
75
- break
76
- elsif results_left < MAX_RESULTS
47
+ if results_left < MAX_RESULTS
77
48
  @query_params[:results] = results_left
49
+ else
50
+ @query_params[:results] = MAX_RESULTS
51
+ end
52
+
53
+ r = get_response(@params)
54
+ if r
55
+ response << r
56
+ else
57
+ if response.empty?
58
+ response = nil
59
+ end
78
60
  break
79
61
  end
80
- end # while
81
-
82
- end # if
83
-
62
+ @query_params[:start] += @query_params[:results]
63
+ sleep(QUERY_INTERVAL)
64
+ end
65
+ end
66
+
67
+ response
84
68
  end # start
85
69
 
86
70
  private
87
-
88
- def query(params)
89
- q = create_query(params)
90
- tries = 0
71
+ # It returns a result string or <nil>.
72
+ # It tries three times in case of YANAPI::ExternalError,
73
+ # it gives up if a YANAPI::ContentError occurs (returns <nil>).
74
+ def get_response(params)
75
+ query = create_query(params)
76
+ t = 0
91
77
  begin
92
- tries += 1
93
- result = q.get
94
- output(result)
95
- sleep(2)
96
- rescue YANAPI::EmptyResponse => e
97
- $sderr.puts e
98
- return false # do not iterate futher
99
- rescue => e # some errors to retry
100
- if (tries < 4)
101
- sleep(QUERY_INTERVAL**tries)
78
+ t += 1
79
+ response = query.get
80
+ rescue YANAPI::ExternalError => e
81
+ warn "External error: #{e}!"
82
+ if t <= 3
83
+ sleep(QUERY_INTERVAL**t)
102
84
  retry
103
85
  else
104
- $stderr.puts e
105
- return false # do not iterate futher
86
+ raise
87
+ end
106
88
  end
89
+
90
+ response
107
91
  end
108
- return true # we may iterate futher
109
- end
110
92
 
93
+ # It creates a new api object or raises an exception.
94
+ # YANAPI::UserError should not be rescued, it indicates that the user
95
+ # must correct the input hash.
111
96
  def create_query(params)
112
- eval("YANAPI::#{@options[:query_type]}.new(params)")
113
- end
114
-
115
- def output(result)
116
- if @options[:output_dir]
117
- save(result)
118
- else
119
- puts result
120
- end
121
- end
122
-
123
- # save results to a dir
124
- # this dir exists since has been proved by OptionParser
125
- # not a good implementation
126
- # interface (filename, data)
127
- def save(result)
128
- filename = File.join(@options[:output_dir], "#{@query_params[:start]}.#{@output_type}")
129
- file = File.new(filename, 'w')
130
- file.puts result
131
- file.close
97
+ YANAPI::API.new(params)
132
98
  end
133
99
 
134
100
  end # class Yanser
135
101
  end # module Yanser
102
+
103
+ __END__
@@ -1,138 +1,264 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require 'test/unit'
3
3
  require 'yanser/opt_parser'
4
+ require 'stringio' # for helper methods
5
+
4
6
  include Yanser
7
+
5
8
  class TestOptionParser < Test::Unit::TestCase
6
- def setup
7
- @input = ['--appid', 'YahooDemo', '-r',
8
- 'de', '-l', '10', '-o', '/tmp',
9
- '-k' 'Haus AND grün']
10
- @output = {
11
- :query_params => {
12
- :appid => 'YahooDemo',
13
- :query => 'Haus AND grün',
14
- :region => 'de',
15
- :search_in => 'question'
16
- },
17
- :query_type => 'TermQuery',
18
- :output_dir => '/tmp',
19
- :limit => 10
20
- }
21
9
 
10
+ def setup
22
11
  @cmd_args = ['--appid', 'YahooDemo']
23
- @minimal_input = ['--appid', 'YahooDemo',
24
- '-k', 'Haus']
12
+ @minimal_input = ['--appid', 'YahooDemo', '-k', 'Haus']
13
+ @valid_opts = ['--appid', 'MyID',
14
+ '--category', '12345',
15
+ '--date-range', 'all',
16
+ '--exp-file', 'exp_file.yml',
17
+ '--filter', 'question',
18
+ '--help',
19
+ '--key-word', 'Haus',
20
+ '--key-word-list', 'some_file.txt',
21
+ '--limit', '1',
22
+ '--output-dir', '/tmp',
23
+ '--output-format', 'xml',
24
+ '--prefix', 'file1',
25
+ '--question-id', '12345',
26
+ '--region', 'de',
27
+ '--search-in', 'question',
28
+ '--sort', 'relevance',
29
+ '--start', '10',
30
+ '--time-interval', '10',
31
+ '--type', 'resolved',
32
+ '--user-id', '12345',
33
+ '--version'
34
+ ]
25
35
  end
26
36
 
27
37
  def teardown
28
38
  end
29
39
 
30
- # it should have a public class method 'parse'
40
+ # It should have a public class method 'parse'.
31
41
  def test_public_methods
32
42
  assert_respond_to(Yanser::OptParser, :parse)
33
43
  end
34
44
 
35
- # it should return a non empty parameter hash
45
+ # It should return a non empty parameter hash.
36
46
  def test_parse_method
37
- return_value = Yanser::OptParser.parse(@cmd_args)
38
- assert(return_value.instance_of? Hash)
39
- assert(! return_value.empty?)
47
+ return_value = Yanser::OptParser.parse(@minimal_input)
48
+ assert(return_value.instance_of?(Hash))
49
+ assert_equal(false, return_value.empty?)
40
50
  end
41
51
 
42
- # it should return a right complex output
43
- def test_output
44
- cmd_args = ['--appid', 'YahooDemo', '-r',
45
- 'de', '-l', '10', '-o', '/tmp',
46
- '-k' 'Haus AND grün'
47
- ]
48
- assert_equal(@output, Yanser::OptParser.parse(cmd_args))
52
+ # It should reject the empty input and exit.
53
+ def test_empty_input
54
+ out, err = intercept_output do
55
+ assert_raises(SystemExit) { OptParser.parse([]) }
56
+ end
57
+ assert_match(/You have to provide some options./, out)
49
58
  end
50
59
 
60
+ # It should accept correct options.
61
+ # Invalid options is the matter of OptionParser itself,
62
+ # do not test it here.
63
+ # We test only, that OP exits and does not raise an exception.
64
+ def test_accept_correct_options
65
+ # this options we should treat separately
66
+ @valid_opts.delete('--help')
67
+ @valid_opts.delete('--version')
68
+ assert_nothing_raised { OptParser.parse(@valid_opts) }
69
+
70
+ stdout, stderr = intercept_output do
71
+ assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
72
+ end
73
+
74
+ assert_match(/You have provided an invalid option:/, stdout)
75
+ end
76
+
77
+ # It should successfully exit with some options.
78
+ def test_successful_exit
79
+ quietly do
80
+ success_args = ['-h', '--help', '-v', '--version']
81
+ success_args.each do |arg|
82
+ assert_raises(SystemExit) { OptParser.parse(arg.split) }
83
+ end
84
+ end
85
+ end
86
+
87
+ =begin
88
+ # It is done in YANAPI.
89
+ # It should require mandatory cmd arguments.
90
+ def test_mandatory_args
91
+ flunk('Rewrite!')
92
+ args = ['-k', 'Haus']
93
+ assert_raises(OptParserError) {OptParser.parse args}
94
+ end
95
+ =end
96
+
97
+ # It should accept only valid date arguments.
98
+ def test_date_arguments
99
+ validate(OptParser::DATE_RANGES, '--date-range', :date_range)
100
+ end
101
+
102
+ # It should accept a file and parse it.
103
+ def test_exp_file
104
+ flunk('Not implemented! It should accept a file and parse it.')
105
+ end
106
+
107
+ # It should accept only valid filter arguments.
108
+ def test_filter_arguments
109
+ validate(OptParser::FILTERS, '--filter', :filter)
110
+ end
111
+
112
+ # It should accept only numeric parameters for limits.
113
+ def test_limit_arguments
114
+ flunk('Not implemented! It should accept only numeric paramters for limits.')
115
+ end
51
116
 
52
- # it should set up the right method
53
- def test_method
117
+ # It should accept only valid output formats.
118
+ def test_output_arguments
119
+ validate(OptParser::OUTPUT_TYPES, '--output-format', :output)
120
+ end
121
+
122
+ # It should accept only valid regions.
123
+ def test_region_arguments
124
+ validate(OptParser::REGIONS, '-r', :region)
125
+ end
126
+
127
+ # It should accept only valid search locations.
128
+ def test_search_locations
129
+ validate(OptParser::SEARCH_LOCATIONS, '--search-in', :search_in)
130
+ end
131
+
132
+ # It should accept only valid sort types.
133
+ def test_sort_types
134
+ validate(OptParser::SORT_TYPES, '--sort', :sort)
135
+ end
136
+
137
+ # It should accept only valid types.
138
+ def test_types
139
+ validate(OptParser::TYPES, '--type', :type)
140
+ end
141
+
142
+ # It should set up the right search method.
143
+ def test_correct_setting_of_the_method
54
144
 
55
145
  cmd_args = ['-k', 'Haus'] | @cmd_args
56
- assert_equal('TermQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
146
+ assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
57
147
 
58
148
  cmd_args = ['-k', 'Haus', '-c', '09876543'] | @cmd_args
59
- assert_equal('TermQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
149
+ assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
60
150
 
61
151
  cmd_args = ['-c', '09876543'] | @cmd_args
62
- assert_equal('CategoryQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
152
+ assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
63
153
 
64
154
  cmd_args = ['-c', 'Ausgehen'] | @cmd_args
65
- assert_equal('CategoryQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
155
+ assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
66
156
 
67
157
  cmd_args = ['--user-id', '12345'] | @cmd_args
68
- assert_equal('UserQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
158
+ assert_equal('getByUser', OptParser.parse(cmd_args)[:method])
69
159
 
70
160
  cmd_args = ['--question-id', '12345'] | @cmd_args
71
- assert_equal('QuestionQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
161
+ assert_equal('getQuestion', OptParser.parse(cmd_args)[:method])
72
162
  end
73
163
 
74
- # it should set either :category_id or :category_name
164
+ # It should set either :category_id or :category_name.
75
165
  def test_category_identifier
76
166
  args = ['-k', 'Haus', '-c', '123'] | @cmd_args
77
167
  opts = Yanser::OptParser.parse(args)
78
168
  assert_equal('123', opts[:query_params][:category_id])
79
- assert(! opts[:query_params].has_key?(:category_name))
169
+ assert_equal(false, opts[:query_params].has_key?(:category_name))
80
170
 
81
171
  args = ['-k', 'Haus', '-c', 'Haushalt'] | @cmd_args
82
172
  opts = Yanser::OptParser.parse(args)
83
173
  assert_equal('Haushalt', opts[:query_params][:category_name])
84
- assert(! opts[:query_params].has_key?(:category_id))
85
-
174
+ assert_equal(false, opts[:query_params].has_key?(:category_id))
86
175
  end
87
176
 
88
- def test_output_dir
177
+ # It should set reasonable defaults.
178
+ # Yanser may set defaults, Yanapi provides minimal output, i.e. if Yahoo
179
+ # changes defaults, every user can be affected. That's why we offer some
180
+ # reasonable defaults for our experiments.
181
+ # :region => 'de'
182
+ # :data_range => 'all'
183
+ def test_default_values
184
+ flunk('Not implemented! It should set reasonable defaults.')
185
+
89
186
  end
90
187
 
91
- def test_query
188
+ # It should return a right complex output.
189
+ def test_output
190
+ cmd_args = ['--appid', 'YahooDemo', '-r',
191
+ 'de', '-l', '10', '-o', '/tmp',
192
+ '-k' 'Haus AND grün'
193
+ ]
194
+ output = {
195
+ :query_params => {
196
+ :appid => 'YahooDemo',
197
+ :query => 'Haus AND grün',
198
+ :region => 'de',
199
+ :search_in => 'question'
200
+ },
201
+ :method => 'questionSearch',
202
+ :output_dir => '/tmp',
203
+ :limit => 10
204
+ }
205
+ assert_equal(output, OptParser.parse(cmd_args))
92
206
  end
93
207
 
94
- # it should reject the empty input
95
- def test_empty_input
96
- assert_raises(OptParserError) {OptParser.parse []}
208
+ # It should set correct parameters for logging.
209
+ def test_log_arguments
210
+ flunk('Not implemented. It should set correct parameters for logging.')
97
211
  end
98
212
 
99
- # it should accept only valid regions
100
- def test_region_validity
101
- regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
102
- 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
103
- regions.each do |r|
104
- input = @minimal_input | ['-r', r]
105
- assert_nothing_raised(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
213
+ # It should set correct parameters for data extraction.
214
+ def test_extraction_arguments
215
+ flunk('Not implemented. It should set correct parameters for data extraction.')
216
+ end
217
+ ################################################################################
218
+ # It is a helper method, many testable units provide some verbose output
219
+ # to stderr and/or stdout. It is useful to suppress any kind of verbosity.
220
+ def quietly(&b)
221
+ begin
222
+ orig_stderr = $stderr.clone
223
+ orig_stdout = $stdout.clone
224
+ $stderr.reopen(File.new('/dev/null', 'w'))
225
+ $stdout.reopen(File.new('/dev/null', 'w'))
226
+ b.call
227
+ ensure
228
+ $stderr.reopen(orig_stderr)
229
+ $stdout.reopen(orig_stdout)
106
230
  end
107
- input = @minimal_input | ['-r', 'abc']
108
- assert_raises(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
109
231
  end
110
232
 
111
- # it should set reasonable defaults
112
- def test_default_values
233
+ # It is a helper method for validation of used parameter values.
234
+ def validate(params, option, key)
235
+ params.each do |p|
236
+ input = @minimal_input | [option, p]
237
+ params = {}
238
+ assert_nothing_raised(SystemExit) { params = OptParser.parse(input) }
239
+ assert_equal(p, params[:query_params][key])
240
+ end
241
+
242
+ input = @minimal_input | [option, 'invalid']
243
+ stdout, stderr = intercept_output do
244
+ assert_raises(SystemExit) { OptParser.parse(input) }
245
+ end
246
+ assert_match(/The provided .+ is currently not supported by Yahoo!/,
247
+ stdout)
113
248
  end
114
249
 
115
- # it should accept only valid query type combinations
116
- def test_qtype_combinations
117
- end
250
+ # It is a helper method for handling stdout and stderr as strings.
251
+ def intercept_output
252
+ orig_stdout = $stdout
253
+ orig_stderr = $stderr
254
+ $stdout = StringIO.new
255
+ $stderr = StringIO.new
118
256
 
119
- # it should require mandatory cmd arguments
120
- def test_mandatory_args
121
- args = ['-k', 'Haus']
122
- assert_raises(OptParserError) {OptParser.parse args}
123
- end
257
+ yield
124
258
 
125
- # it should successfully exit with some options
126
- def test_successful_exit
127
- orig_stderr = $stderr.clone
128
- orig_stdout = $stdout.clone
129
- $stderr.reopen(File.new('/dev/null', 'w'))
130
- $stdout.reopen(File.new('/dev/null', 'w'))
131
- success_args = ['-h', '--help', '-v', '--version']
132
- success_args.each do |arg|
133
- assert_raises(SystemExit) {OptParser.parse(arg.to_a)}
134
- end
135
- $stderr.reopen(orig_stderr)
136
- $stdout.reopen(orig_stdout)
259
+ return $stdout.string, $stderr.string
260
+ ensure
261
+ $stdout = orig_stdout
262
+ $stderr = orig_stderr
137
263
  end
138
264
  end
@@ -10,6 +10,7 @@ class TestYanser < Test::Unit::TestCase
10
10
  def teardown
11
11
  end
12
12
 
13
+ # It should have a public method <start>.
13
14
  def test_public_methods
14
15
  yanser = Yanser::Yanser.new(
15
16
  :query_params => {
@@ -26,10 +27,15 @@ class TestYanser < Test::Unit::TestCase
26
27
  assert_respond_to(yanser, :start)
27
28
  end
28
29
 
30
+ # It should have a constant <VERSION>.
29
31
  def test_constants
30
32
  assert(Yanser::VERSION.instance_of?(String))
31
33
  end
32
- def test_mandatory_options
34
+
35
+ # It should accept input according to the defined interface.
36
+ def test_input
37
+ flunk('Not implemented. It should accept input according to the defined interface.')
33
38
  end
39
+
34
40
 
35
41
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yanser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 27
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
+ - 1
8
9
  - 0
9
- - 3
10
- version: 0.0.3
10
+ version: 0.1.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Andrei Beliankou
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-08-06 00:00:00 Z
18
+ date: 2011-08-12 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: yanapi
@@ -23,17 +23,17 @@ dependencies:
23
23
  requirement: &id001 !ruby/object:Gem::Requirement
24
24
  none: false
25
25
  requirements:
26
- - - "="
26
+ - - ">="
27
27
  - !ruby/object:Gem::Version
28
- hash: 25
28
+ hash: 17
29
29
  segments:
30
30
  - 0
31
+ - 3
31
32
  - 1
32
- - 1
33
- version: 0.1.1
33
+ version: 0.3.1
34
34
  type: :runtime
35
35
  version_requirements: *id001
36
- description: YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
36
+ description: YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics, e.g. for Question Answering.
37
37
  email: a.belenkow@uni-trier.de
38
38
  executables:
39
39
  - yanser
@@ -42,25 +42,27 @@ extensions: []
42
42
  extra_rdoc_files:
43
43
  - README
44
44
  - LICENSE
45
+ - CHANGELOG
45
46
  files:
46
- - lib/tester.rb
47
47
  - lib/yanser/error.rb
48
+ - lib/yanser/store.rb
48
49
  - lib/yanser/yanser.rb
49
50
  - lib/yanser/opt_parser.rb
50
51
  - lib/yanser/version.rb
51
52
  - bin/yanser
52
- - LICENSE
53
- - Rakefile
54
53
  - README
54
+ - LICENSE
55
+ - CHANGELOG
55
56
  - test/test_opt_parser.rb
56
57
  - test/test_yanser.rb
58
+ - .yardopts
57
59
  homepage: http://www.uni-trier.de/index.php?id=34451
58
60
  licenses: []
59
61
 
60
62
  post_install_message:
61
63
  rdoc_options:
62
64
  - -m
63
- - README.rdoc
65
+ - README
64
66
  require_paths:
65
67
  - lib
66
68
  required_ruby_version: !ruby/object:Gem::Requirement
@@ -86,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
86
88
  requirements: []
87
89
 
88
90
  rubyforge_project: yanser
89
- rubygems_version: 1.7.2
91
+ rubygems_version: 1.8.7
90
92
  signing_key:
91
93
  specification_version: 3
92
94
  summary: Yanser is a convenient search tool providing access to the Yahoo! Answers Q/A collection.
data/Rakefile DELETED
@@ -1,41 +0,0 @@
1
- # We need rake to use FileLists.
2
- require 'rake'
3
- # We can require 'rake/clean' to add 'clobber' and 'clean' tasks.
4
- require 'rake/clean'
5
-
6
-
7
- # I am not sure how to use this constant.
8
- SRC = FileList['**/*.rb']
9
-
10
- CLOBBER.include('doc', '**/*.html', '**/*.gem')
11
-
12
- # testing
13
- require 'rake/testtask'
14
- Rake::TestTask.new do |t|
15
- t.test_files = FileList.new('test/**/*.rb').to_a
16
- # rake starts another ruby process with a new options set.
17
- # ruby --some-option -S rake is not sufficient to propagate
18
- # the option "--some-option".
19
- t.ruby_opts = ['-rubygems']
20
- end
21
-
22
- # Build the gem package
23
- load 'yanser.gemspec'
24
- require 'rubygems/package_task'
25
- Gem::PackageTask.new(GEMSPEC).define
26
-
27
- # Generate documentation
28
- require 'rdoc/task'
29
- RDoc::Task.new do |rdoc|
30
- rdoc.rdoc_files.include('README', 'LICENSE', 'lib/**/*')
31
- end
32
-
33
- desc "Open an irb session preloaded with this library."
34
- task :console do
35
- sh "irb -rubygems -I lib -r yanser/opt_parser -r yanser/yanser"
36
- end
37
-
38
- desc 'Run the main executable file of the project.'
39
- task :start do
40
- sh "ruby -w -rubygems -I lib bin/yanser #{ENV['cmd']}"
41
- end
@@ -1,18 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
-
3
- require 'yanser'
4
-
5
- params = {
6
- :query_method => 'TermQuery',
7
- :query_params => {
8
- :appid => 'YahooDemo',
9
- :query => 'Köln',
10
- :region => 'de',
11
- :results => 5,
12
- :start => 0
13
- }
14
- }
15
-
16
- y = Yanser.new(params)
17
-
18
- y.start