yanser 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +21 -1
- data/Rakefile +31 -4
- data/bin/yanser +8 -8
- data/lib/yanser/error.rb +11 -0
- data/lib/yanser/opt_parser.rb +313 -0
- data/lib/yanser/version.rb +3 -0
- data/lib/yanser/yanser.rb +135 -0
- data/test/test_opt_parser.rb +138 -0
- data/test/test_yanser.rb +7 -3
- metadata +13 -22
- data/README.rdoc +0 -21
- data/lib/option_parser.rb +0 -263
- data/lib/version.rb +0 -3
- data/lib/yanser.rb +0 -121
- data/test/data/bad_xml.txt +0 -236
- data/test/data/empty_result.txt +0 -13
- data/test/data/error_code.txt +0 -237
- data/test/data/response_with_error.txt +0 -15
- data/test/data/successfull_response.txt +0 -237
- data/test/test_option_parser.rb +0 -91
- data/test/yanapi/test_query.rb +0 -112
- data/test/yanapi/test_term_query.rb +0 -64
data/README
CHANGED
@@ -1 +1,21 @@
|
|
1
|
-
|
1
|
+
= YANSER
|
2
|
+
|
3
|
+
* {RubyGems}[http://rubygems.org/gems/yanser]
|
4
|
+
* Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
|
5
|
+
* {YANAPI Project Page}[http://yanser.rubyforge.org/]
|
6
|
+
|
7
|
+
== DESCRIPTION
|
8
|
+
|
9
|
+
YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
|
10
|
+
|
11
|
+
== SYNOPSIS
|
12
|
+
$ yanser --help
|
13
|
+
|
14
|
+
|
15
|
+
|
16
|
+
== LICENSE
|
17
|
+
|
18
|
+
YANSER is copyrighted software by Andrei Beliankou, 2011.
|
19
|
+
You may use, redistribute and change it under the terms
|
20
|
+
provided in the LICENSE file.
|
21
|
+
|
data/Rakefile
CHANGED
@@ -1,14 +1,41 @@
|
|
1
|
-
|
1
|
+
# We need rake to use FileLists.
|
2
2
|
require 'rake'
|
3
|
-
#
|
3
|
+
# We can require 'rake/clean' to add 'clobber' and 'clean' tasks.
|
4
4
|
require 'rake/clean'
|
5
|
-
require 'rake/testtask'
|
6
5
|
|
7
6
|
|
7
|
+
# I am not sure how to use this constant.
|
8
8
|
SRC = FileList['**/*.rb']
|
9
9
|
|
10
10
|
CLOBBER.include('doc', '**/*.html', '**/*.gem')
|
11
11
|
|
12
|
+
# testing
|
13
|
+
require 'rake/testtask'
|
12
14
|
Rake::TestTask.new do |t|
|
13
|
-
t.test_files = FileList.new('test
|
15
|
+
t.test_files = FileList.new('test/**/*.rb').to_a
|
16
|
+
# rake starts another ruby process with a new set of options.
|
17
|
+
# ruby --some-option -S rake is not sufficient to propagate
|
18
|
+
# the option "--some-option".
|
19
|
+
t.ruby_opts = ['-rubygems']
|
20
|
+
end
|
21
|
+
|
22
|
+
# Build the gem package
|
23
|
+
load 'yanser.gemspec'
|
24
|
+
require 'rubygems/package_task'
|
25
|
+
Gem::PackageTask.new(GEMSPEC).define
|
26
|
+
|
27
|
+
# Generate documentation
|
28
|
+
require 'rdoc/task'
|
29
|
+
RDoc::Task.new do |rdoc|
|
30
|
+
rdoc.rdoc_files.include('README', 'LICENSE', 'lib/**/*')
|
31
|
+
end
|
32
|
+
|
33
|
+
desc "Open an irb session preloaded with this library."
|
34
|
+
task :console do
|
35
|
+
sh "irb -rubygems -I lib -r yanser/opt_parser -r yanser/yanser"
|
36
|
+
end
|
37
|
+
|
38
|
+
desc 'Run the main executable file of the project.'
|
39
|
+
task :start do
|
40
|
+
sh "ruby -w -rubygems -I lib bin/yanser #{ENV['cmd']}"
|
14
41
|
end
|
data/bin/yanser
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
|
2
|
-
# -*- coding: utf-8 -*-
|
1
|
+
# -*- coding: utf-8; mode: ruby -*-
|
3
2
|
|
4
3
|
# this is not the right solution: I really should not state directly
|
5
4
|
# where the files are located, but let it stay this way for now
|
6
|
-
|
7
|
-
|
5
|
+
# it is required only for .tar.gz distribution
|
6
|
+
#lib_path = File.expand_path('../../lib', __FILE__)
|
7
|
+
#$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
|
8
8
|
|
9
9
|
# this approach is the right one: only this way do the classes Yanser and OptionParser stay independent
|
10
|
-
require 'yanser'
|
11
|
-
require '
|
10
|
+
require 'yanser/yanser'
|
11
|
+
require 'yanser/opt_parser'
|
12
12
|
|
13
13
|
# class method, why not?
|
14
|
-
options =
|
14
|
+
options = Yanser::OptParser.parse(ARGV)
|
15
15
|
|
16
16
|
|
17
|
-
yanser = Yanser.new(options)
|
17
|
+
yanser = Yanser::Yanser.new(options)
|
18
18
|
|
19
19
|
yanser.start
|
data/lib/yanser/error.rb
ADDED
@@ -0,0 +1,313 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'optparse'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
require 'yanser/version'
|
6
|
+
require 'yanser/error'
|
7
|
+
|
8
|
+
module Yanser
  # Translates command line arguments into the options hash consumed by
  # Yanser::Yanser.
  #
  # The hash returned by OptParser.parse has the following layout (keys are
  # only present when the matching option was given):
  #
  #   {
  #     :query_params => { :appid, :query, :search_in, :category_id,
  #                        :category_name, :user_id, :question_id, :region },
  #     :query_type   => 'TermQuery' | 'CategoryQuery' | 'UserQuery' |
  #                      'QuestionQuery' | nil,
  #     :output_dir   => String,
  #     :limit        => Integer
  #   }
  class OptParser
    # Parses +cmd_args+ and returns the options hash.
    #
    # cmd_args:: an array of strings like ARGV; recognized options are
    #            removed from it (OptionParser#parse! is destructive).
    #
    # Raises OptParserError when +cmd_args+ is empty, when OptionParser
    # rejects the input, when the region is unknown, or when a required
    # option is missing. --help and --version print and exit.
    def self.parse(cmd_args)
      # The option callbacks created in create_parser write into this hash.
      # It is kept in a class-level instance variable so that it is not
      # shared with subclasses the way a @@class variable would be.
      @options = { :query_params => {} }

      parser = create_parser

      # Without any option there is nothing sensible to do.
      if cmd_args.empty?
        msg = "You have to provide some options. " +
              "yanser --help provides more information."
        raise OptParserError, msg
      end

      # Every StandardError coming out of OptionParser (and out of the
      # option callbacks) is reported as an OptParserError.
      begin
        parser.parse!(cmd_args)
      rescue => e
        raise OptParserError, e.message
      end

      check_required_options(@options)

      # Decide which YANAPI query class has to be used.
      @options[:query_type] = set_query_type(@options[:query_params])

      @options = set_defaults(@options)

      @options
    end # parse

    # The methods below are internal helpers of parse. They stay callable
    # from outside: a bare `private` never applied to `def self.` methods,
    # so existing callers may depend on them.

    # Checks that +region+ is one of the 14 region codes listed here.
    # Returns nil for a known region, raises OptParserError otherwise.
    def self.prove_region(region)
      regions = %w[de us uk ca au in es br ar mx e1 it fr sg]
      return if regions.include?(region)

      msg = "The provided search region #{region} is currently not supported by Yahoo!"
      raise OptParserError, msg
    end

    # Picks the query type for the collected query parameters:
    # a key word wins over a category, then user id, then question id.
    # Returns one of 'TermQuery', 'CategoryQuery', 'UserQuery',
    # 'QuestionQuery', or nil when no search argument was given.
    def self.set_query_type(params)
      if params[:query]
        'TermQuery'
      elsif params[:category_id] || params[:category_name]
        'CategoryQuery'
      elsif params[:user_id]
        'UserQuery'
      elsif params[:question_id]
        'QuestionQuery'
      end
    end

    # Raises OptParserError for the first required option that is absent
    # from options[:query_params]. Currently only :appid is required.
    def self.check_required_options(options)
      required_opts = [:appid]

      required_opts.each do |opt|
        next if options[:query_params].has_key?(opt)

        raise OptParserError, "A required option --#{opt} is missing."
      end
    end

    # Returns the absolute path of +dir+, creating the directory (and its
    # parents) when it does not exist yet. An existing but non-writable
    # directory is reported on $stderr and terminates the program with
    # exit status 1.
    def self.provide_dir(dir)
      dir = File.expand_path(dir)

      unless File.directory?(dir)
        FileUtils.mkdir_p(dir)
        return dir
      end

      return dir if File.writable?(dir)

      $stderr.puts 'The directory you have provided is not writable!'
      exit 1
    end # provide_dir

    # Placeholder for filling in default values; returns +opts+ unchanged.
    def self.set_defaults(opts)
      opts
    end # set_defaults

    # Builds the OptionParser instance. The option callbacks store their
    # values in the hash prepared by parse.
    def self.create_parser
      OptionParser.new do |opts|
        opts.banner = 'Usage: yanser OPTIONS'

        opts.separator ''
        opts.separator 'Program specific options:'

        opts.on('--appid APPID',
                'Provide an ApplicationID given by Yahoo,',
                'to test Yanser you can use <YahooDemo> as the APPID,',
                'think in this case on limitations placed by Yahoo.',
                'This option is required!'
                ) do |appid|
          @options[:query_params][:appid] = appid
        end

        opts.separator ''
        opts.separator ' Mandatory search arguments:'

        opts.on('-k', '--key-word KEYWORD',
                'Provide a single keyword or a boolean expression.'
                ) do |keyword|
          @options[:query_params][:query] = keyword
          # The search scope is hard-wired for now; there is no option
          # to change it yet.
          @options[:query_params][:search_in] = 'question'
        end

        opts.separator ''

        opts.on('-c', '--category CATEGORY',
                'Provide a category name or ID.'
                ) do |category|
          # \A and \z anchor the whole string; ^ and $ would accept any
          # input that merely contains one all-digit line.
          if category =~ /\A[[:digit:]]+\z/
            @options[:query_params][:category_id] = category
          else
            @options[:query_params][:category_name] = category
          end
        end

        opts.separator ''

        opts.on('--user-id ID',
                'Provide an user ID of questions you search for.',
                'This way you can get question by a specific user.'
                ) do |user_id|
          @options[:query_params][:user_id] = user_id
        end

        opts.separator ''

        opts.on('--question-id ID',
                'Provide a question ID of the question you search for.',
                'It returns a unique question.'
                ) do |question_id|
          @options[:query_params][:question_id] = question_id
        end

        opts.separator ''
        opts.separator ' Optional search arguments:'

        opts.on('-r', '--region REGION',
                'Provide a geographic region to search in for terms.',
                'Possible values are: de, us, uk, ca, au, in, es, br,',
                ' ar, mx, e1, it, fr, sg.',
                'This defaults to en.'
                ) do |region|
          prove_region(region)
          @options[:query_params][:region] = region
        end

        opts.separator ''

        opts.on('-o', '--output-dir DIR',
                'Provide an output folder.',
                'This directory will be created if it does not exist yet.'
                ) do |output_dir|
          @options[:output_dir] = provide_dir(output_dir)
        end

        opts.separator ''

        opts.on('-l', '--limit NUMBER', Integer,
                'Provide a number of answers you want to get from Yahoo.',
                'This argument is not mandatory, if you want to get',
                'all answers simply ommit this argument.'
                ) do |limit|
          @options[:limit] = limit
        end

        opts.separator ''
        opts.on('-f', '--output-format FORMAT',
                'Provide an output format: xml, json, rss, php.',
                'It defaults to xml, and you can simply ommit this option.'
                ) do |f|
          raise NotImplementedError, 'Only default output format is implemented'
        end

        opts.separator ''
        opts.on('--prefix PREFIX',
                'Provide a prefix for the output files. By default',
                'the filename begins with the index of the retrieved',
                'question. You can alter this by providing a prefix.',
                'It can be useful if you want to put many query results',
                'in the same output folder.'
                ) do |pref|
          raise NotImplementedError, 'No prefixes implemeted.'
        end

        opts.separator ''
        opts.on('--key-word-list FILE',
                'Provide a file with key words or phrases (boolean syntax',
                'allowed) one search token per line.'
                ) do |file_name|
          raise NotImplementedError
        end

        opts.separator ''
        opts.on('--exp-file FILE',
                'Provide a formal description of the experiment.'
                ) do |file_name|
          raise NotImplementedError
        end
        opts.separator ""
        opts.separator "Common options:"

        opts.on_tail('-h', '--help', 'Show the help message.') do
          puts opts
          exit
        end

        opts.on_tail('-v', '--version', 'Show the program version.') do
          puts VERSION
          exit
        end
      end # parser
    end # create_parser

  end # OptionParser
end # module
|
259
|
+
|
260
|
+
__END__
|
261
|
+
# List of options planned for implementation.
|
262
|
+
--appid
|
263
|
+
-k, --key-word
|
264
|
+
-c, --category-id # Term & CategorySearch
|
265
|
+
--user-id
|
266
|
+
--question-id # QuestionSearch
|
267
|
+
-r, --region
|
268
|
+
-f, --output-format
|
269
|
+
-l, --limit
|
270
|
+
--date-range
|
271
|
+
--sort
|
272
|
+
--filter
|
273
|
+
|
274
|
+
|
275
|
+
--prefix #prefix for output files
|
276
|
+
--key-word-list
|
277
|
+
--exp-file
|
278
|
+
-o, --output-dir
|
279
|
+
-h, --help
|
280
|
+
-v, --version
|
281
|
+
-t, --time-interval
|
282
|
+
|
283
|
+
|
284
|
+
|
285
|
+
|
286
|
+
|
287
|
+
|
288
|
+
instance interface
|
289
|
+
|
290
|
+
{
|
291
|
+
:query_params => {
|
292
|
+
:appid => 'YahooDemo' | 'SomeStringWithYourID',
|
293
|
+
:callback => 'str',
|
294
|
+
:category_id => '123456',
|
295
|
+
:category_name => 'Wohnen',
|
296
|
+
:date_range => 'all'|'7'|'7-30'|'30-60'|'60-90'|'more90', # default: all
|
297
|
+
:filter => 'question'|'best_answer', # default: question
|
298
|
+
:output => 'xml'|'json'|'php'|'rss',
|
299
|
+
:query => 'Haus AND Grund',
|
300
|
+
:question_id => '123456',
|
301
|
+
:region => 'de'|'us'|'uk'|'ca'|'au'|'in'|'es'|'br'|'ar'|'mx'|'e1'|'it'|'fr'|'sg', # default 'en'
|
302
|
+
:results => integer, # 0..50, default: 10 (0 returns the default value)
|
303
|
+
:search_in => "all" | "question" | "best_answer", # default 'all'
|
304
|
+
:sort => 'relevance' | 'date_desc'| 'date_asc', # default 'relevance'
|
305
|
+
:start => integer, # <= 1000, default: 0
|
306
|
+
:type => "all" | "resolved" | "open" | "undecided", # default 'all'
|
307
|
+
:user_id => '123456'
|
308
|
+
},
|
309
|
+
:query_type => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
|
310
|
+
:output_dir => 'some path',
|
311
|
+
:prefix => 'some prefix', # prefix for output files
|
312
|
+
:limit => Integer
|
313
|
+
}
|
@@ -0,0 +1,135 @@
|
|
1
|
+
# this solution is not ideal since I force people to use 'rubygems'
|
2
|
+
# possible it would be a better solution to prompt at this point
|
3
|
+
# to install the lib in the way the user is accustomed to
|
4
|
+
=begin
|
5
|
+
begin
|
6
|
+
require 'yanapi'
|
7
|
+
rescue LoadError
|
8
|
+
require 'rubygems'
|
9
|
+
require 'yanapi'
|
10
|
+
end
|
11
|
+
=end
|
12
|
+
require 'yanapi'
|
13
|
+
|
14
|
+
# :title: YANSER, Yahoo! ANSwers harvestER
|
15
|
+
# :main: Yanser
|
16
|
+
# Main processing class.
|
17
|
+
# Yanser encapsulates the main routine and instantiates
|
18
|
+
# all other classes.
|
19
|
+
#--
|
20
|
+
# Yanser takes the users input and validates it.
|
21
|
+
# It decides which search method to choose.
|
22
|
+
# Then it collects all parameters and useful default values,
|
23
|
+
# creates an XyzQuery with the starting point of 0 and gets the first results.
|
24
|
+
# If more results were requested, Yanser creates a similar XyzQuery and gets
|
25
|
+
# the next result set until the result limitation set by the user is met.
|
26
|
+
#
|
27
|
+
module Yanser

  # Drives one harvesting run: builds YANAPI queries from the parsed
  # options, pages through the results and prints or stores every page.
  class Yanser

    # Largest start offset used when no limit was given
    # (the original note states Yahoo! Answers does not go beyond it).
    START_LIMIT = 1000

    # Seconds to wait between two queries; also the base of the
    # exponential back-off used when a query fails.
    QUERY_INTERVAL = 2

    # Page size requested per query (per the original note, the maximum
    # Yahoo! Answers returns).
    MAX_RESULTS = 50

    # opts:: the hash produced by OptParser.parse, at least
    #        {:query_type => ..., :query_params => {...}}.
    #        It is not validated here and is modified in place
    #        (:limit and :query_params[:start] get defaults).
    def initialize(opts)
      @options = opts

      # Without an explicit limit page through everything up to the
      # last start offset.
      @options[:limit] ||= START_LIMIT + MAX_RESULTS

      @query_params = @options[:query_params]

      @output_type = @query_params[:output] || 'xml'

      @query_params[:start] ||= 0
    end

    # Runs the harvest.
    #
    # A QuestionQuery is sent once. For a limit below MAX_RESULTS one
    # query of that size is sent. Otherwise pages of MAX_RESULTS are
    # requested until the limit is reached or a query fails; the last
    # page is shrunk to the number of results still missing.
    #
    # TODO: implement some logging
    def start
      if @options[:query_type] == 'QuestionQuery'
        query(@query_params)
      elsif @options[:limit] < MAX_RESULTS
        @query_params[:results] = @options[:limit]
        query(@query_params)
      else
        @query_params[:results] = MAX_RESULTS

        while query(@query_params)
          # Advance by what was just requested.
          @query_params[:start] += @query_params[:results]

          results_left = @options[:limit] - @query_params[:start]
          break if results_left <= 0

          # Keep looping so that a trailing partial page is actually
          # requested, not just sized.
          @query_params[:results] = [results_left, MAX_RESULTS].min
        end
      end
    end # start

    private

    # Sends one query and hands the result to #output.
    #
    # Returns true when the caller may continue with the next page and
    # false when it should stop: either the service reported an empty
    # response or the query kept failing after four attempts. In both
    # cases the error is written to $stderr.
    def query(params)
      q = create_query(params)
      tries = 0
      begin
        tries += 1
        result = q.get
        output(result)
        # Pause between two successive queries.
        sleep(QUERY_INTERVAL)
      rescue YANAPI::EmptyResponse => e
        $stderr.puts e
        return false # do not iterate further
      rescue => e # any other error is retried with a growing delay
        if tries < 4
          sleep(QUERY_INTERVAL**tries)
          retry
        else
          $stderr.puts e
          return false # do not iterate further
        end
      end
      true # we may iterate further
    end

    # Instantiates the YANAPI query class named by @options[:query_type].
    # The class is looked up by name instead of evaluating a built string.
    def create_query(params)
      YANAPI.const_get(@options[:query_type]).new(params)
    end

    # Stores the result when an output directory was requested,
    # otherwise prints it to standard output.
    def output(result)
      if @options[:output_dir]
        save(result)
      else
        puts result
      end
    end

    # Writes +result+ to "<output_dir>/<start>.<output type>".
    # The block form of File.open closes the file even if writing fails.
    def save(result)
      filename = File.join(@options[:output_dir], "#{@query_params[:start]}.#{@output_type}")
      File.open(filename, 'w') { |file| file.puts result }
    end

  end # class Yanser
end # module Yanser
|
@@ -0,0 +1,138 @@
|
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
require 'test/unit'
|
3
|
+
require 'yanser/opt_parser'
|
4
|
+
include Yanser
|
5
|
+
# Unit tests for Yanser::OptParser.parse.
class TestOptionParser < Test::Unit::TestCase
  def setup
    # A full command line and the options hash expected for it.
    @input = ['--appid', 'YahooDemo', '-r',
              'de', '-l', '10', '-o', '/tmp',
              '-k', 'Haus AND grün']
    @output = {
      :query_params => {
        :appid => 'YahooDemo',
        :query => 'Haus AND grün',
        :region => 'de',
        :search_in => 'question'
      },
      :query_type => 'TermQuery',
      :output_dir => '/tmp',
      :limit => 10
    }

    # The smallest accepted command line, and one with a key word added.
    @cmd_args = ['--appid', 'YahooDemo']
    @minimal_input = ['--appid', 'YahooDemo',
                      '-k', 'Haus']
  end

  def teardown
  end

  # it should have a public class method 'parse'
  def test_public_methods
    assert_respond_to(Yanser::OptParser, :parse)
  end

  # it should return a non empty parameter hash
  def test_parse_method
    return_value = Yanser::OptParser.parse(@cmd_args)
    assert(return_value.instance_of? Hash)
    assert(! return_value.empty?)
  end

  # it should return a right complex output
  def test_output
    cmd_args = ['--appid', 'YahooDemo', '-r',
                'de', '-l', '10', '-o', '/tmp',
                '-k', 'Haus AND grün'
               ]
    assert_equal(@output, Yanser::OptParser.parse(cmd_args))
  end

  # it should set up the right method
  def test_method
    cmd_args = ['-k', 'Haus'] | @cmd_args
    assert_equal('TermQuery', Yanser::OptParser.parse(cmd_args)[:query_type])

    cmd_args = ['-k', 'Haus', '-c', '09876543'] | @cmd_args
    assert_equal('TermQuery', Yanser::OptParser.parse(cmd_args)[:query_type])

    cmd_args = ['-c', '09876543'] | @cmd_args
    assert_equal('CategoryQuery', Yanser::OptParser.parse(cmd_args)[:query_type])

    cmd_args = ['-c', 'Ausgehen'] | @cmd_args
    assert_equal('CategoryQuery', Yanser::OptParser.parse(cmd_args)[:query_type])

    cmd_args = ['--user-id', '12345'] | @cmd_args
    assert_equal('UserQuery', Yanser::OptParser.parse(cmd_args)[:query_type])

    cmd_args = ['--question-id', '12345'] | @cmd_args
    assert_equal('QuestionQuery', Yanser::OptParser.parse(cmd_args)[:query_type])
  end

  # it should set either :category_id or :category_name
  def test_category_identifier
    args = ['-k', 'Haus', '-c', '123'] | @cmd_args
    opts = Yanser::OptParser.parse(args)
    assert_equal('123', opts[:query_params][:category_id])
    assert(! opts[:query_params].has_key?(:category_name))

    args = ['-k', 'Haus', '-c', 'Haushalt'] | @cmd_args
    opts = Yanser::OptParser.parse(args)
    assert_equal('Haushalt', opts[:query_params][:category_name])
    assert(! opts[:query_params].has_key?(:category_id))
  end

  def test_output_dir
  end

  def test_query
  end

  # it should reject the empty input
  def test_empty_input
    assert_raises(OptParserError) {OptParser.parse []}
  end

  # it should accept only valid regions
  def test_region_validity
    regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
               'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
    regions.each do |r|
      input = @minimal_input | ['-r', r]
      assert_nothing_raised(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
    end
    input = @minimal_input | ['-r', 'abc']
    assert_raises(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
  end

  # it should set reasonable defaults
  def test_default_values
  end

  # it should accept only valid query type combinations
  def test_qtype_combinations
  end

  # it should require mandatory cmd arguments
  def test_mandatory_args
    args = ['-k', 'Haus']
    assert_raises(OptParserError) {OptParser.parse args}
  end

  # it should successfully exit with some options
  def test_successful_exit
    # Silence the help/version output while the options are exercised.
    orig_stderr = $stderr.clone
    orig_stdout = $stdout.clone
    $stderr.reopen(File.new('/dev/null', 'w'))
    $stdout.reopen(File.new('/dev/null', 'w'))
    begin
      success_args = ['-h', '--help', '-v', '--version']
      success_args.each do |arg|
        # Wrap the single option in an array literal; String#to_a does
        # not exist on Ruby 1.9 and later.
        assert_raises(SystemExit) {OptParser.parse([arg])}
      end
    ensure
      # Restore the streams even when an assertion above fails.
      $stderr.reopen(orig_stderr)
      $stdout.reopen(orig_stdout)
    end
  end
end
|