yanser 0.0.3 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.yardopts +6 -0
- data/CHANGELOG +24 -0
- data/README +19 -3
- data/bin/yanser +30 -13
- data/lib/yanser/error.rb +4 -3
- data/lib/yanser/opt_parser.rb +166 -97
- data/lib/yanser/store.rb +48 -0
- data/lib/yanser/version.rb +1 -1
- data/lib/yanser/yanser.rb +58 -90
- data/test/test_opt_parser.rb +203 -77
- data/test/test_yanser.rb +7 -1
- metadata +16 -14
- data/Rakefile +0 -41
- data/lib/tester.rb +0 -18
data/.yardopts
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
=== COMPLETED
|
2
|
+
==== 0.1.0
|
3
|
+
Yanser now supports YANAPI 0.3.1 and later.
|
4
|
+
==== 0.0.3
|
5
|
+
Yanser depends on the old interface of YANAPI. No functional improvements.
|
6
|
+
==== 0.0.2
|
7
|
+
Small changes in the documentation.
|
8
|
+
==== 0.0.1
|
9
|
+
Initial release of Yanser.
|
10
|
+
|
11
|
+
|
12
|
+
=== PLANNED
|
13
|
+
|
14
|
+
==== 0.0.2
|
15
|
+
==== 0.0.3
|
16
|
+
==== 0.4.0
|
17
|
+
==== 0.5.0
|
18
|
+
==== 0.6.0
|
19
|
+
==== 0.7.0
|
20
|
+
==== 0.8.0
|
21
|
+
==== 0.9.0
|
22
|
+
==== 1.0.0
|
23
|
+
|
24
|
+
|
data/README
CHANGED
@@ -2,20 +2,36 @@
|
|
2
2
|
|
3
3
|
* {RubyGems}[http://rubygems.org/gems/yanser]
|
4
4
|
* Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
|
5
|
-
* {
|
5
|
+
* {YANSER Project Page}[http://yanser.rubyforge.org/]
|
6
6
|
|
7
7
|
== DESCRIPTION
|
8
8
|
|
9
|
-
YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access
|
9
|
+
YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access
|
10
|
+
to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI
|
11
|
+
and helps to search for Questions and Answers which contain a set of key words,
|
12
|
+
belong to a specific semantic domain or are posted by a certain user.
|
13
|
+
|
14
|
+
Yanser is a research tool in the field of Computational Linguistics.
|
10
15
|
|
11
16
|
== SYNOPSIS
|
12
17
|
$ yanser --help
|
13
18
|
|
19
|
+
== FEATURE LIST
|
20
|
+
|
21
|
+
== INSTALLATION
|
22
|
+
|
23
|
+
== USAGE
|
24
|
+
|
25
|
+
== CHANGELOG
|
26
|
+
|
27
|
+
See CHANGELOG.
|
28
|
+
|
29
|
+
== CONTRIBUTORS
|
14
30
|
|
15
31
|
|
16
32
|
== LICENSE
|
17
33
|
|
18
34
|
YANSER is a copyrighted software by Andrei Beliankou, 2011.
|
35
|
+
|
19
36
|
You may use, redistribute and change it under the terms
|
20
37
|
provided in the LICENSE file.
|
21
|
-
|
data/bin/yanser
CHANGED
@@ -1,19 +1,36 @@
|
|
1
1
|
# -*- coding: utf-8; mode: ruby -*-
|
2
|
-
|
3
|
-
# this is the wrong approach; in fact I should not directly specify
|
4
|
-
# where the files are located, but let it stay this way for now
|
5
|
-
# it is required only for .tar.gz distribution
|
6
|
-
#lib_path = File.expand_path('../../lib', __FILE__)
|
7
|
-
#$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
|
8
|
-
|
9
|
-
# this approach is the correct one; only this way will the Yanser and OptionParser classes be independent
|
10
2
|
require 'yanser/yanser'
|
11
3
|
require 'yanser/opt_parser'
|
4
|
+
require 'yanser/store'
|
12
5
|
|
13
|
-
|
14
|
-
options = Yanser::OptParser.parse(ARGV)
|
15
|
-
|
6
|
+
begin
|
7
|
+
options = Yanser::OptParser.parse(ARGV)
|
8
|
+
rescue
|
9
|
+
raise
|
10
|
+
end
|
16
11
|
|
17
|
-
|
12
|
+
# Handle exceptions from YANAPI here and report them with messages.
|
13
|
+
# Add "See <yanser --help>." to it.
|
14
|
+
begin
|
15
|
+
yanser = Yanser::Yanser.new(options)
|
16
|
+
result = yanser.start
|
17
|
+
rescue YANAPI::UserError
|
18
|
+
# Give up with an error message.
|
19
|
+
# The user must change the input.
|
20
|
+
raise
|
21
|
+
rescue YANAPI::ContentError
|
22
|
+
# Yahoo! says something.
|
23
|
+
raise
|
24
|
+
rescue YANAPI::ExternalError
|
25
|
+
# Give up with an error message.
|
26
|
+
# It is a persistent external error.
|
27
|
+
# We tried three times in Yanser, but the error seems to persist.
|
28
|
+
raise
|
29
|
+
end
|
18
30
|
|
19
|
-
|
31
|
+
begin
|
32
|
+
storage = Yanser::Store.new(options)
|
33
|
+
storage.store(result)
|
34
|
+
rescue
|
35
|
+
raise
|
36
|
+
end
|
data/lib/yanser/error.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
module Yanser
|
4
|
-
class
|
4
|
+
class Error < RuntimeError; end
|
5
|
+
class OptParserError < Error
|
5
6
|
# empty for now
|
6
7
|
end
|
7
8
|
|
8
|
-
class YanserError <
|
9
|
+
class YanserError < Error
|
9
10
|
# empty for now
|
10
11
|
end
|
11
12
|
end
|
data/lib/yanser/opt_parser.rb
CHANGED
@@ -7,41 +7,54 @@ require 'yanser/error'
|
|
7
7
|
|
8
8
|
module Yanser
|
9
9
|
class OptParser
|
10
|
-
|
11
|
-
#
|
10
|
+
|
11
|
+
# Different possible values accepted by Yahoo!.
|
12
|
+
REGIONS = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
|
13
|
+
'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
|
14
|
+
TYPES = ['all', 'resolved', 'open', 'undecided']
|
15
|
+
SORT_TYPES = ['relevance', 'date_desc', 'date_asc']
|
16
|
+
DATE_RANGES = ['all', '7', '7-30', '30-60', '60-90', 'more90']
|
17
|
+
SEARCH_LOCATIONS = ['all', 'question', 'best_answer']
|
18
|
+
OUTPUT_TYPES = ['xml', 'json', 'php', 'rss']
|
19
|
+
FILTERS = ['question', 'best_answer']
|
20
|
+
|
21
|
+
# Main class method.
|
22
|
+
# OP expects cmd_args to be an array like ARGV.
|
12
23
|
def self.parse(cmd_args)
|
13
24
|
@@options = {}
|
14
25
|
@@options[:query_params] = {}
|
15
26
|
|
16
27
|
parser = create_parser
|
17
28
|
|
18
|
-
#
|
29
|
+
# If no options provided print the help.
|
19
30
|
if cmd_args.empty?
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
# $stderr.printf "You have to provide some options.\n\n"
|
24
|
-
# puts parser
|
25
|
-
# exit 1
|
31
|
+
puts('You have to provide some options.',
|
32
|
+
'Please start with <yanser --help>.')
|
33
|
+
exit(1)
|
26
34
|
end
|
27
35
|
|
28
36
|
# Parse ARGV and provide the options hash.
|
29
37
|
# Check if everything is correct and handle exceptions
|
30
38
|
begin
|
31
|
-
parser.parse
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
39
|
+
parser.parse(cmd_args)
|
40
|
+
rescue OptionParser::InvalidArgument => e
|
41
|
+
arg = e.message.split.last
|
42
|
+
puts "The provided argument #{arg} is currently not supported by Yahoo!"
|
43
|
+
puts 'Please colsult <yanser --help>.'
|
44
|
+
exit(1)
|
45
|
+
rescue OptionParser::InvalidOption => e
|
46
|
+
puts "You have provided an #{e.message}."
|
47
|
+
puts 'Please colsult <yanser --help>.'
|
48
|
+
exit(1)
|
49
|
+
rescue
|
50
|
+
raise
|
38
51
|
end
|
39
52
|
|
40
53
|
# Check to see if we got the required arguments needed.
|
41
|
-
check_required_options(@@options)
|
54
|
+
# check_required_options(@@options)
|
42
55
|
|
43
56
|
# Set the search method.
|
44
|
-
@@options[:
|
57
|
+
@@options[:method] = set_query_type(@@options[:query_params])
|
45
58
|
|
46
59
|
# Set some defaults such as :region and :search_in
|
47
60
|
@@options = set_defaults(@@options)
|
@@ -50,34 +63,23 @@ module Yanser
|
|
50
63
|
end # parse
|
51
64
|
|
52
65
|
private
|
53
|
-
|
54
|
-
# Check if the value of given region is correct.
|
55
|
-
# Now 14 regions are supported by Yahoo! Answers.
|
56
|
-
def self.prove_region(region)
|
57
|
-
regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
|
58
|
-
'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
|
59
|
-
|
60
|
-
unless regions.include?(region)
|
61
|
-
msg = "The provided search region #{region} is currently not supported by Yahoo!"
|
62
|
-
raise OptParserError, msg
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
+
|
66
67
|
# define one of the following query types: TermQuery|CategoryQuery|
|
67
68
|
# QuestionQuery|UserQuery
|
68
69
|
def self.set_query_type(params)
|
69
70
|
case
|
70
71
|
when (params[:category_id] || params[:category_name]) && ! params[:query]
|
71
|
-
'
|
72
|
+
'getByCategory'
|
72
73
|
when params[:query]
|
73
|
-
'
|
74
|
+
'questionSearch'
|
74
75
|
when params[:user_id]
|
75
|
-
'
|
76
|
+
'getByUser'
|
76
77
|
when params[:question_id]
|
77
|
-
'
|
78
|
+
'getQuestion'
|
78
79
|
end
|
79
80
|
end
|
80
|
-
|
81
|
+
|
82
|
+
=begin
|
81
83
|
def self.check_required_options(options)
|
82
84
|
required_opts = [:appid]
|
83
85
|
|
@@ -85,31 +87,19 @@ module Yanser
|
|
85
87
|
if options[:query_params].has_key?(opt)
|
86
88
|
next
|
87
89
|
else
|
88
|
-
|
89
|
-
|
90
|
+
puts "The required option --#{opt} is missing."
|
91
|
+
puts 'Please colsult <yanser --help>.'
|
92
|
+
exit(1)
|
90
93
|
end
|
91
94
|
end
|
92
95
|
end
|
93
|
-
|
94
|
-
|
95
|
-
dir = File.expand_path(dir)
|
96
|
-
#check for existens
|
97
|
-
if File.directory?(dir)
|
98
|
-
if File.writable?(dir)
|
99
|
-
return dir
|
100
|
-
else
|
101
|
-
$stderr.puts 'The directory you have provided is not writable!'
|
102
|
-
exit 1
|
103
|
-
end
|
104
|
-
else
|
105
|
-
FileUtils.mkdir_p(dir)
|
106
|
-
return dir
|
107
|
-
end
|
108
|
-
end # provide_dir
|
96
|
+
=end
|
97
|
+
|
109
98
|
|
110
99
|
def self.set_defaults(opts)
|
111
100
|
# fake method for now
|
112
|
-
|
101
|
+
|
102
|
+
opts
|
113
103
|
end # set_defaults
|
114
104
|
|
115
105
|
def self.create_parser
|
@@ -133,7 +123,9 @@ module Yanser
|
|
133
123
|
opts.separator ' Mandatory search arguments:'
|
134
124
|
|
135
125
|
opts.on('-k', '--key-word KEYWORD',
|
136
|
-
'Provide a single keyword or a boolean expression.'
|
126
|
+
'Provide a single keyword or a boolean expression.',
|
127
|
+
'You might combine this option with an explicit',
|
128
|
+
'<category> name or id to restrict your search.'
|
137
129
|
) do |keyword|
|
138
130
|
@@options[:query_params][:query] = keyword
|
139
131
|
# not a solution!!!
|
@@ -141,10 +133,12 @@ module Yanser
|
|
141
133
|
|
142
134
|
end
|
143
135
|
|
144
|
-
opts.separator ''
|
136
|
+
opts.separator ' and/or'
|
145
137
|
|
146
138
|
opts.on('-c', '--category CATEGORY',
|
147
|
-
'Provide a category name or ID.'
|
139
|
+
'Provide a category name or ID. This is useful',
|
140
|
+
'while searching for categories or in the combination',
|
141
|
+
'with some key words.'
|
148
142
|
) do |category|
|
149
143
|
|
150
144
|
if category =~ /^[[:digit:]]+$/
|
@@ -155,7 +149,7 @@ module Yanser
|
|
155
149
|
|
156
150
|
end
|
157
151
|
|
158
|
-
opts.separator ''
|
152
|
+
opts.separator ' or'
|
159
153
|
|
160
154
|
opts.on('--user-id ID',
|
161
155
|
'Provide an user ID of questions you search for.',
|
@@ -164,7 +158,7 @@ module Yanser
|
|
164
158
|
@@options[:query_params][:user_id] = user_id
|
165
159
|
end
|
166
160
|
|
167
|
-
opts.separator ''
|
161
|
+
opts.separator ' or'
|
168
162
|
|
169
163
|
opts.on('--question-id ID',
|
170
164
|
'Provide a question ID of the question you search for.',
|
@@ -172,32 +166,33 @@ module Yanser
|
|
172
166
|
) do |question_id|
|
173
167
|
@@options[:query_params][:question_id] = question_id
|
174
168
|
end
|
175
|
-
|
169
|
+
|
170
|
+
#################
|
176
171
|
opts.separator ''
|
177
172
|
opts.separator ' Optional search arguments:'
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
'This defaults to en.'
|
184
|
-
) do |region|
|
185
|
-
# todo
|
186
|
-
prove_region(region)
|
187
|
-
@@options[:query_params][:region] = region
|
173
|
+
opts.on('--date-range DATE', DATE_RANGES,
|
174
|
+
'Provide some date range for the search. It defaults to <all>.',
|
175
|
+
"Possible values are: <#{DATE_RANGES.join('>, <')}>."
|
176
|
+
) do |date|
|
177
|
+
@@options[:query_params][:date_range] = date
|
188
178
|
end
|
189
|
-
|
179
|
+
|
190
180
|
opts.separator ''
|
191
|
-
|
192
|
-
|
193
|
-
'
|
194
|
-
|
195
|
-
|
196
|
-
@@options[:output_dir] = provide_dir(output_dir)
|
181
|
+
opts.on('-f', '--output-format FORMAT', OUTPUT_TYPES,
|
182
|
+
"Provide an output format: <#{OUTPUT_TYPES.join('>, <')}>.",
|
183
|
+
'It defaults to <xml>, you may simply ommit this option.'
|
184
|
+
) do |f|
|
185
|
+
@@options[:query_params][:output] = f
|
197
186
|
end
|
198
187
|
|
199
188
|
opts.separator ''
|
189
|
+
opts.on('--filter TYPE', FILTERS,
|
190
|
+
"Possible values are: <#{FILTERS.join('>, <')}>."
|
191
|
+
) do |type|
|
192
|
+
@@options[:query_params][:filter] = type
|
193
|
+
end
|
200
194
|
|
195
|
+
opts.separator ''
|
201
196
|
opts.on('-l', '--limit NUMBER', Integer,
|
202
197
|
'Provide a number of answers you want to get from Yahoo.',
|
203
198
|
'This argument is not mandatory, if you want to get',
|
@@ -206,12 +201,12 @@ module Yanser
|
|
206
201
|
@@options[:limit] = limit
|
207
202
|
end
|
208
203
|
|
209
|
-
opts.separator ''
|
210
|
-
opts.on('-
|
211
|
-
'Provide an output
|
212
|
-
'
|
213
|
-
) do |
|
214
|
-
|
204
|
+
opts.separator ''
|
205
|
+
opts.on('-o', '--output-dir DIR',
|
206
|
+
'Provide an output folder.',
|
207
|
+
'This directory will be created if it does not exist yet.'
|
208
|
+
) do |output_dir|
|
209
|
+
@@options[:output_dir] = output_dir
|
215
210
|
end
|
216
211
|
|
217
212
|
opts.separator ''
|
@@ -222,23 +217,92 @@ module Yanser
|
|
222
217
|
'It can be useful if you want to put many query results',
|
223
218
|
'in the same output folder.'
|
224
219
|
) do |pref|
|
225
|
-
|
220
|
+
@@options[:prefix] = pref
|
226
221
|
end
|
227
222
|
|
228
223
|
opts.separator ''
|
229
|
-
opts.
|
230
|
-
|
231
|
-
'
|
232
|
-
|
233
|
-
|
224
|
+
opts.separator ''
|
225
|
+
opts.on('-r', '--region REGION', REGIONS,
|
226
|
+
'Provide a geographic region to search in for terms.',
|
227
|
+
'Possible values are: de, us, uk, ca, au, in, es, br,',
|
228
|
+
' ar, mx, e1, it, fr, sg.',
|
229
|
+
'This defaults to <en>.'
|
230
|
+
) do |region|
|
231
|
+
|
232
|
+
@@options[:query_params][:region] = region
|
234
233
|
end
|
235
234
|
|
236
235
|
opts.separator ''
|
236
|
+
opts.on('--search-in PLACE', SEARCH_LOCATIONS,
|
237
|
+
"Possible values are: <#{SEARCH_LOCATIONS.join('>, <')}>."
|
238
|
+
) do |place|
|
239
|
+
@@options[:query_params][:search_in] = place
|
240
|
+
end
|
241
|
+
|
242
|
+
opts.separator ''
|
243
|
+
opts.on('--start POSITION',
|
244
|
+
'Provide the position of the question in the search list',
|
245
|
+
'you want start with.'
|
246
|
+
) do |pos|
|
247
|
+
@@options[:query_params][:start] = pos
|
248
|
+
end
|
249
|
+
|
250
|
+
opts.separator ''
|
251
|
+
opts.on('--sort ORDER', SORT_TYPES,
|
252
|
+
"Possible values are: <#{SORT_TYPES.join('>, <')}>."
|
253
|
+
) do |order|
|
254
|
+
@@options[:query_params][:sort] = order
|
255
|
+
end
|
256
|
+
|
257
|
+
opts.separator ''
|
258
|
+
opts.on('--type TYPE', TYPES,
|
259
|
+
"Possible values are: <#{TYPES.join('>, <')}>."
|
260
|
+
) do |type|
|
261
|
+
@@options[:query_params][:type] = type
|
262
|
+
end
|
263
|
+
|
264
|
+
|
265
|
+
#################
|
266
|
+
opts.separator ""
|
267
|
+
opts.separator ' Experimental options. They may not be implemented yet.'
|
237
268
|
opts.on('--exp-file FILE',
|
238
269
|
'Provide a formal description of the experiment.'
|
239
270
|
) do |file_name|
|
240
|
-
|
271
|
+
warn 'Not yet implemented!'
|
272
|
+
end
|
273
|
+
|
274
|
+
opts.separator ''
|
275
|
+
opts.on('--key-word-list FILE',
|
276
|
+
'Provide a file with key words or phrases',
|
277
|
+
'(boolean syntax allowed), one search token per line.'
|
278
|
+
) do |file_name|
|
279
|
+
warn 'Not yet implemented!'
|
280
|
+
end
|
281
|
+
|
282
|
+
opts.separator ''
|
283
|
+
opts.on('--log [LOGFILE]',
|
284
|
+
'SOME DESCRIPTION'
|
285
|
+
) do |file|
|
286
|
+
warn 'Add description!'
|
287
|
+
warn 'Not yet implemented!'
|
288
|
+
end
|
289
|
+
|
290
|
+
opts.separator ''
|
291
|
+
opts.on('--extract',
|
292
|
+
'SOME DESCRIPTION'
|
293
|
+
) do
|
294
|
+
# some non xml representation in a human readable format
|
295
|
+
warn 'Add description!'
|
296
|
+
warn 'Not yet implemented!'
|
241
297
|
end
|
298
|
+
opts.separator ''
|
299
|
+
opts.on('-t', '--time-interval SECONDS', Integer,
|
300
|
+
'SOME DESCRIPTION'
|
301
|
+
) do |t|
|
302
|
+
@@options[:interval] = t
|
303
|
+
warn 'Add description!'
|
304
|
+
end
|
305
|
+
|
242
306
|
opts.separator ""
|
243
307
|
opts.separator "Common options:"
|
244
308
|
|
@@ -261,20 +325,24 @@ __END__
|
|
261
325
|
# List of options planned for implementation.
|
262
326
|
--appid
|
263
327
|
-k, --key-word
|
264
|
-
-c, --category
|
328
|
+
-c, --category # Term & CategorySearch
|
265
329
|
--user-id
|
266
330
|
--question-id # QuestionSearch
|
267
331
|
-r, --region
|
268
332
|
-f, --output-format
|
269
|
-
-l, --limit
|
270
333
|
--date-range
|
271
334
|
--sort
|
335
|
+
--start
|
272
336
|
--filter
|
337
|
+
--callback # bad idea
|
338
|
+
--type
|
339
|
+
--search-in
|
273
340
|
|
274
341
|
|
342
|
+
-l, --limit
|
275
343
|
--prefix #prefix for output files
|
276
344
|
--key-word-list
|
277
|
-
--exp-file
|
345
|
+
--exp-file # experiment description, yaml
|
278
346
|
-o, --output-dir
|
279
347
|
-h, --help
|
280
348
|
-v, --version
|
@@ -306,8 +374,9 @@ instance interface
|
|
306
374
|
:type => "all" | "resolved" | "open" | "undecided", # default 'all'
|
307
375
|
:user_id => '123456'
|
308
376
|
},
|
309
|
-
:
|
377
|
+
:interval => Integer, # in seconds
|
378
|
+
:limit => Integer,
|
379
|
+
:method => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
|
310
380
|
:output_dir => 'some path',
|
311
|
-
:prefix => 'some prefix'
|
312
|
-
:limit => Integer
|
381
|
+
:prefix => 'some prefix' # prefix for output files
|
313
382
|
}
|
data/lib/yanser/store.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
module Yanser
|
4
|
+
class Store
|
5
|
+
def initialize(params)
|
6
|
+
@params = params
|
7
|
+
end
|
8
|
+
|
9
|
+
def store(result)
|
10
|
+
if dir = @params[:output_dir]
|
11
|
+
provide_dir(dir)
|
12
|
+
save(result)
|
13
|
+
else
|
14
|
+
puts result
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# rework!
|
21
|
+
def provide_dir(dir)
|
22
|
+
dir = File.expand_path(dir)
|
23
|
+
# check for existence
|
24
|
+
if File.directory?(dir)
|
25
|
+
unless File.writable?(dir)
|
26
|
+
warn 'The directory you have provided is not writable!'
|
27
|
+
exit(1)
|
28
|
+
end
|
29
|
+
else
|
30
|
+
FileUtils.mkdir_p(dir)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# save results to a dir
|
35
|
+
# not a good implementation
|
36
|
+
# interface (filename, data)
|
37
|
+
def save(result)
|
38
|
+
filename = File.join(@params[:output_dir],
|
39
|
+
"yanser_output.xml")
|
40
|
+
File.open(filename, 'w') do |file|
|
41
|
+
file.puts result
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end # Store
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
|
data/lib/yanser/version.rb
CHANGED
data/lib/yanser/yanser.rb
CHANGED
@@ -1,29 +1,10 @@
|
|
1
|
-
# this solution is not ideal since I force people to use 'rubygems'
|
2
|
-
# possible it would be a better solution to prompt at this point
|
3
|
-
# to install the lib in the way the user is accustomed to
|
4
|
-
=begin
|
5
|
-
begin
|
6
|
-
require 'yanapi'
|
7
|
-
rescue LoadError
|
8
|
-
require 'rubygems'
|
9
|
-
require 'yanapi'
|
10
|
-
end
|
11
|
-
=end
|
12
1
|
require 'yanapi'
|
13
2
|
|
14
3
|
# :title: YANSER, Yahoo! ANSwers harvestER
|
15
|
-
# :main:
|
4
|
+
# :main: README
|
5
|
+
|
16
6
|
# Main processing class.
|
17
|
-
# Yanser encapsulates the main routine and instantiates
|
18
|
-
# all other classes.
|
19
|
-
#--
|
20
|
-
# Yanser takes the users input and validates it.
|
21
|
-
# It decides which search method to choose.
|
22
|
-
# Then it collects all parameters and useful default values,
|
23
|
-
# creates an XyzQuery with the starting point of 0 and gets the first results.
|
24
|
-
# If more results were requested, Yanser creates a similar XyzQuery and gets
|
25
|
-
# the next result set until the result limitation set by the user is met.
|
26
|
-
#
|
7
|
+
# Yanser encapsulates the main routine and instantiates all other classes.
|
27
8
|
module Yanser
|
28
9
|
|
29
10
|
class Yanser
|
@@ -38,98 +19,85 @@ module Yanser
|
|
38
19
|
# Yahoo! Answers returns maximum 50 results.
|
39
20
|
MAX_RESULTS = 50
|
40
21
|
|
41
|
-
def initialize(
|
42
|
-
|
43
|
-
|
44
|
-
# {:query_type=>u|q|c|w, :query_params=>{appid, start, results, query}}
|
45
|
-
# opts come from the OptionParser
|
46
|
-
# they are supposed to be correct, no validation here
|
47
|
-
@options = opts
|
48
|
-
|
49
|
-
@options[:limit] = @options[:limit] || START_LIMIT + MAX_RESULTS
|
22
|
+
def initialize(params)
|
23
|
+
# Libs are not allowed to mutate the input.
|
24
|
+
@params = params.clone
|
50
25
|
|
51
|
-
@
|
26
|
+
@params[:limit] = @params[:limit] || START_LIMIT + MAX_RESULTS
|
52
27
|
|
53
|
-
@
|
28
|
+
@query_params = @params[:query_params]
|
54
29
|
|
30
|
+
@output = @query_params[:output] || 'xml'
|
31
|
+
|
32
|
+
# Yahoo counts results beginning with 0.
|
33
|
+
# <:results=50> means all questions from 0 to 49.
|
55
34
|
@query_params[:start] = @query_params[:start] || 0
|
56
35
|
end
|
57
36
|
|
58
|
-
#
|
37
|
+
# It returns the accumulated string or <nil>, if nothing found.
|
59
38
|
def start
|
60
|
-
|
61
|
-
|
62
|
-
query(@query_params)
|
63
|
-
elsif @options[:limit] < MAX_RESULTS
|
64
|
-
@query_params[:results] = @options[:limit]
|
65
|
-
query(@query_params)
|
39
|
+
if @params[:method] == 'getQuestion'
|
40
|
+
response = get_response(@params)
|
66
41
|
else
|
67
|
-
|
42
|
+
response = ''
|
68
43
|
|
69
|
-
while
|
70
|
-
|
71
|
-
@query_params[:start] += MAX_RESULTS
|
44
|
+
while @query_params[:start] < @params[:limit]
|
45
|
+
results_left = @params[:limit] - @query_params[:start]
|
72
46
|
|
73
|
-
results_left
|
74
|
-
if results_left == 0
|
75
|
-
break
|
76
|
-
elsif results_left < MAX_RESULTS
|
47
|
+
if results_left < MAX_RESULTS
|
77
48
|
@query_params[:results] = results_left
|
49
|
+
else
|
50
|
+
@query_params[:results] = MAX_RESULTS
|
51
|
+
end
|
52
|
+
|
53
|
+
r = get_response(@params)
|
54
|
+
if r
|
55
|
+
response << r
|
56
|
+
else
|
57
|
+
if response.empty?
|
58
|
+
response = nil
|
59
|
+
end
|
78
60
|
break
|
79
61
|
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
62
|
+
@query_params[:start] += @query_params[:results]
|
63
|
+
sleep(QUERY_INTERVAL)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
response
|
84
68
|
end # start
|
85
69
|
|
86
70
|
private
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
71
|
+
# It returns a result string or <nil>.
|
72
|
+
# It tries three times in case of YANAPI::ExternalError,
|
73
|
+
# it gives up if a YANAPI::ContentError occurs (returns <nil>).
|
74
|
+
def get_response(params)
|
75
|
+
query = create_query(params)
|
76
|
+
t = 0
|
91
77
|
begin
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
return false # do not iterate futher
|
99
|
-
rescue => e # some errors to retry
|
100
|
-
if (tries < 4)
|
101
|
-
sleep(QUERY_INTERVAL**tries)
|
78
|
+
t += 1
|
79
|
+
response = query.get
|
80
|
+
rescue YANAPI::ExternalError => e
|
81
|
+
warn "External error: #{e}!"
|
82
|
+
if t <= 3
|
83
|
+
sleep(QUERY_INTERVAL**t)
|
102
84
|
retry
|
103
85
|
else
|
104
|
-
|
105
|
-
|
86
|
+
raise
|
87
|
+
end
|
106
88
|
end
|
89
|
+
|
90
|
+
response
|
107
91
|
end
|
108
|
-
return true # we may iterate futher
|
109
|
-
end
|
110
92
|
|
93
|
+
# It creates a new api object or raises an exception.
|
94
|
+
# YANAPI::UserError should not be rescued, it indicates that the user
|
95
|
+
# must correct the input hash.
|
111
96
|
def create_query(params)
|
112
|
-
|
113
|
-
end
|
114
|
-
|
115
|
-
def output(result)
|
116
|
-
if @options[:output_dir]
|
117
|
-
save(result)
|
118
|
-
else
|
119
|
-
puts result
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
# save results to a dir
|
124
|
-
# this dir exists since has been proved by OptionParser
|
125
|
-
# not a good implementation
|
126
|
-
# interface (filename, data)
|
127
|
-
def save(result)
|
128
|
-
filename = File.join(@options[:output_dir], "#{@query_params[:start]}.#{@output_type}")
|
129
|
-
file = File.new(filename, 'w')
|
130
|
-
file.puts result
|
131
|
-
file.close
|
97
|
+
YANAPI::API.new(params)
|
132
98
|
end
|
133
99
|
|
134
100
|
end # class Yanser
|
135
101
|
end # module Yanser
|
102
|
+
|
103
|
+
__END__
|
data/test/test_opt_parser.rb
CHANGED
@@ -1,138 +1,264 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require 'test/unit'
|
3
3
|
require 'yanser/opt_parser'
|
4
|
+
require 'stringio' # for helper methods
|
5
|
+
|
4
6
|
include Yanser
|
7
|
+
|
5
8
|
class TestOptionParser < Test::Unit::TestCase
|
6
|
-
def setup
|
7
|
-
@input = ['--appid', 'YahooDemo', '-r',
|
8
|
-
'de', '-l', '10', '-o', '/tmp',
|
9
|
-
'-k' 'Haus AND grün']
|
10
|
-
@output = {
|
11
|
-
:query_params => {
|
12
|
-
:appid => 'YahooDemo',
|
13
|
-
:query => 'Haus AND grün',
|
14
|
-
:region => 'de',
|
15
|
-
:search_in => 'question'
|
16
|
-
},
|
17
|
-
:query_type => 'TermQuery',
|
18
|
-
:output_dir => '/tmp',
|
19
|
-
:limit => 10
|
20
|
-
}
|
21
9
|
|
10
|
+
def setup
|
22
11
|
@cmd_args = ['--appid', 'YahooDemo']
|
23
|
-
@minimal_input = ['--appid', 'YahooDemo',
|
24
|
-
|
12
|
+
@minimal_input = ['--appid', 'YahooDemo', '-k', 'Haus']
|
13
|
+
@valid_opts = ['--appid', 'MyID',
|
14
|
+
'--category', '12345',
|
15
|
+
'--date-range', 'all',
|
16
|
+
'--exp-file', 'exp_file.yml',
|
17
|
+
'--filter', 'question',
|
18
|
+
'--help',
|
19
|
+
'--key-word', 'Haus',
|
20
|
+
'--key-word-list', 'some_file.txt',
|
21
|
+
'--limit', '1',
|
22
|
+
'--output-dir', '/tmp',
|
23
|
+
'--output-format', 'xml',
|
24
|
+
'--prefix', 'file1',
|
25
|
+
'--question-id', '12345',
|
26
|
+
'--region', 'de',
|
27
|
+
'--search-in', 'question',
|
28
|
+
'--sort', 'relevance',
|
29
|
+
'--start', '10',
|
30
|
+
'--time-interval', '10',
|
31
|
+
'--type', 'resolved',
|
32
|
+
'--user-id', '12345',
|
33
|
+
'--version'
|
34
|
+
]
|
25
35
|
end
|
26
36
|
|
27
37
|
def teardown
|
28
38
|
end
|
29
39
|
|
30
|
-
#
|
40
|
+
# It should have a public class method 'parse'.
|
31
41
|
def test_public_methods
|
32
42
|
assert_respond_to(Yanser::OptParser, :parse)
|
33
43
|
end
|
34
44
|
|
35
|
-
#
|
45
|
+
# It should return a non empty parameter hash.
|
36
46
|
def test_parse_method
|
37
|
-
return_value = Yanser::OptParser.parse(@
|
38
|
-
assert(return_value.instance_of?
|
39
|
-
|
47
|
+
return_value = Yanser::OptParser.parse(@minimal_input)
|
48
|
+
assert(return_value.instance_of?(Hash))
|
49
|
+
assert_equal(false, return_value.empty?)
|
40
50
|
end
|
41
51
|
|
42
|
-
#
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
assert_equal(@output, Yanser::OptParser.parse(cmd_args))
|
52
|
+
# It should reject the empty input and exit.
|
53
|
+
def test_empty_input
|
54
|
+
out, err = intercept_output do
|
55
|
+
assert_raises(SystemExit) { OptParser.parse([]) }
|
56
|
+
end
|
57
|
+
assert_match(/You have to provide some options./, out)
|
49
58
|
end
|
50
59
|
|
60
|
+
# It should accept correct options.
|
61
|
+
# Invalid options are a matter for OptionParser itself,
|
62
|
+
# do not test it here.
|
63
|
+
# We test only, that OP exits and does not raise an exception.
|
64
|
+
def test_accept_correct_options
|
65
|
+
# these options should be treated separately
|
66
|
+
@valid_opts.delete('--help')
|
67
|
+
@valid_opts.delete('--version')
|
68
|
+
assert_nothing_raised { OptParser.parse(@valid_opts) }
|
69
|
+
|
70
|
+
stdout, stderr = intercept_output do
|
71
|
+
assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
|
72
|
+
end
|
73
|
+
|
74
|
+
assert_match(/You have provided an invalid option:/, stdout)
|
75
|
+
end
|
76
|
+
|
77
|
+
# It should successfully exit with some options.
|
78
|
+
def test_successful_exit
|
79
|
+
quietly do
|
80
|
+
success_args = ['-h', '--help', '-v', '--version']
|
81
|
+
success_args.each do |arg|
|
82
|
+
assert_raises(SystemExit) { OptParser.parse(arg.split) }
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
=begin
|
88
|
+
# It is done in YANAPI.
|
89
|
+
# It should require mandatory cmd arguments.
|
90
|
+
def test_mandatory_args
|
91
|
+
flunk('Rewrite!')
|
92
|
+
args = ['-k', 'Haus']
|
93
|
+
assert_raises(OptParserError) {OptParser.parse args}
|
94
|
+
end
|
95
|
+
=end
|
96
|
+
|
97
|
+
# It should accept only valid date arguments.
|
98
|
+
def test_date_arguments
|
99
|
+
validate(OptParser::DATE_RANGES, '--date-range', :date_range)
|
100
|
+
end
|
101
|
+
|
102
|
+
# It should accept a file and parse it.
|
103
|
+
def test_exp_file
|
104
|
+
flunk('Not implemented! It should accept a file and parse it.')
|
105
|
+
end
|
106
|
+
|
107
|
+
# It should accept only valid filter arguments.
|
108
|
+
def test_filter_arguments
|
109
|
+
validate(OptParser::FILTERS, '--filter', :filter)
|
110
|
+
end
|
111
|
+
|
112
|
+
# It should accept only numeric parameters for limits.
|
113
|
+
def test_limit_arguments
|
114
|
+
flunk('Not implemented! It should accept only numeric paramters for limits.')
|
115
|
+
end
|
51
116
|
|
52
|
-
#
|
53
|
-
def
|
117
|
+
# It should accept only valid output formats.
|
118
|
+
def test_output_arguments
|
119
|
+
validate(OptParser::OUTPUT_TYPES, '--output-format', :output)
|
120
|
+
end
|
121
|
+
|
122
|
+
# It should accept only valid regions.
|
123
|
+
def test_region_arguments
|
124
|
+
validate(OptParser::REGIONS, '-r', :region)
|
125
|
+
end
|
126
|
+
|
127
|
+
# It should accept only valid search locations.
|
128
|
+
def test_search_locations
|
129
|
+
validate(OptParser::SEARCH_LOCATIONS, '--search-in', :search_in)
|
130
|
+
end
|
131
|
+
|
132
|
+
# It should accept only valid sort types.
|
133
|
+
def test_sort_types
|
134
|
+
validate(OptParser::SORT_TYPES, '--sort', :sort)
|
135
|
+
end
|
136
|
+
|
137
|
+
# It should accept only valid types.
|
138
|
+
def test_types
|
139
|
+
validate(OptParser::TYPES, '--type', :type)
|
140
|
+
end
|
141
|
+
|
142
|
+
# It should set up the right search method.
|
143
|
+
def test_correct_setting_of_the_method
|
54
144
|
|
55
145
|
cmd_args = ['-k', 'Haus'] | @cmd_args
|
56
|
-
assert_equal('
|
146
|
+
assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
|
57
147
|
|
58
148
|
cmd_args = ['-k', 'Haus', '-c', '09876543'] | @cmd_args
|
59
|
-
assert_equal('
|
149
|
+
assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
|
60
150
|
|
61
151
|
cmd_args = ['-c', '09876543'] | @cmd_args
|
62
|
-
assert_equal('
|
152
|
+
assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
|
63
153
|
|
64
154
|
cmd_args = ['-c', 'Ausgehen'] | @cmd_args
|
65
|
-
assert_equal('
|
155
|
+
assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
|
66
156
|
|
67
157
|
cmd_args = ['--user-id', '12345'] | @cmd_args
|
68
|
-
assert_equal('
|
158
|
+
assert_equal('getByUser', OptParser.parse(cmd_args)[:method])
|
69
159
|
|
70
160
|
cmd_args = ['--question-id', '12345'] | @cmd_args
|
71
|
-
assert_equal('
|
161
|
+
assert_equal('getQuestion', OptParser.parse(cmd_args)[:method])
|
72
162
|
end
|
73
163
|
|
74
|
-
#
|
164
|
+
# It should set either :category_id or :category_name.
|
75
165
|
def test_category_identifier
|
76
166
|
args = ['-k', 'Haus', '-c', '123'] | @cmd_args
|
77
167
|
opts = Yanser::OptParser.parse(args)
|
78
168
|
assert_equal('123', opts[:query_params][:category_id])
|
79
|
-
|
169
|
+
assert_equal(false, opts[:query_params].has_key?(:category_name))
|
80
170
|
|
81
171
|
args = ['-k', 'Haus', '-c', 'Haushalt'] | @cmd_args
|
82
172
|
opts = Yanser::OptParser.parse(args)
|
83
173
|
assert_equal('Haushalt', opts[:query_params][:category_name])
|
84
|
-
|
85
|
-
|
174
|
+
assert_equal(false, opts[:query_params].has_key?(:category_id))
|
86
175
|
end
|
87
176
|
|
88
|
-
|
177
|
+
# It should set reasonable defaults.
|
178
|
+
# Yanser may set defaults, Yanapi provides minimal output, i.e. if Yahoo
|
179
|
+
# changes defaults, every user can be affected. That's whe we offer some
|
180
|
+
# reasonable defaults for our experiments.
|
181
|
+
# :region => 'de'
|
182
|
+
# :date_range => 'all'
|
183
|
+
def test_default_values
|
184
|
+
flunk('Not implemented! It should set reasonable defaults.')
|
185
|
+
|
89
186
|
end
|
90
187
|
|
91
|
-
|
188
|
+
# It should return a right complex output.
|
189
|
+
def test_output
|
190
|
+
cmd_args = ['--appid', 'YahooDemo', '-r',
|
191
|
+
'de', '-l', '10', '-o', '/tmp',
|
192
|
+
'-k' 'Haus AND grün'
|
193
|
+
]
|
194
|
+
output = {
|
195
|
+
:query_params => {
|
196
|
+
:appid => 'YahooDemo',
|
197
|
+
:query => 'Haus AND grün',
|
198
|
+
:region => 'de',
|
199
|
+
:search_in => 'question'
|
200
|
+
},
|
201
|
+
:method => 'questionSearch',
|
202
|
+
:output_dir => '/tmp',
|
203
|
+
:limit => 10
|
204
|
+
}
|
205
|
+
assert_equal(output, OptParser.parse(cmd_args))
|
92
206
|
end
|
93
207
|
|
94
|
-
#
|
95
|
-
def
|
96
|
-
|
208
|
+
# It should set correct parameters for logging.
|
209
|
+
def test_log_arguments
|
210
|
+
flunk('Not implemented. It should set correct parameters for logging.')
|
97
211
|
end
|
98
212
|
|
99
|
-
#
|
100
|
-
def
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
213
|
+
# It should set correct parameters for data extraction.
|
214
|
+
def test_extraction_arguments
|
215
|
+
flunk('Not implemented. It should set correct parameters for data extraction.')
|
216
|
+
end
|
217
|
+
################################################################################
|
218
|
+
# It is a helper method, many testable units provide some verbose output
|
219
|
+
# to stderr and/or stdout. It is useful to suppress any kind of verbosity.
|
220
|
+
def quietly(&b)
|
221
|
+
begin
|
222
|
+
orig_stderr = $stderr.clone
|
223
|
+
orig_stdout = $stdout.clone
|
224
|
+
$stderr.reopen(File.new('/dev/null', 'w'))
|
225
|
+
$stdout.reopen(File.new('/dev/null', 'w'))
|
226
|
+
b.call
|
227
|
+
ensure
|
228
|
+
$stderr.reopen(orig_stderr)
|
229
|
+
$stdout.reopen(orig_stdout)
|
106
230
|
end
|
107
|
-
input = @minimal_input | ['-r', 'abc']
|
108
|
-
assert_raises(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
|
109
231
|
end
|
110
232
|
|
111
|
-
#
|
112
|
-
def
|
233
|
+
# It is a helper method for validation of used parameter values.
|
234
|
+
def validate(params, option, key)
|
235
|
+
params.each do |p|
|
236
|
+
input = @minimal_input | [option, p]
|
237
|
+
params = {}
|
238
|
+
assert_nothing_raised(SystemExit) { params = OptParser.parse(input) }
|
239
|
+
assert_equal(p, params[:query_params][key])
|
240
|
+
end
|
241
|
+
|
242
|
+
input = @minimal_input | [option, 'invalid']
|
243
|
+
stdout, stderr = intercept_output do
|
244
|
+
assert_raises(SystemExit) { OptParser.parse(input) }
|
245
|
+
end
|
246
|
+
assert_match(/The provided .+ is currently not supported by Yahoo!/,
|
247
|
+
stdout)
|
113
248
|
end
|
114
249
|
|
115
|
-
#
|
116
|
-
def
|
117
|
-
|
250
|
+
# It is a helper method for handling stdout and stderr as strings.
|
251
|
+
def intercept_output
|
252
|
+
orig_stdout = $stdout
|
253
|
+
orig_stderr = $stderr
|
254
|
+
$stdout = StringIO.new
|
255
|
+
$stderr = StringIO.new
|
118
256
|
|
119
|
-
|
120
|
-
def test_mandatory_args
|
121
|
-
args = ['-k', 'Haus']
|
122
|
-
assert_raises(OptParserError) {OptParser.parse args}
|
123
|
-
end
|
257
|
+
yield
|
124
258
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
$stderr.reopen(File.new('/dev/null', 'w'))
|
130
|
-
$stdout.reopen(File.new('/dev/null', 'w'))
|
131
|
-
success_args = ['-h', '--help', '-v', '--version']
|
132
|
-
success_args.each do |arg|
|
133
|
-
assert_raises(SystemExit) {OptParser.parse(arg.to_a)}
|
134
|
-
end
|
135
|
-
$stderr.reopen(orig_stderr)
|
136
|
-
$stdout.reopen(orig_stdout)
|
259
|
+
return $stdout.string, $stderr.string
|
260
|
+
ensure
|
261
|
+
$stdout = orig_stdout
|
262
|
+
$stderr = orig_stderr
|
137
263
|
end
|
138
264
|
end
|
data/test/test_yanser.rb
CHANGED
@@ -10,6 +10,7 @@ class TestYanser < Test::Unit::TestCase
|
|
10
10
|
def teardown
|
11
11
|
end
|
12
12
|
|
13
|
+
# It should have a public method <start>.
|
13
14
|
def test_public_methods
|
14
15
|
yanser = Yanser::Yanser.new(
|
15
16
|
:query_params => {
|
@@ -26,10 +27,15 @@ class TestYanser < Test::Unit::TestCase
|
|
26
27
|
assert_respond_to(yanser, :start)
|
27
28
|
end
|
28
29
|
|
30
|
+
# It should have a constant <VERSION>.
|
29
31
|
def test_constants
|
30
32
|
assert(Yanser::VERSION.instance_of?(String))
|
31
33
|
end
|
32
|
-
|
34
|
+
|
35
|
+
# It should accept input according to the defined interface.
|
36
|
+
def test_input
|
37
|
+
flunk('Not implemented. It should accept input according to the defined interface.')
|
33
38
|
end
|
39
|
+
|
34
40
|
|
35
41
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yanser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
+
- 1
|
8
9
|
- 0
|
9
|
-
|
10
|
-
version: 0.0.3
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Andrei Beliankou
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
18
|
+
date: 2011-08-12 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: yanapi
|
@@ -23,17 +23,17 @@ dependencies:
|
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
24
|
none: false
|
25
25
|
requirements:
|
26
|
-
- - "
|
26
|
+
- - ">="
|
27
27
|
- !ruby/object:Gem::Version
|
28
|
-
hash:
|
28
|
+
hash: 17
|
29
29
|
segments:
|
30
30
|
- 0
|
31
|
+
- 3
|
31
32
|
- 1
|
32
|
-
|
33
|
-
version: 0.1.1
|
33
|
+
version: 0.3.1
|
34
34
|
type: :runtime
|
35
35
|
version_requirements: *id001
|
36
|
-
description: YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
|
36
|
+
description: YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics, e.g. for Question Answering.
|
37
37
|
email: a.belenkow@uni-trier.de
|
38
38
|
executables:
|
39
39
|
- yanser
|
@@ -42,25 +42,27 @@ extensions: []
|
|
42
42
|
extra_rdoc_files:
|
43
43
|
- README
|
44
44
|
- LICENSE
|
45
|
+
- CHANGELOG
|
45
46
|
files:
|
46
|
-
- lib/tester.rb
|
47
47
|
- lib/yanser/error.rb
|
48
|
+
- lib/yanser/store.rb
|
48
49
|
- lib/yanser/yanser.rb
|
49
50
|
- lib/yanser/opt_parser.rb
|
50
51
|
- lib/yanser/version.rb
|
51
52
|
- bin/yanser
|
52
|
-
- LICENSE
|
53
|
-
- Rakefile
|
54
53
|
- README
|
54
|
+
- LICENSE
|
55
|
+
- CHANGELOG
|
55
56
|
- test/test_opt_parser.rb
|
56
57
|
- test/test_yanser.rb
|
58
|
+
- .yardopts
|
57
59
|
homepage: http://www.uni-trier.de/index.php?id=34451
|
58
60
|
licenses: []
|
59
61
|
|
60
62
|
post_install_message:
|
61
63
|
rdoc_options:
|
62
64
|
- -m
|
63
|
-
- README
|
65
|
+
- README
|
64
66
|
require_paths:
|
65
67
|
- lib
|
66
68
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -86,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
88
|
requirements: []
|
87
89
|
|
88
90
|
rubyforge_project: yanser
|
89
|
-
rubygems_version: 1.7
|
91
|
+
rubygems_version: 1.8.7
|
90
92
|
signing_key:
|
91
93
|
specification_version: 3
|
92
94
|
summary: Yanser is a convenient search tool providing access to the Yahoo! Answers Q/A collection.
|
data/Rakefile
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# We need rake to user FileLists.
|
2
|
-
require 'rake'
|
3
|
-
# We can require 'rake/clean' to add 'clobber' and 'clean' tasks.
|
4
|
-
require 'rake/clean'
|
5
|
-
|
6
|
-
|
7
|
-
# I am not sure how to use this constant.
|
8
|
-
SRC = FileList['**/*.rb']
|
9
|
-
|
10
|
-
CLOBBER.include('doc', '**/*.html', '**/*.gem')
|
11
|
-
|
12
|
-
# testing
|
13
|
-
require 'rake/testtask'
|
14
|
-
Rake::TestTask.new do |t|
|
15
|
-
t.test_files = FileList.new('test/**/*.rb').to_a
|
16
|
-
# rake starts an other ruby process with a new options set.
|
17
|
-
# ruby --some-option -S rake is not sufficient to propagate
|
18
|
-
# the option "--some-option".
|
19
|
-
t.ruby_opts = ['-rubygems']
|
20
|
-
end
|
21
|
-
|
22
|
-
# Build the gem package
|
23
|
-
load 'yanser.gemspec'
|
24
|
-
require 'rubygems/package_task'
|
25
|
-
Gem::PackageTask.new(GEMSPEC).define
|
26
|
-
|
27
|
-
# Generate documentation
|
28
|
-
require 'rdoc/task'
|
29
|
-
RDoc::Task.new do |rdoc|
|
30
|
-
rdoc.rdoc_files.include('README', 'LICENSE', 'lib/**/*')
|
31
|
-
end
|
32
|
-
|
33
|
-
desc "Open an irb session preloaded with this library."
|
34
|
-
task :console do
|
35
|
-
sh "irb -rubygems -I lib -r yanser/opt_parser -r yanser/yanser"
|
36
|
-
end
|
37
|
-
|
38
|
-
desc 'Run the main executable file of the project.'
|
39
|
-
task :start do
|
40
|
-
sh "ruby -w -rubygems -I lib bin/yanser #{ENV['cmd']}"
|
41
|
-
end
|
data/lib/tester.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
require 'yanser'
|
4
|
-
|
5
|
-
params = {
|
6
|
-
:query_method => 'TermQuery',
|
7
|
-
:query_params => {
|
8
|
-
:appid => 'YahooDemo',
|
9
|
-
:query => 'Köln',
|
10
|
-
:region => 'de',
|
11
|
-
:results => 5,
|
12
|
-
:start => 0
|
13
|
-
}
|
14
|
-
}
|
15
|
-
|
16
|
-
y = Yanser.new(params)
|
17
|
-
|
18
|
-
y.start
|