yanser 0.0.3 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.yardopts +6 -0
- data/CHANGELOG +24 -0
- data/README +19 -3
- data/bin/yanser +30 -13
- data/lib/yanser/error.rb +4 -3
- data/lib/yanser/opt_parser.rb +166 -97
- data/lib/yanser/store.rb +48 -0
- data/lib/yanser/version.rb +1 -1
- data/lib/yanser/yanser.rb +58 -90
- data/test/test_opt_parser.rb +203 -77
- data/test/test_yanser.rb +7 -1
- metadata +16 -14
- data/Rakefile +0 -41
- data/lib/tester.rb +0 -18
data/.yardopts
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
=== COMPLETED
|
2
|
+
==== 0.1.0
|
3
|
+
Yanser now supports YANAPI 0.3.1 and later.
|
4
|
+
==== 0.0.3
|
5
|
+
Yanser depends on the old interface of YANAPI. No functional improvements.
|
6
|
+
==== 0.0.2
|
7
|
+
Small changes in the documentation.
|
8
|
+
==== 0.0.1
|
9
|
+
Initial release of Yanser.
|
10
|
+
|
11
|
+
|
12
|
+
=== PLANNED
|
13
|
+
|
14
|
+
==== 0.0.2
|
15
|
+
==== 0.0.3
|
16
|
+
==== 0.4.0
|
17
|
+
==== 0.5.0
|
18
|
+
==== 0.6.0
|
19
|
+
==== 0.7.0
|
20
|
+
==== 0.8.0
|
21
|
+
==== 0.9.0
|
22
|
+
==== 1.0.0
|
23
|
+
|
24
|
+
|
data/README
CHANGED
@@ -2,20 +2,36 @@
|
|
2
2
|
|
3
3
|
* {RubyGems}[http://rubygems.org/gems/yanser]
|
4
4
|
* Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
|
5
|
-
* {
|
5
|
+
* {YANSER Project Page}[http://yanser.rubyforge.org/]
|
6
6
|
|
7
7
|
== DESCRIPTION
|
8
8
|
|
9
|
-
YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access
|
9
|
+
YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access
|
10
|
+
to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI
|
11
|
+
and helps to search for Questions and Answers which contain a set of key words,
|
12
|
+
belong to a specific semantic domain or are posted by a certain user.
|
13
|
+
|
14
|
+
Yanser is a research tool in the field of Computational Linguistics.
|
10
15
|
|
11
16
|
== SYNOPSIS
|
12
17
|
$ yanser --help
|
13
18
|
|
19
|
+
== FEATURE LIST
|
20
|
+
|
21
|
+
== INSTALLATION
|
22
|
+
|
23
|
+
== USAGE
|
24
|
+
|
25
|
+
== CHANGELOG
|
26
|
+
|
27
|
+
See CHANGELOG.
|
28
|
+
|
29
|
+
== CONTRIBUTORS
|
14
30
|
|
15
31
|
|
16
32
|
== LICENSE
|
17
33
|
|
18
34
|
YANSER is a copyrighted software by Andrei Beliankou, 2011.
|
35
|
+
|
19
36
|
You may use, redistribute and change it under the terms
|
20
37
|
provided in the LICENSE file.
|
21
|
-
|
data/bin/yanser
CHANGED
@@ -1,19 +1,36 @@
|
|
1
1
|
# -*- coding: utf-8; mode: ruby -*-
|
2
|
-
|
3
|
-
# это неверное решение, на самом деле я не должен напрямую указывать,
|
4
|
-
# где находятся файлы, но пока пусть будет так
|
5
|
-
# it is required only for .tar.gz distribution
|
6
|
-
#lib_path = File.expand_path('../../lib', __FILE__)
|
7
|
-
#$LOAD_PATH.unshift(lib_path) unless $LOAD_PATH.include?(lib_path)
|
8
|
-
|
9
|
-
# подобный способ является правильным, только так классы Yanser и OptionParser будут независимы
|
10
2
|
require 'yanser/yanser'
|
11
3
|
require 'yanser/opt_parser'
|
4
|
+
require 'yanser/store'
|
12
5
|
|
13
|
-
|
14
|
-
options = Yanser::OptParser.parse(ARGV)
|
15
|
-
|
6
|
+
begin
|
7
|
+
options = Yanser::OptParser.parse(ARGV)
|
8
|
+
rescue
|
9
|
+
raise
|
10
|
+
end
|
16
11
|
|
17
|
-
|
12
|
+
# Handle here exceptions from YANAPI, use messages for it.
|
13
|
+
# Add "See <yanser --help>." to it.
|
14
|
+
begin
|
15
|
+
yanser = Yanser::Yanser.new(options)
|
16
|
+
result = yanser.start
|
17
|
+
rescue YANAPI::UserError
|
18
|
+
# Give up with an error message.
|
19
|
+
# The user must change the input.
|
20
|
+
raise
|
21
|
+
rescue YANAPI::ContentError
|
22
|
+
# Yahoo! says something.
|
23
|
+
raise
|
24
|
+
rescue YANAPI::ExternalError
|
25
|
+
# Give up with an error message.
|
26
|
+
# It is a persistent external error.
|
27
|
+
# We tried three times in Yanser, but the error seems to persist.
|
28
|
+
raise
|
29
|
+
end
|
18
30
|
|
19
|
-
|
31
|
+
begin
|
32
|
+
storage = Yanser::Store.new(options)
|
33
|
+
storage.store(result)
|
34
|
+
rescue
|
35
|
+
raise
|
36
|
+
end
|
data/lib/yanser/error.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
-
# -*-
|
1
|
+
# -*- coding: utf-8 -*-
|
2
2
|
|
3
3
|
module Yanser
|
4
|
-
class
|
4
|
+
class Error < RuntimeError; end
|
5
|
+
class OptParserError < Error
|
5
6
|
# empty for now
|
6
7
|
end
|
7
8
|
|
8
|
-
class YanserError <
|
9
|
+
class YanserError < Error
|
9
10
|
# empty for now
|
10
11
|
end
|
11
12
|
end
|
data/lib/yanser/opt_parser.rb
CHANGED
@@ -7,41 +7,54 @@ require 'yanser/error'
|
|
7
7
|
|
8
8
|
module Yanser
|
9
9
|
class OptParser
|
10
|
-
|
11
|
-
#
|
10
|
+
|
11
|
+
# Different possible values accepted by Yahoo!.
|
12
|
+
REGIONS = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
|
13
|
+
'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
|
14
|
+
TYPES = ['all', 'resolved', 'open', 'undecided']
|
15
|
+
SORT_TYPES = ['relevance', 'date_desc', 'date_asc']
|
16
|
+
DATE_RANGES = ['all', '7', '7-30', '30-60', '60-90', 'more90']
|
17
|
+
SEARCH_LOCATIONS = ['all', 'question', 'best_answer']
|
18
|
+
OUTPUT_TYPES = ['xml', 'json', 'php', 'rss']
|
19
|
+
FILTERS = ['question', 'best_answer']
|
20
|
+
|
21
|
+
# Main class method.
|
22
|
+
# OP expects cmd_args to be an array like ARGV.
|
12
23
|
def self.parse(cmd_args)
|
13
24
|
@@options = {}
|
14
25
|
@@options[:query_params] = {}
|
15
26
|
|
16
27
|
parser = create_parser
|
17
28
|
|
18
|
-
#
|
29
|
+
# If no options provided print the help.
|
19
30
|
if cmd_args.empty?
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
# $stderr.printf "You have to provide some options.\n\n"
|
24
|
-
# puts parser
|
25
|
-
# exit 1
|
31
|
+
puts('You have to provide some options.',
|
32
|
+
'Please start with <yanser --help>.')
|
33
|
+
exit(1)
|
26
34
|
end
|
27
35
|
|
28
36
|
# Parse ARGV and provide the options hash.
|
29
37
|
# Check if everything is correct and handle exceptions
|
30
38
|
begin
|
31
|
-
parser.parse
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
39
|
+
parser.parse(cmd_args)
|
40
|
+
rescue OptionParser::InvalidArgument => e
|
41
|
+
arg = e.message.split.last
|
42
|
+
puts "The provided argument #{arg} is currently not supported by Yahoo!"
|
43
|
+
puts 'Please colsult <yanser --help>.'
|
44
|
+
exit(1)
|
45
|
+
rescue OptionParser::InvalidOption => e
|
46
|
+
puts "You have provided an #{e.message}."
|
47
|
+
puts 'Please colsult <yanser --help>.'
|
48
|
+
exit(1)
|
49
|
+
rescue
|
50
|
+
raise
|
38
51
|
end
|
39
52
|
|
40
53
|
# Check to see if we got the required arguments needed.
|
41
|
-
check_required_options(@@options)
|
54
|
+
# check_required_options(@@options)
|
42
55
|
|
43
56
|
# Set the search method.
|
44
|
-
@@options[:
|
57
|
+
@@options[:method] = set_query_type(@@options[:query_params])
|
45
58
|
|
46
59
|
# Set some defaults such as :region and :search_in
|
47
60
|
@@options = set_defaults(@@options)
|
@@ -50,34 +63,23 @@ module Yanser
|
|
50
63
|
end # parse
|
51
64
|
|
52
65
|
private
|
53
|
-
|
54
|
-
# Check if the value of given region is correct.
|
55
|
-
# Now 14 regions are supported by Yahoo! Answers.
|
56
|
-
def self.prove_region(region)
|
57
|
-
regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
|
58
|
-
'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
|
59
|
-
|
60
|
-
unless regions.include?(region)
|
61
|
-
msg = "The provided search region #{region} is currently not supported by Yahoo!"
|
62
|
-
raise OptParserError, msg
|
63
|
-
end
|
64
|
-
end
|
65
|
-
|
66
|
+
|
66
67
|
# define one of the following query types: TermQuery|CategoryQuery|
|
67
68
|
# QuestionQuery|UserQuery
|
68
69
|
def self.set_query_type(params)
|
69
70
|
case
|
70
71
|
when (params[:category_id] || params[:category_name]) && ! params[:query]
|
71
|
-
'
|
72
|
+
'getByCategory'
|
72
73
|
when params[:query]
|
73
|
-
'
|
74
|
+
'questionSearch'
|
74
75
|
when params[:user_id]
|
75
|
-
'
|
76
|
+
'getByUser'
|
76
77
|
when params[:question_id]
|
77
|
-
'
|
78
|
+
'getQuestion'
|
78
79
|
end
|
79
80
|
end
|
80
|
-
|
81
|
+
|
82
|
+
=begin
|
81
83
|
def self.check_required_options(options)
|
82
84
|
required_opts = [:appid]
|
83
85
|
|
@@ -85,31 +87,19 @@ module Yanser
|
|
85
87
|
if options[:query_params].has_key?(opt)
|
86
88
|
next
|
87
89
|
else
|
88
|
-
|
89
|
-
|
90
|
+
puts "The required option --#{opt} is missing."
|
91
|
+
puts 'Please colsult <yanser --help>.'
|
92
|
+
exit(1)
|
90
93
|
end
|
91
94
|
end
|
92
95
|
end
|
93
|
-
|
94
|
-
|
95
|
-
dir = File.expand_path(dir)
|
96
|
-
#check for existens
|
97
|
-
if File.directory?(dir)
|
98
|
-
if File.writable?(dir)
|
99
|
-
return dir
|
100
|
-
else
|
101
|
-
$stderr.puts 'The directory you have provided is not writable!'
|
102
|
-
exit 1
|
103
|
-
end
|
104
|
-
else
|
105
|
-
FileUtils.mkdir_p(dir)
|
106
|
-
return dir
|
107
|
-
end
|
108
|
-
end # provide_dir
|
96
|
+
=end
|
97
|
+
|
109
98
|
|
110
99
|
def self.set_defaults(opts)
|
111
100
|
# fake method for now
|
112
|
-
|
101
|
+
|
102
|
+
opts
|
113
103
|
end # set_defaults
|
114
104
|
|
115
105
|
def self.create_parser
|
@@ -133,7 +123,9 @@ module Yanser
|
|
133
123
|
opts.separator ' Mandatory search arguments:'
|
134
124
|
|
135
125
|
opts.on('-k', '--key-word KEYWORD',
|
136
|
-
'Provide a single keyword or a boolean expression.'
|
126
|
+
'Provide a single keyword or a boolean expression.',
|
127
|
+
'You might combine this option with an explicit',
|
128
|
+
'<category> name or id to restrict your search.'
|
137
129
|
) do |keyword|
|
138
130
|
@@options[:query_params][:query] = keyword
|
139
131
|
# not a solution!!!
|
@@ -141,10 +133,12 @@ module Yanser
|
|
141
133
|
|
142
134
|
end
|
143
135
|
|
144
|
-
opts.separator ''
|
136
|
+
opts.separator ' and/or'
|
145
137
|
|
146
138
|
opts.on('-c', '--category CATEGORY',
|
147
|
-
'Provide a category name or ID.'
|
139
|
+
'Provide a category name or ID. This is useful',
|
140
|
+
'while searching for categories or in the combination',
|
141
|
+
'with some key words.'
|
148
142
|
) do |category|
|
149
143
|
|
150
144
|
if category =~ /^[[:digit:]]+$/
|
@@ -155,7 +149,7 @@ module Yanser
|
|
155
149
|
|
156
150
|
end
|
157
151
|
|
158
|
-
opts.separator ''
|
152
|
+
opts.separator ' or'
|
159
153
|
|
160
154
|
opts.on('--user-id ID',
|
161
155
|
'Provide an user ID of questions you search for.',
|
@@ -164,7 +158,7 @@ module Yanser
|
|
164
158
|
@@options[:query_params][:user_id] = user_id
|
165
159
|
end
|
166
160
|
|
167
|
-
opts.separator ''
|
161
|
+
opts.separator ' or'
|
168
162
|
|
169
163
|
opts.on('--question-id ID',
|
170
164
|
'Provide a question ID of the question you search for.',
|
@@ -172,32 +166,33 @@ module Yanser
|
|
172
166
|
) do |question_id|
|
173
167
|
@@options[:query_params][:question_id] = question_id
|
174
168
|
end
|
175
|
-
|
169
|
+
|
170
|
+
#################
|
176
171
|
opts.separator ''
|
177
172
|
opts.separator ' Optional search arguments:'
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
'This defaults to en.'
|
184
|
-
) do |region|
|
185
|
-
# todo
|
186
|
-
prove_region(region)
|
187
|
-
@@options[:query_params][:region] = region
|
173
|
+
opts.on('--date-range DATE', DATE_RANGES,
|
174
|
+
'Provide some date range for the search. It defaults to <all>.',
|
175
|
+
"Possible values are: <#{DATE_RANGES.join('>, <')}>."
|
176
|
+
) do |date|
|
177
|
+
@@options[:query_params][:date_range] = date
|
188
178
|
end
|
189
|
-
|
179
|
+
|
190
180
|
opts.separator ''
|
191
|
-
|
192
|
-
|
193
|
-
'
|
194
|
-
|
195
|
-
|
196
|
-
@@options[:output_dir] = provide_dir(output_dir)
|
181
|
+
opts.on('-f', '--output-format FORMAT', OUTPUT_TYPES,
|
182
|
+
"Provide an output format: <#{OUTPUT_TYPES.join('>, <')}>.",
|
183
|
+
'It defaults to <xml>, you may simply ommit this option.'
|
184
|
+
) do |f|
|
185
|
+
@@options[:query_params][:output] = f
|
197
186
|
end
|
198
187
|
|
199
188
|
opts.separator ''
|
189
|
+
opts.on('--filter TYPE', FILTERS,
|
190
|
+
"Possible values are: <#{FILTERS.join('>, <')}>."
|
191
|
+
) do |type|
|
192
|
+
@@options[:query_params][:filter] = type
|
193
|
+
end
|
200
194
|
|
195
|
+
opts.separator ''
|
201
196
|
opts.on('-l', '--limit NUMBER', Integer,
|
202
197
|
'Provide a number of answers you want to get from Yahoo.',
|
203
198
|
'This argument is not mandatory, if you want to get',
|
@@ -206,12 +201,12 @@ module Yanser
|
|
206
201
|
@@options[:limit] = limit
|
207
202
|
end
|
208
203
|
|
209
|
-
opts.separator ''
|
210
|
-
opts.on('-
|
211
|
-
'Provide an output
|
212
|
-
'
|
213
|
-
) do |
|
214
|
-
|
204
|
+
opts.separator ''
|
205
|
+
opts.on('-o', '--output-dir DIR',
|
206
|
+
'Provide an output folder.',
|
207
|
+
'This directory will be created if it does not exist yet.'
|
208
|
+
) do |output_dir|
|
209
|
+
@@options[:output_dir] = output_dir
|
215
210
|
end
|
216
211
|
|
217
212
|
opts.separator ''
|
@@ -222,23 +217,92 @@ module Yanser
|
|
222
217
|
'It can be useful if you want to put many query results',
|
223
218
|
'in the same output folder.'
|
224
219
|
) do |pref|
|
225
|
-
|
220
|
+
@@options[:prefix] = pref
|
226
221
|
end
|
227
222
|
|
228
223
|
opts.separator ''
|
229
|
-
opts.
|
230
|
-
|
231
|
-
'
|
232
|
-
|
233
|
-
|
224
|
+
opts.separator ''
|
225
|
+
opts.on('-r', '--region REGION', REGIONS,
|
226
|
+
'Provide a geographic region to search in for terms.',
|
227
|
+
'Possible values are: de, us, uk, ca, au, in, es, br,',
|
228
|
+
' ar, mx, e1, it, fr, sg.',
|
229
|
+
'This defaults to <en>.'
|
230
|
+
) do |region|
|
231
|
+
|
232
|
+
@@options[:query_params][:region] = region
|
234
233
|
end
|
235
234
|
|
236
235
|
opts.separator ''
|
236
|
+
opts.on('--search-in PLACE', SEARCH_LOCATIONS,
|
237
|
+
"Possible values are: <#{SEARCH_LOCATIONS.join('>, <')}>."
|
238
|
+
) do |place|
|
239
|
+
@@options[:query_params][:search_in] = place
|
240
|
+
end
|
241
|
+
|
242
|
+
opts.separator ''
|
243
|
+
opts.on('--start POSITION',
|
244
|
+
'Provide the position of the question in the search list',
|
245
|
+
'you want start with.'
|
246
|
+
) do |pos|
|
247
|
+
@@options[:query_params][:start] = pos
|
248
|
+
end
|
249
|
+
|
250
|
+
opts.separator ''
|
251
|
+
opts.on('--sort ORDER', SORT_TYPES,
|
252
|
+
"Possible values are: <#{SORT_TYPES.join('>, <')}>."
|
253
|
+
) do |order|
|
254
|
+
@@options[:query_params][:sort] = order
|
255
|
+
end
|
256
|
+
|
257
|
+
opts.separator ''
|
258
|
+
opts.on('--type TYPE', TYPES,
|
259
|
+
"Possible values are: <#{TYPES.join('>, <')}>."
|
260
|
+
) do |type|
|
261
|
+
@@options[:query_params][:type] = type
|
262
|
+
end
|
263
|
+
|
264
|
+
|
265
|
+
#################
|
266
|
+
opts.separator ""
|
267
|
+
opts.separator ' Experimental options. They may not be implemented yet.'
|
237
268
|
opts.on('--exp-file FILE',
|
238
269
|
'Provide a formal description of the experiment.'
|
239
270
|
) do |file_name|
|
240
|
-
|
271
|
+
warn 'Not yet implemented!'
|
272
|
+
end
|
273
|
+
|
274
|
+
opts.separator ''
|
275
|
+
opts.on('--key-word-list FILE',
|
276
|
+
'Provide a file with key words or phrases',
|
277
|
+
'(boolean syntax allowed), one search token per line.'
|
278
|
+
) do |file_name|
|
279
|
+
warn 'Not yet implemented!'
|
280
|
+
end
|
281
|
+
|
282
|
+
opts.separator ''
|
283
|
+
opts.on('--log [LOGFILE]',
|
284
|
+
'SOME DESCRIPTION'
|
285
|
+
) do |file|
|
286
|
+
warn 'Add description!'
|
287
|
+
warn 'Not yet implemented!'
|
288
|
+
end
|
289
|
+
|
290
|
+
opts.separator ''
|
291
|
+
opts.on('--extract',
|
292
|
+
'SOME DESCRIPTION'
|
293
|
+
) do
|
294
|
+
# some non xml representation in a human readable format
|
295
|
+
warn 'Add description!'
|
296
|
+
warn 'Not yet implemented!'
|
241
297
|
end
|
298
|
+
opts.separator ''
|
299
|
+
opts.on('-t', '--time-interval SECONDS', Integer,
|
300
|
+
'SOME DESCRIPTION'
|
301
|
+
) do |t|
|
302
|
+
@@options[:interval] = t
|
303
|
+
warn 'Add description!'
|
304
|
+
end
|
305
|
+
|
242
306
|
opts.separator ""
|
243
307
|
opts.separator "Common options:"
|
244
308
|
|
@@ -261,20 +325,24 @@ __END__
|
|
261
325
|
# List of options planned for implementation.
|
262
326
|
--appid
|
263
327
|
-k, --key-word
|
264
|
-
-c, --category
|
328
|
+
-c, --category # Term & CategorySearch
|
265
329
|
--user-id
|
266
330
|
--question-id # QuestionSearch
|
267
331
|
-r, --region
|
268
332
|
-f, --output-format
|
269
|
-
-l, --limit
|
270
333
|
--date-range
|
271
334
|
--sort
|
335
|
+
--start
|
272
336
|
--filter
|
337
|
+
--callback # bad idea
|
338
|
+
--type
|
339
|
+
--search-in
|
273
340
|
|
274
341
|
|
342
|
+
-l, --limit
|
275
343
|
--prefix #prefix for output files
|
276
344
|
--key-word-list
|
277
|
-
--exp-file
|
345
|
+
--exp-file # experiment description, yaml
|
278
346
|
-o, --output-dir
|
279
347
|
-h, --help
|
280
348
|
-v, --version
|
@@ -306,8 +374,9 @@ instance interface
|
|
306
374
|
:type => "all" | "resolved" | "open" | "undecided", # default 'all'
|
307
375
|
:user_id => '123456'
|
308
376
|
},
|
309
|
-
:
|
377
|
+
:interval => Integer, # in seconds
|
378
|
+
:limit => Integer,
|
379
|
+
:method => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
|
310
380
|
:output_dir => 'some path',
|
311
|
-
:prefix => 'some prefix'
|
312
|
-
:limit => Integer
|
381
|
+
:prefix => 'some prefix' # prefix for output files
|
313
382
|
}
|
data/lib/yanser/store.rb
ADDED
@@ -0,0 +1,48 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
module Yanser
|
4
|
+
class Store
|
5
|
+
def initialize(params)
|
6
|
+
@params = params
|
7
|
+
end
|
8
|
+
|
9
|
+
def store(result)
|
10
|
+
if dir = @params[:output_dir]
|
11
|
+
provide_dir(dir)
|
12
|
+
save(result)
|
13
|
+
else
|
14
|
+
puts result
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
private
|
19
|
+
|
20
|
+
# rework!
|
21
|
+
def provide_dir(dir)
|
22
|
+
dir = File.expand_path(dir)
|
23
|
+
# check for existence
|
24
|
+
if File.directory?(dir)
|
25
|
+
unless File.writable?(dir)
|
26
|
+
warn 'The directory you have provided is not writable!'
|
27
|
+
exit(1)
|
28
|
+
end
|
29
|
+
else
|
30
|
+
FileUtils.mkdir_p(dir)
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
# save results to a dir
|
35
|
+
# not a good implementation
|
36
|
+
# interface (filename, data)
|
37
|
+
def save(result)
|
38
|
+
filename = File.join(@params[:output_dir],
|
39
|
+
"yanser_output.xml")
|
40
|
+
File.open(filename, 'w') do |file|
|
41
|
+
file.puts result
|
42
|
+
end
|
43
|
+
end
|
44
|
+
end # Store
|
45
|
+
end
|
46
|
+
|
47
|
+
|
48
|
+
|
data/lib/yanser/version.rb
CHANGED
data/lib/yanser/yanser.rb
CHANGED
@@ -1,29 +1,10 @@
|
|
1
|
-
# this solution is not ideal since I force people to use 'rubygems'
|
2
|
-
# possible it would be a better solution to prompt at this point
|
3
|
-
# to install the lib in the way the user is accustomed to
|
4
|
-
=begin
|
5
|
-
begin
|
6
|
-
require 'yanapi'
|
7
|
-
rescue LoadError
|
8
|
-
require 'rubygems'
|
9
|
-
require 'yanapi'
|
10
|
-
end
|
11
|
-
=end
|
12
1
|
require 'yanapi'
|
13
2
|
|
14
3
|
# :title: YANSER, Yahoo! ANSwers harvestER
|
15
|
-
# :main:
|
4
|
+
# :main: README
|
5
|
+
|
16
6
|
# Main processing class.
|
17
|
-
# Yanser encapsulates the main routine and instantiates
|
18
|
-
# all other classes.
|
19
|
-
#--
|
20
|
-
# Yanser takes the users input and validates it.
|
21
|
-
# It decides which search method to choose.
|
22
|
-
# Then it collects all parameters and useful default values,
|
23
|
-
# creates an XyzQuery with the starting point of 0 and gets the first results.
|
24
|
-
# If more results were requested, Yanser creates a similar XyzQuery and gets
|
25
|
-
# the next result set until the result limitation set by the user is met.
|
26
|
-
#
|
7
|
+
# Yanser encapsulates the main routine and instantiates all other classes.
|
27
8
|
module Yanser
|
28
9
|
|
29
10
|
class Yanser
|
@@ -38,98 +19,85 @@ module Yanser
|
|
38
19
|
# Yahoo! Answers returns maximum 50 results.
|
39
20
|
MAX_RESULTS = 50
|
40
21
|
|
41
|
-
def initialize(
|
42
|
-
|
43
|
-
|
44
|
-
# {:query_type=>u|q|c|w, :query_params=>{appid, start, results, query}}
|
45
|
-
# opts come from the OptionParser
|
46
|
-
# they are supposed to be correct, no validation here
|
47
|
-
@options = opts
|
48
|
-
|
49
|
-
@options[:limit] = @options[:limit] || START_LIMIT + MAX_RESULTS
|
22
|
+
def initialize(params)
|
23
|
+
# Libs are not allowed to mutate the input.
|
24
|
+
@params = params.clone
|
50
25
|
|
51
|
-
@
|
26
|
+
@params[:limit] = @params[:limit] || START_LIMIT + MAX_RESULTS
|
52
27
|
|
53
|
-
@
|
28
|
+
@query_params = @params[:query_params]
|
54
29
|
|
30
|
+
@output = @query_params[:output] || 'xml'
|
31
|
+
|
32
|
+
# Yahoo counts results beginning with 0.
|
33
|
+
# <:results=50> means all questions from 0 to 49.
|
55
34
|
@query_params[:start] = @query_params[:start] || 0
|
56
35
|
end
|
57
36
|
|
58
|
-
#
|
37
|
+
# It returns the accumulated string or <nil>, if nothing found.
|
59
38
|
def start
|
60
|
-
|
61
|
-
|
62
|
-
query(@query_params)
|
63
|
-
elsif @options[:limit] < MAX_RESULTS
|
64
|
-
@query_params[:results] = @options[:limit]
|
65
|
-
query(@query_params)
|
39
|
+
if @params[:method] == 'getQuestion'
|
40
|
+
response = get_response(@params)
|
66
41
|
else
|
67
|
-
|
42
|
+
response = ''
|
68
43
|
|
69
|
-
while
|
70
|
-
|
71
|
-
@query_params[:start] += MAX_RESULTS
|
44
|
+
while @query_params[:start] < @params[:limit]
|
45
|
+
results_left = @params[:limit] - @query_params[:start]
|
72
46
|
|
73
|
-
results_left
|
74
|
-
if results_left == 0
|
75
|
-
break
|
76
|
-
elsif results_left < MAX_RESULTS
|
47
|
+
if results_left < MAX_RESULTS
|
77
48
|
@query_params[:results] = results_left
|
49
|
+
else
|
50
|
+
@query_params[:results] = MAX_RESULTS
|
51
|
+
end
|
52
|
+
|
53
|
+
r = get_response(@params)
|
54
|
+
if r
|
55
|
+
response << r
|
56
|
+
else
|
57
|
+
if response.empty?
|
58
|
+
response = nil
|
59
|
+
end
|
78
60
|
break
|
79
61
|
end
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
62
|
+
@query_params[:start] += @query_params[:results]
|
63
|
+
sleep(QUERY_INTERVAL)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
response
|
84
68
|
end # start
|
85
69
|
|
86
70
|
private
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
71
|
+
# It returns a result string or <nil>.
|
72
|
+
# It tries three times in case of YANAPI::ExternalError,
|
73
|
+
# it gives up if a YANAPI::ContentError occurs (returns <nil>).
|
74
|
+
def get_response(params)
|
75
|
+
query = create_query(params)
|
76
|
+
t = 0
|
91
77
|
begin
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
return false # do not iterate futher
|
99
|
-
rescue => e # some errors to retry
|
100
|
-
if (tries < 4)
|
101
|
-
sleep(QUERY_INTERVAL**tries)
|
78
|
+
t += 1
|
79
|
+
response = query.get
|
80
|
+
rescue YANAPI::ExternalError => e
|
81
|
+
warn "External error: #{e}!"
|
82
|
+
if t <= 3
|
83
|
+
sleep(QUERY_INTERVAL**t)
|
102
84
|
retry
|
103
85
|
else
|
104
|
-
|
105
|
-
|
86
|
+
raise
|
87
|
+
end
|
106
88
|
end
|
89
|
+
|
90
|
+
response
|
107
91
|
end
|
108
|
-
return true # we may iterate futher
|
109
|
-
end
|
110
92
|
|
93
|
+
# It creates a new api object or raises an exception.
|
94
|
+
# YANAPI::UserError should not be rescued, it indicates that the user
|
95
|
+
# must correct the input hash.
|
111
96
|
def create_query(params)
|
112
|
-
|
113
|
-
end
|
114
|
-
|
115
|
-
def output(result)
|
116
|
-
if @options[:output_dir]
|
117
|
-
save(result)
|
118
|
-
else
|
119
|
-
puts result
|
120
|
-
end
|
121
|
-
end
|
122
|
-
|
123
|
-
# save results to a dir
|
124
|
-
# this dir exists since has been proved by OptionParser
|
125
|
-
# not a good implementation
|
126
|
-
# interface (filename, data)
|
127
|
-
def save(result)
|
128
|
-
filename = File.join(@options[:output_dir], "#{@query_params[:start]}.#{@output_type}")
|
129
|
-
file = File.new(filename, 'w')
|
130
|
-
file.puts result
|
131
|
-
file.close
|
97
|
+
YANAPI::API.new(params)
|
132
98
|
end
|
133
99
|
|
134
100
|
end # class Yanser
|
135
101
|
end # module Yanser
|
102
|
+
|
103
|
+
__END__
|
data/test/test_opt_parser.rb
CHANGED
@@ -1,138 +1,264 @@
|
|
1
1
|
# -*- coding: utf-8 -*-
|
2
2
|
require 'test/unit'
|
3
3
|
require 'yanser/opt_parser'
|
4
|
+
require 'stringio' # for helper methods
|
5
|
+
|
4
6
|
include Yanser
|
7
|
+
|
5
8
|
class TestOptionParser < Test::Unit::TestCase
|
6
|
-
def setup
|
7
|
-
@input = ['--appid', 'YahooDemo', '-r',
|
8
|
-
'de', '-l', '10', '-o', '/tmp',
|
9
|
-
'-k' 'Haus AND grün']
|
10
|
-
@output = {
|
11
|
-
:query_params => {
|
12
|
-
:appid => 'YahooDemo',
|
13
|
-
:query => 'Haus AND grün',
|
14
|
-
:region => 'de',
|
15
|
-
:search_in => 'question'
|
16
|
-
},
|
17
|
-
:query_type => 'TermQuery',
|
18
|
-
:output_dir => '/tmp',
|
19
|
-
:limit => 10
|
20
|
-
}
|
21
9
|
|
10
|
+
def setup
|
22
11
|
@cmd_args = ['--appid', 'YahooDemo']
|
23
|
-
@minimal_input = ['--appid', 'YahooDemo',
|
24
|
-
|
12
|
+
@minimal_input = ['--appid', 'YahooDemo', '-k', 'Haus']
|
13
|
+
@valid_opts = ['--appid', 'MyID',
|
14
|
+
'--category', '12345',
|
15
|
+
'--date-range', 'all',
|
16
|
+
'--exp-file', 'exp_file.yml',
|
17
|
+
'--filter', 'question',
|
18
|
+
'--help',
|
19
|
+
'--key-word', 'Haus',
|
20
|
+
'--key-word-list', 'some_file.txt',
|
21
|
+
'--limit', '1',
|
22
|
+
'--output-dir', '/tmp',
|
23
|
+
'--output-format', 'xml',
|
24
|
+
'--prefix', 'file1',
|
25
|
+
'--question-id', '12345',
|
26
|
+
'--region', 'de',
|
27
|
+
'--search-in', 'question',
|
28
|
+
'--sort', 'relevance',
|
29
|
+
'--start', '10',
|
30
|
+
'--time-interval', '10',
|
31
|
+
'--type', 'resolved',
|
32
|
+
'--user-id', '12345',
|
33
|
+
'--version'
|
34
|
+
]
|
25
35
|
end
|
26
36
|
|
27
37
|
def teardown
|
28
38
|
end
|
29
39
|
|
30
|
-
#
|
40
|
+
# It should have a public class method 'parse'.
|
31
41
|
def test_public_methods
|
32
42
|
assert_respond_to(Yanser::OptParser, :parse)
|
33
43
|
end
|
34
44
|
|
35
|
-
#
|
45
|
+
# It should return a non empty parameter hash.
|
36
46
|
def test_parse_method
|
37
|
-
return_value = Yanser::OptParser.parse(@
|
38
|
-
assert(return_value.instance_of?
|
39
|
-
|
47
|
+
return_value = Yanser::OptParser.parse(@minimal_input)
|
48
|
+
assert(return_value.instance_of?(Hash))
|
49
|
+
assert_equal(false, return_value.empty?)
|
40
50
|
end
|
41
51
|
|
42
|
-
#
|
43
|
-
def
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
assert_equal(@output, Yanser::OptParser.parse(cmd_args))
|
52
|
+
# It should reject the empty input and exit.
|
53
|
+
def test_empty_input
|
54
|
+
out, err = intercept_output do
|
55
|
+
assert_raises(SystemExit) { OptParser.parse([]) }
|
56
|
+
end
|
57
|
+
assert_match(/You have to provide some options./, out)
|
49
58
|
end
|
50
59
|
|
60
|
+
# It should accept correct options.
|
61
|
+
# Invalid options is the matter of OptionParser itself,
|
62
|
+
# do not test it here.
|
63
|
+
# We test only, that OP exits and does not raise an exception.
|
64
|
+
def test_accept_correct_options
|
65
|
+
# these options should be treated separately
|
66
|
+
@valid_opts.delete('--help')
|
67
|
+
@valid_opts.delete('--version')
|
68
|
+
assert_nothing_raised { OptParser.parse(@valid_opts) }
|
69
|
+
|
70
|
+
stdout, stderr = intercept_output do
|
71
|
+
assert_raises(SystemExit) { OptParser.parse(['--invalid-option']) }
|
72
|
+
end
|
73
|
+
|
74
|
+
assert_match(/You have provided an invalid option:/, stdout)
|
75
|
+
end
|
76
|
+
|
77
|
+
# It should successfully exit with some options.
|
78
|
+
def test_successful_exit
|
79
|
+
quietly do
|
80
|
+
success_args = ['-h', '--help', '-v', '--version']
|
81
|
+
success_args.each do |arg|
|
82
|
+
assert_raises(SystemExit) { OptParser.parse(arg.split) }
|
83
|
+
end
|
84
|
+
end
|
85
|
+
end
|
86
|
+
|
87
|
+
=begin
|
88
|
+
# It is done in YANAPI.
|
89
|
+
# It should require mandatory cmd arguments.
|
90
|
+
def test_mandatory_args
|
91
|
+
flunk('Rewrite!')
|
92
|
+
args = ['-k', 'Haus']
|
93
|
+
assert_raises(OptParserError) {OptParser.parse args}
|
94
|
+
end
|
95
|
+
=end
|
96
|
+
|
97
|
+
# It should accept only valid date arguments.
|
98
|
+
def test_date_arguments
|
99
|
+
validate(OptParser::DATE_RANGES, '--date-range', :date_range)
|
100
|
+
end
|
101
|
+
|
102
|
+
# It should accept a file and parse it.
|
103
|
+
def test_exp_file
|
104
|
+
flunk('Not implemented! It should accept a file and parse it.')
|
105
|
+
end
|
106
|
+
|
107
|
+
# It should accept only valid filter arguments.
|
108
|
+
def test_filter_arguments
|
109
|
+
validate(OptParser::FILTERS, '--filter', :filter)
|
110
|
+
end
|
111
|
+
|
112
|
+
# It should accept only numeric parameters for limits.
|
113
|
+
def test_limit_arguments
|
114
|
+
flunk('Not implemented! It should accept only numeric paramters for limits.')
|
115
|
+
end
|
51
116
|
|
52
|
-
#
|
53
|
-
def
|
117
|
+
# It should accept only valid output formats.
|
118
|
+
def test_output_arguments
|
119
|
+
validate(OptParser::OUTPUT_TYPES, '--output-format', :output)
|
120
|
+
end
|
121
|
+
|
122
|
+
# It should accept only valid regions.
|
123
|
+
def test_region_arguments
|
124
|
+
validate(OptParser::REGIONS, '-r', :region)
|
125
|
+
end
|
126
|
+
|
127
|
+
# It should accept only valid search locations.
|
128
|
+
def test_search_locations
|
129
|
+
validate(OptParser::SEARCH_LOCATIONS, '--search-in', :search_in)
|
130
|
+
end
|
131
|
+
|
132
|
+
# It should accept only valid sort types.
|
133
|
+
def test_sort_types
|
134
|
+
validate(OptParser::SORT_TYPES, '--sort', :sort)
|
135
|
+
end
|
136
|
+
|
137
|
+
# It should accept only valid types.
|
138
|
+
def test_types
|
139
|
+
validate(OptParser::TYPES, '--type', :type)
|
140
|
+
end
|
141
|
+
|
142
|
+
# It should set up the right search method.
|
143
|
+
def test_correct_setting_of_the_method
|
54
144
|
|
55
145
|
cmd_args = ['-k', 'Haus'] | @cmd_args
|
56
|
-
assert_equal('
|
146
|
+
assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
|
57
147
|
|
58
148
|
cmd_args = ['-k', 'Haus', '-c', '09876543'] | @cmd_args
|
59
|
-
assert_equal('
|
149
|
+
assert_equal('questionSearch', OptParser.parse(cmd_args)[:method])
|
60
150
|
|
61
151
|
cmd_args = ['-c', '09876543'] | @cmd_args
|
62
|
-
assert_equal('
|
152
|
+
assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
|
63
153
|
|
64
154
|
cmd_args = ['-c', 'Ausgehen'] | @cmd_args
|
65
|
-
assert_equal('
|
155
|
+
assert_equal('getByCategory', OptParser.parse(cmd_args)[:method])
|
66
156
|
|
67
157
|
cmd_args = ['--user-id', '12345'] | @cmd_args
|
68
|
-
assert_equal('
|
158
|
+
assert_equal('getByUser', OptParser.parse(cmd_args)[:method])
|
69
159
|
|
70
160
|
cmd_args = ['--question-id', '12345'] | @cmd_args
|
71
|
-
assert_equal('
|
161
|
+
assert_equal('getQuestion', OptParser.parse(cmd_args)[:method])
|
72
162
|
end
|
73
163
|
|
74
|
-
#
|
164
|
+
# It should set either :category_id or :category_name.
|
75
165
|
def test_category_identifier
|
76
166
|
args = ['-k', 'Haus', '-c', '123'] | @cmd_args
|
77
167
|
opts = Yanser::OptParser.parse(args)
|
78
168
|
assert_equal('123', opts[:query_params][:category_id])
|
79
|
-
|
169
|
+
assert_equal(false, opts[:query_params].has_key?(:category_name))
|
80
170
|
|
81
171
|
args = ['-k', 'Haus', '-c', 'Haushalt'] | @cmd_args
|
82
172
|
opts = Yanser::OptParser.parse(args)
|
83
173
|
assert_equal('Haushalt', opts[:query_params][:category_name])
|
84
|
-
|
85
|
-
|
174
|
+
assert_equal(false, opts[:query_params].has_key?(:category_id))
|
86
175
|
end
|
87
176
|
|
88
|
-
|
177
|
+
# It should set reasonable defaults.
|
178
|
+
# Yanser may set defaults, Yanapi provides minimal output, i.e. if Yahoo
|
179
|
+
# changes defaults, every user can be affected. That's why we offer some
|
180
|
+
# reasonable defaults for our experiments.
|
181
|
+
# :region => 'de'
|
182
|
+
# :date_range => 'all'
|
183
|
+
def test_default_values
|
184
|
+
flunk('Not implemented! It should set reasonable defaults.')
|
185
|
+
|
89
186
|
end
|
90
187
|
|
91
|
-
|
188
|
+
# It should return a right complex output.
|
189
|
+
def test_output
|
190
|
+
cmd_args = ['--appid', 'YahooDemo', '-r',
|
191
|
+
'de', '-l', '10', '-o', '/tmp',
|
192
|
+
'-k' 'Haus AND grün'
|
193
|
+
]
|
194
|
+
output = {
|
195
|
+
:query_params => {
|
196
|
+
:appid => 'YahooDemo',
|
197
|
+
:query => 'Haus AND grün',
|
198
|
+
:region => 'de',
|
199
|
+
:search_in => 'question'
|
200
|
+
},
|
201
|
+
:method => 'questionSearch',
|
202
|
+
:output_dir => '/tmp',
|
203
|
+
:limit => 10
|
204
|
+
}
|
205
|
+
assert_equal(output, OptParser.parse(cmd_args))
|
92
206
|
end
|
93
207
|
|
94
|
-
#
|
95
|
-
def
|
96
|
-
|
208
|
+
# It should set correct parameters for logging.
|
209
|
+
def test_log_arguments
|
210
|
+
flunk('Not implemented. It should set correct parameters for logging.')
|
97
211
|
end
|
98
212
|
|
99
|
-
#
|
100
|
-
def
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
213
|
+
# It should set correct parameters for data extraction.
|
214
|
+
def test_extraction_arguments
|
215
|
+
flunk('Not implemented. It should set correct parameters for data extraction.')
|
216
|
+
end
|
217
|
+
################################################################################
|
218
|
+
# It is a helper method, many testable units provide some verbose output
|
219
|
+
# to stderr and/or stdout. It is useful to suppress any kind of verbosity.
|
220
|
+
def quietly(&b)
|
221
|
+
begin
|
222
|
+
orig_stderr = $stderr.clone
|
223
|
+
orig_stdout = $stdout.clone
|
224
|
+
$stderr.reopen(File.new('/dev/null', 'w'))
|
225
|
+
$stdout.reopen(File.new('/dev/null', 'w'))
|
226
|
+
b.call
|
227
|
+
ensure
|
228
|
+
$stderr.reopen(orig_stderr)
|
229
|
+
$stdout.reopen(orig_stdout)
|
106
230
|
end
|
107
|
-
input = @minimal_input | ['-r', 'abc']
|
108
|
-
assert_raises(Yanser::OptParserError) {Yanser::OptParser.parse(input)}
|
109
231
|
end
|
110
232
|
|
111
|
-
#
|
112
|
-
def
|
233
|
+
# It is a helper method for validation of used parameter values.
|
234
|
+
def validate(params, option, key)
|
235
|
+
params.each do |p|
|
236
|
+
input = @minimal_input | [option, p]
|
237
|
+
params = {}
|
238
|
+
assert_nothing_raised(SystemExit) { params = OptParser.parse(input) }
|
239
|
+
assert_equal(p, params[:query_params][key])
|
240
|
+
end
|
241
|
+
|
242
|
+
input = @minimal_input | [option, 'invalid']
|
243
|
+
stdout, stderr = intercept_output do
|
244
|
+
assert_raises(SystemExit) { OptParser.parse(input) }
|
245
|
+
end
|
246
|
+
assert_match(/The provided .+ is currently not supported by Yahoo!/,
|
247
|
+
stdout)
|
113
248
|
end
|
114
249
|
|
115
|
-
#
|
116
|
-
def
|
117
|
-
|
250
|
+
# It is a helper method for handling stdout and stderr as strings.
|
251
|
+
def intercept_output
|
252
|
+
orig_stdout = $stdout
|
253
|
+
orig_stderr = $stderr
|
254
|
+
$stdout = StringIO.new
|
255
|
+
$stderr = StringIO.new
|
118
256
|
|
119
|
-
|
120
|
-
def test_mandatory_args
|
121
|
-
args = ['-k', 'Haus']
|
122
|
-
assert_raises(OptParserError) {OptParser.parse args}
|
123
|
-
end
|
257
|
+
yield
|
124
258
|
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
$stderr.reopen(File.new('/dev/null', 'w'))
|
130
|
-
$stdout.reopen(File.new('/dev/null', 'w'))
|
131
|
-
success_args = ['-h', '--help', '-v', '--version']
|
132
|
-
success_args.each do |arg|
|
133
|
-
assert_raises(SystemExit) {OptParser.parse(arg.to_a)}
|
134
|
-
end
|
135
|
-
$stderr.reopen(orig_stderr)
|
136
|
-
$stdout.reopen(orig_stdout)
|
259
|
+
return $stdout.string, $stderr.string
|
260
|
+
ensure
|
261
|
+
$stdout = orig_stdout
|
262
|
+
$stderr = orig_stderr
|
137
263
|
end
|
138
264
|
end
|
data/test/test_yanser.rb
CHANGED
@@ -10,6 +10,7 @@ class TestYanser < Test::Unit::TestCase
|
|
10
10
|
def teardown
|
11
11
|
end
|
12
12
|
|
13
|
+
# It should have a public method <start>.
|
13
14
|
def test_public_methods
|
14
15
|
yanser = Yanser::Yanser.new(
|
15
16
|
:query_params => {
|
@@ -26,10 +27,15 @@ class TestYanser < Test::Unit::TestCase
|
|
26
27
|
assert_respond_to(yanser, :start)
|
27
28
|
end
|
28
29
|
|
30
|
+
# It should have a constant <VERSION>.
|
29
31
|
def test_constants
|
30
32
|
assert(Yanser::VERSION.instance_of?(String))
|
31
33
|
end
|
32
|
-
|
34
|
+
|
35
|
+
# It should accept input according to the defined interface.
|
36
|
+
def test_input
|
37
|
+
flunk('Not implemented. It should accept input according to the defined interface.')
|
33
38
|
end
|
39
|
+
|
34
40
|
|
35
41
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: yanser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 27
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
+
- 1
|
8
9
|
- 0
|
9
|
-
|
10
|
-
version: 0.0.3
|
10
|
+
version: 0.1.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Andrei Beliankou
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-08-
|
18
|
+
date: 2011-08-12 00:00:00 Z
|
19
19
|
dependencies:
|
20
20
|
- !ruby/object:Gem::Dependency
|
21
21
|
name: yanapi
|
@@ -23,17 +23,17 @@ dependencies:
|
|
23
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
24
24
|
none: false
|
25
25
|
requirements:
|
26
|
-
- - "
|
26
|
+
- - ">="
|
27
27
|
- !ruby/object:Gem::Version
|
28
|
-
hash:
|
28
|
+
hash: 17
|
29
29
|
segments:
|
30
30
|
- 0
|
31
|
+
- 3
|
31
32
|
- 1
|
32
|
-
|
33
|
-
version: 0.1.1
|
33
|
+
version: 0.3.1
|
34
34
|
type: :runtime
|
35
35
|
version_requirements: *id001
|
36
|
-
description: YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
|
36
|
+
description: YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics e.g for Question Answering.
|
37
37
|
email: a.belenkow@uni-trier.de
|
38
38
|
executables:
|
39
39
|
- yanser
|
@@ -42,25 +42,27 @@ extensions: []
|
|
42
42
|
extra_rdoc_files:
|
43
43
|
- README
|
44
44
|
- LICENSE
|
45
|
+
- CHANGELOG
|
45
46
|
files:
|
46
|
-
- lib/tester.rb
|
47
47
|
- lib/yanser/error.rb
|
48
|
+
- lib/yanser/store.rb
|
48
49
|
- lib/yanser/yanser.rb
|
49
50
|
- lib/yanser/opt_parser.rb
|
50
51
|
- lib/yanser/version.rb
|
51
52
|
- bin/yanser
|
52
|
-
- LICENSE
|
53
|
-
- Rakefile
|
54
53
|
- README
|
54
|
+
- LICENSE
|
55
|
+
- CHANGELOG
|
55
56
|
- test/test_opt_parser.rb
|
56
57
|
- test/test_yanser.rb
|
58
|
+
- .yardopts
|
57
59
|
homepage: http://www.uni-trier.de/index.php?id=34451
|
58
60
|
licenses: []
|
59
61
|
|
60
62
|
post_install_message:
|
61
63
|
rdoc_options:
|
62
64
|
- -m
|
63
|
-
- README
|
65
|
+
- README
|
64
66
|
require_paths:
|
65
67
|
- lib
|
66
68
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -86,7 +88,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
86
88
|
requirements: []
|
87
89
|
|
88
90
|
rubyforge_project: yanser
|
89
|
-
rubygems_version: 1.7
|
91
|
+
rubygems_version: 1.8.7
|
90
92
|
signing_key:
|
91
93
|
specification_version: 3
|
92
94
|
summary: Yanser is a convinient search tool providing access to the Yahoo! Answers Q/A collection.
|
data/Rakefile
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
# We need rake to use FileLists.
|
2
|
-
require 'rake'
|
3
|
-
# We can require 'rake/clean' to add 'clobber' and 'clean' tasks.
|
4
|
-
require 'rake/clean'
|
5
|
-
|
6
|
-
|
7
|
-
# I am not sure how to use this constant.
|
8
|
-
SRC = FileList['**/*.rb']
|
9
|
-
|
10
|
-
CLOBBER.include('doc', '**/*.html', '**/*.gem')
|
11
|
-
|
12
|
-
# testing
|
13
|
-
require 'rake/testtask'
|
14
|
-
Rake::TestTask.new do |t|
|
15
|
-
t.test_files = FileList.new('test/**/*.rb').to_a
|
16
|
-
# rake starts another ruby process with a new options set.
|
17
|
-
# ruby --some-option -S rake is not sufficient to propagate
|
18
|
-
# the option "--some-option".
|
19
|
-
t.ruby_opts = ['-rubygems']
|
20
|
-
end
|
21
|
-
|
22
|
-
# Build the gem package
|
23
|
-
load 'yanser.gemspec'
|
24
|
-
require 'rubygems/package_task'
|
25
|
-
Gem::PackageTask.new(GEMSPEC).define
|
26
|
-
|
27
|
-
# Generate documentation
|
28
|
-
require 'rdoc/task'
|
29
|
-
RDoc::Task.new do |rdoc|
|
30
|
-
rdoc.rdoc_files.include('README', 'LICENSE', 'lib/**/*')
|
31
|
-
end
|
32
|
-
|
33
|
-
desc "Open an irb session preloaded with this library."
|
34
|
-
task :console do
|
35
|
-
sh "irb -rubygems -I lib -r yanser/opt_parser -r yanser/yanser"
|
36
|
-
end
|
37
|
-
|
38
|
-
desc 'Run the main executable file of the project.'
|
39
|
-
task :start do
|
40
|
-
sh "ruby -w -rubygems -I lib bin/yanser #{ENV['cmd']}"
|
41
|
-
end
|
data/lib/tester.rb
DELETED
@@ -1,18 +0,0 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
|
3
|
-
require 'yanser'
|
4
|
-
|
5
|
-
params = {
|
6
|
-
:query_method => 'TermQuery',
|
7
|
-
:query_params => {
|
8
|
-
:appid => 'YahooDemo',
|
9
|
-
:query => 'Köln',
|
10
|
-
:region => 'de',
|
11
|
-
:results => 5,
|
12
|
-
:start => 0
|
13
|
-
}
|
14
|
-
}
|
15
|
-
|
16
|
-
y = Yanser.new(params)
|
17
|
-
|
18
|
-
y.start
|