yanser 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/test/test_yanser.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require 'test/unit'
3
- require 'yanser'
3
+ require 'yanser/yanser'
4
+ require 'yanser/version'
4
5
 
5
6
  class TestYanser < Test::Unit::TestCase
6
7
  def setup
@@ -10,7 +11,7 @@ class TestYanser < Test::Unit::TestCase
10
11
  end
11
12
 
12
13
  def test_public_methods
13
- yanser = Yanser.new(
14
+ yanser = Yanser::Yanser.new(
14
15
  :query_params => {
15
16
  :appid => 'YahooDemo',
16
17
  :query => 'Haus AND grün',
@@ -24,7 +25,10 @@ class TestYanser < Test::Unit::TestCase
24
25
 
25
26
  assert_respond_to(yanser, :start)
26
27
  end
27
-
28
+
29
+ def test_constants
30
+ assert(Yanser::VERSION.instance_of?(String))
31
+ end
28
32
  def test_mandatory_options
29
33
  end
30
34
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yanser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease: false
4
+ hash: 27
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Andrei Beliankou
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-06 00:00:00 +02:00
19
- default_executable:
18
+ date: 2011-07-08 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: yanapi
@@ -41,28 +40,20 @@ executables:
41
40
  extensions: []
42
41
 
43
42
  extra_rdoc_files:
44
- - README.rdoc
43
+ - README
45
44
  - LICENSE
46
45
  files:
47
46
  - lib/tester.rb
48
- - lib/yanser.rb
49
- - lib/option_parser.rb
50
- - lib/version.rb
47
+ - lib/yanser/error.rb
48
+ - lib/yanser/yanser.rb
49
+ - lib/yanser/opt_parser.rb
50
+ - lib/yanser/version.rb
51
51
  - bin/yanser
52
- - README.rdoc
53
52
  - LICENSE
54
53
  - Rakefile
55
54
  - README
56
- - test/test_option_parser.rb
57
- - test/yanapi/test_query.rb
58
- - test/yanapi/test_term_query.rb
55
+ - test/test_opt_parser.rb
59
56
  - test/test_yanser.rb
60
- - test/data/bad_xml.txt
61
- - test/data/empty_result.txt
62
- - test/data/error_code.txt
63
- - test/data/response_with_error.txt
64
- - test/data/successfull_response.txt
65
- has_rdoc: true
66
57
  homepage: http://www.uni-trier.de/index.php?id=34451
67
58
  licenses: []
68
59
 
@@ -95,10 +86,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
95
86
  requirements: []
96
87
 
97
88
  rubyforge_project: yanser
98
- rubygems_version: 1.3.7
89
+ rubygems_version: 1.7.2
99
90
  signing_key:
100
91
  specification_version: 3
101
92
  summary: Yanser is a convinient search tool providing access to the Yahoo! Answers Q/A collection.
102
93
  test_files:
103
- - test/test_option_parser.rb
94
+ - test/test_opt_parser.rb
104
95
  - test/test_yanser.rb
data/README.rdoc DELETED
@@ -1,21 +0,0 @@
1
- = YANSER
2
-
3
- * {RubyGems}[http://rubygems.org/gems/yanser]
4
- * Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
5
- * {YANAPI Project Page}[http://yanser.rubyforge.org/]
6
-
7
- == DESCRIPTION
8
-
9
- YANSER (Yahoo! ANSwers harvestER) is a convinient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI it provides a simple CLI and helps to search for Questions and Answers which contain a set of key words, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
10
-
11
- == SYNOPSIS
12
- $ yanser --help
13
-
14
-
15
-
16
- == LICENSE
17
-
18
- YANSER is a copyrighted software by Andrei Beliankou, 2011.
19
- You may use, redistribute and change it under the terms
20
- provided in the LICENSE file.
21
-
data/lib/option_parser.rb DELETED
@@ -1,263 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require 'optparse'
3
- require 'fileutils'
4
-
5
- require 'version'
6
-
7
- class OptionParser
8
- # OP expects cmd_args to be an array like ARGV
9
- # dummy output for temporary usage
10
- def self.parse(cmd_args)
11
- options = {}
12
- options[:query_params] = {}
13
-
14
- parser = OptionParser.new do |opts|
15
- opts.banner = 'Usage: yanser OPTIONS'
16
-
17
- opts.separator ''
18
- opts.separator 'Program specific options:'
19
-
20
- opts.on('--appid APPID',
21
- 'Provide an ApplicationID given by Yahoo,',
22
- 'to test Yanser you can use <YahooDemo> as the APPID,',
23
- 'think in this case on limitations placed by Yahoo.',
24
- 'This option is required!'
25
- ) do |appid|
26
- options[:query_params][:appid] = appid
27
- end
28
-
29
-
30
- opts.separator ''
31
- opts.separator ' Mandatory search arguments:'
32
-
33
- opts.on('-k', '--key-word KEYWORD',
34
- 'Provide a single keyword or a boolean expression.'
35
- ) do |keyword|
36
- options[:query_params][:query] = keyword
37
- # not a solution!!!
38
- options[:query_params][:search_in] = 'question'
39
-
40
- end
41
-
42
- opts.separator ''
43
-
44
- opts.on('-c', '--category CATEGORY',
45
- 'Provide a category name or ID.'
46
- ) do |category|
47
-
48
- if category =~ /^[[:digit:]]+$/
49
- options[:query_params][:category_id] = category
50
- else
51
- options[:query_params][:category_name] = category
52
- end
53
-
54
- end
55
-
56
- opts.separator ''
57
-
58
- opts.on('--user-id ID',
59
- 'Provide an user ID of questions you search for.',
60
- 'This way you can get question by a specific user.'
61
- ) do |user_id|
62
- options[:query_params][:user_id] = user_id
63
- end
64
-
65
- opts.separator ''
66
-
67
- opts.on('--question-id ID',
68
- 'Provide a question ID of the question you search for.',
69
- 'It returns a unique question.'
70
- ) do |question_id|
71
- options[:query_params][:question_id] = question_id
72
- end
73
-
74
- opts.separator ''
75
- opts.separator ' Optional search arguments:'
76
-
77
- opts.on('-r', '--region REGION',
78
- 'Provide a geographic region to search in for terms.',
79
- 'Possible values are: de, us, uk, ca, au, in, es, br,',
80
- ' ar, mx, e1, it, fr, sg.',
81
- 'This defaults to en.'
82
- ) do |region|
83
- # todo
84
- prove_region(region)
85
- options[:query_params][:region] = region
86
- end
87
-
88
- opts.separator ''
89
-
90
- opts.on('-o', '--output-dir DIR',
91
- 'Provide an output folder.',
92
- 'This directory will be created if it does not exist yet.'
93
- ) do |output_dir|
94
- options[:output_dir] = provide_dir(output_dir)
95
- end
96
-
97
- opts.separator ''
98
-
99
- opts.on('-l', '--limit NUMBER', Integer,
100
- 'Provide a number of answers you want to get from Yahoo.',
101
- 'This argument is not mandatory, if you want to get',
102
- 'all answers simply ommit this argument.'
103
- ) do |limit|
104
- options[:limit] = limit
105
- end
106
-
107
- opts.separator ''
108
- opts.on('-f', '--output-format FORMAT',
109
- 'Provide an output format: xml, json, rss, php.',
110
- 'It defaults to xml, and you can simply ommit this option.'
111
- ) do |f|
112
- raise NotImplementedError, 'Only default output format is implemented'
113
- end
114
-
115
- opts.separator ''
116
- opts.on('--prefix PREFIX',
117
- 'Provide a prefix for the output files. By default',
118
- 'the filename begins with the index of the retrieved',
119
- 'question. You can alter this by providing a prefix.',
120
- 'It can be useful if you want to put many query results',
121
- 'in the same output folder.'
122
- ) do |pref|
123
- raise NotImplementedError, 'No prefixes implemeted.'
124
- end
125
-
126
- opts.separator ""
127
- opts.separator "Common options:"
128
-
129
- opts.on_tail('-h', '--help', 'Show the help message.') do
130
- puts opts
131
- exit
132
- end
133
-
134
- opts.on_tail('-v', '--version', 'Show the program version.') do
135
- puts YANSER::VERSION
136
- exit
137
- end
138
- end
139
-
140
- # if no options provided print the help
141
- if cmd_args.empty?
142
- $stderr.printf "You have to provide some options.\n\n"
143
- puts parser
144
- exit 1
145
- end
146
-
147
- # Parse ARGV and provide the options hash.
148
- # Check if everything is correct and handle exceptions
149
- begin
150
- parser.parse!(cmd_args)
151
- # rescue all exceptions from OptionParser
152
- rescue => e
153
- $stderr.printf "#{e.message.capitalize}\n\n"
154
- puts parser
155
- exit 1
156
- end
157
-
158
- # Check to see if we got the required arguments needed.
159
- check_required_options(options)
160
-
161
- # Set the search method.
162
- options[:query_type] = set_query_type(options[:query_params])
163
-
164
- return options
165
- end # parse
166
-
167
- private
168
-
169
- # Check if the value of given region is correct.
170
- # Now 14 regions are supported by Yahoo! Answers.
171
- def self.prove_region(region)
172
- regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
173
- 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
174
-
175
- unless regions.include?(region)
176
- $stderr.puts "The provided search region #{region} is currently not supported by Yahoo!"
177
- exit 1
178
- end
179
- end
180
-
181
- # define one of the following query types: TermQuery|CategoryQuery|
182
- # QuestionQuery|UserQuery
183
- def self.set_query_type(params)
184
- case
185
- when (params[:category_id] || params[:category_name]) && ! params[:query]
186
- return 'CategoryQuery'
187
- when params[:query]
188
- return 'TermQuery'
189
- when params[:user_id]
190
- return 'UserQuery'
191
- when params[:question_id]
192
- return 'QuestionQuery'
193
- end
194
- end
195
-
196
- def self.check_required_options(options)
197
- required_opts = [:appid]
198
-
199
- required_opts.each do |opt|
200
- if options[:query_params].has_key?(opt)
201
- next
202
- else
203
- $stderr.puts "A required option --#{opt} is missing."
204
- exit 1
205
- end
206
- end
207
- end
208
-
209
- def self.provide_dir(dir)
210
- dir = File.expand_path(dir)
211
- #check for existens
212
- if File.directory?(dir)
213
- if File.writable?(dir)
214
- return dir
215
- else
216
- $stderr.puts 'The directory you have provided is not writable!'
217
- exit 1
218
- end
219
- else
220
- FileUtils.mkdir_p(dir)
221
- return dir
222
- end
223
- end # provide_dir
224
-
225
- end # OptionParser
226
-
227
- __END__
228
-
229
- -c, --category-id # Term & CategorySearch
230
- -q, --question-id # QuestionSearch
231
-
232
- -t, --time-interval
233
- -f, --output-format
234
- -p, --prefix #prefix for output files
235
-
236
-
237
-
238
-
239
- instance interface
240
-
241
- {
242
- :query_params => {
243
- :appid => 'YahooDemo' | 'SomeStringWithYourID',
244
- :callback => '',
245
- :category_id => '',
246
- :category_name => '',
247
- :date_range => '',
248
- :filter => '',
249
- :output => '',
250
- :query => '',
251
- :question_id => '',
252
- :region => 'de'|'us'|'uk'|'ca'|'au'|'in'|'es'|'br'|'ar'|'mx'|'e1'|'it'|'fr'|'sg', # default 'en'
253
- :results => Integer, # 0..50
254
- :search_in => "all" | "question" | "best_answer", # default 'all'
255
- :sort => 'relevance' | 'date_desc'| 'date_asc', # default 'relevance'
256
- :start => Integer, # <= 1000
257
- :type => "all" | "resolved" | "open" | "undecided", # default 'all'
258
- :user_id => ''
259
- },
260
- :query_type => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
261
- :output_dir => 'some path',
262
- :prefix => 'some prefix' # prefix for output files
263
- }
data/lib/version.rb DELETED
@@ -1,3 +0,0 @@
1
- module YANSER
2
- VERSION = '0.0.1'
3
- end
data/lib/yanser.rb DELETED
@@ -1,121 +0,0 @@
1
- require 'yanapi'
2
-
3
- # :title: YANSER, Yahoo! ANSwers harvestER
4
- # :main: Yanser
5
- # Main processing class.
6
- # Yanser encapsulates the main routine and instantiates
7
- # all other classes.
8
- #--
9
- # Yanser takes the users input and validates it.
10
- # It decides which search method to choose.
11
- # Then it collects all parameters and useful default values,
12
- # creates an XyzQuery with the starting point of 0 and gets the first results.
13
- # If more results were requested, Yanser creates a similar XyzQuery and gets
14
- # the next result set until the result limitation set by the user is met.
15
- #
16
- class Yanser
17
-
18
- # Yahoo! Answers returns starting not more than at the 1000st question.
19
- # It makes no sense to step over.
20
- START_LIMIT = 1000
21
-
22
- # We query the web service every two seconds.
23
- QUERY_INTERVAL = 2
24
-
25
- # Yahoo! Answers returns maximum 50 results.
26
- MAX_RESULTS = 50
27
-
28
- def initialize(opts)
29
-
30
- # the minimal output of an OptionParser
31
- # {:query_type=>u|q|c|w, :query_params=>{appid, start, results, query}}
32
- # opts come from the OptionParser
33
- # they are supposed to be correct, no validation here
34
- @options = opts
35
-
36
- @options[:limit] = @options[:limit] || START_LIMIT + MAX_RESULTS
37
-
38
- @query_params = @options[:query_params]
39
-
40
- @output_type = @query_params[:output] || 'xml'
41
-
42
- @query_params[:start] = @query_params[:start] || 0
43
- end
44
-
45
- # TODO: implement some logging
46
- def start
47
-
48
- if @options[:query_type] == 'QuestionQuery'
49
- query(@query_params)
50
- elsif @options[:limit] < MAX_RESULTS
51
- @query_params[:results] = @options[:limit]
52
- query(@query_params)
53
- else
54
- @query_params[:results] = MAX_RESULTS
55
-
56
- while query(@query_params) do
57
- # we get the next start point here
58
- @query_params[:start] += MAX_RESULTS
59
-
60
- results_left = @options[:limit] - @query_params[:start]
61
- if results_left == 0
62
- break
63
- elsif results_left < MAX_RESULTS
64
- @query_params[:results] = results_left
65
- break
66
- end
67
- end # while
68
-
69
- end # if
70
-
71
- end # start
72
-
73
- private
74
-
75
- def query(params)
76
- q = create_query(params)
77
- tries = 0
78
- begin
79
- tries += 1
80
- result = q.get
81
- output(result)
82
- sleep(2)
83
- rescue YANAPI::EmptyResponse => e
84
- $sderr.puts e
85
- return false # do not iterate futher
86
- rescue => e # some errors to retry
87
- if (tries < 4)
88
- sleep(QUERY_INTERVAL**tries)
89
- retry
90
- else
91
- $stderr.puts e
92
- return false # do not iterate futher
93
- end
94
- end
95
- return true # we may iterate futher
96
- end
97
-
98
- def create_query(params)
99
- eval("YANAPI::#{@options[:query_type]}.new(params)")
100
- end
101
-
102
- def output(result)
103
- if @options[:output_dir]
104
- save(result)
105
- else
106
- puts result
107
- end
108
- end
109
-
110
- # save results to a dir
111
- # this dir exists since has been proved by OptionParser
112
- # not a good implementation
113
- # interface (filename, data)
114
- def save(result)
115
- filename = File.join(@options[:output_dir], "#{@query_params[:start]}.#{@output_type}")
116
- file = File.new(filename, 'w')
117
- file.puts result
118
- file.close
119
- end
120
-
121
- end # Yanser