yanser 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/test/test_yanser.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  require 'test/unit'
3
- require 'yanser'
3
+ require 'yanser/yanser'
4
+ require 'yanser/version'
4
5
 
5
6
  class TestYanser < Test::Unit::TestCase
6
7
  def setup
@@ -10,7 +11,7 @@ class TestYanser < Test::Unit::TestCase
10
11
  end
11
12
 
12
13
  def test_public_methods
13
- yanser = Yanser.new(
14
+ yanser = Yanser::Yanser.new(
14
15
  :query_params => {
15
16
  :appid => 'YahooDemo',
16
17
  :query => 'Haus AND grün',
@@ -24,7 +25,10 @@ class TestYanser < Test::Unit::TestCase
24
25
 
25
26
  assert_respond_to(yanser, :start)
26
27
  end
27
-
28
+
29
+ def test_constants
30
+ assert(Yanser::VERSION.instance_of?(String))
31
+ end
28
32
  def test_mandatory_options
29
33
  end
30
34
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: yanser
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
5
- prerelease: false
4
+ hash: 27
5
+ prerelease:
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - Andrei Beliankou
@@ -15,8 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-06 00:00:00 +02:00
19
- default_executable:
18
+ date: 2011-07-08 00:00:00 Z
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
21
  name: yanapi
@@ -41,28 +40,20 @@ executables:
41
40
  extensions: []
42
41
 
43
42
  extra_rdoc_files:
44
- - README.rdoc
43
+ - README
45
44
  - LICENSE
46
45
  files:
47
46
  - lib/tester.rb
48
- - lib/yanser.rb
49
- - lib/option_parser.rb
50
- - lib/version.rb
47
+ - lib/yanser/error.rb
48
+ - lib/yanser/yanser.rb
49
+ - lib/yanser/opt_parser.rb
50
+ - lib/yanser/version.rb
51
51
  - bin/yanser
52
- - README.rdoc
53
52
  - LICENSE
54
53
  - Rakefile
55
54
  - README
56
- - test/test_option_parser.rb
57
- - test/yanapi/test_query.rb
58
- - test/yanapi/test_term_query.rb
55
+ - test/test_opt_parser.rb
59
56
  - test/test_yanser.rb
60
- - test/data/bad_xml.txt
61
- - test/data/empty_result.txt
62
- - test/data/error_code.txt
63
- - test/data/response_with_error.txt
64
- - test/data/successfull_response.txt
65
- has_rdoc: true
66
57
  homepage: http://www.uni-trier.de/index.php?id=34451
67
58
  licenses: []
68
59
 
@@ -95,10 +86,10 @@ required_rubygems_version: !ruby/object:Gem::Requirement
95
86
  requirements: []
96
87
 
97
88
  rubyforge_project: yanser
98
- rubygems_version: 1.3.7
89
+ rubygems_version: 1.7.2
99
90
  signing_key:
100
91
  specification_version: 3
101
92
  summary: Yanser is a convinient search tool providing access to the Yahoo! Answers Q/A collection.
102
93
  test_files:
103
- - test/test_option_parser.rb
94
+ - test/test_opt_parser.rb
104
95
  - test/test_yanser.rb
data/README.rdoc DELETED
@@ -1,21 +0,0 @@
1
- = YANSER
2
-
3
- * {RubyGems}[http://rubygems.org/gems/yanser]
4
- * Developers {Homepage}[http://www.uni-trier.de/index.php?id=24140]
5
- * {YANAPI Project Page}[http://yanser.rubyforge.org/]
6
-
7
- == DESCRIPTION
8
-
9
- YANSER (Yahoo! ANSwers harvestER) is a convenient search tool providing access to the Yahoo! Answers Q&A collection. Based on YANAPI, it provides a simple CLI and helps to search for Questions and Answers which contain a set of keywords, belong to a specific semantic domain or are posted by a certain user. Yanser is a research tool in the field of Computational Linguistics.
10
-
11
- == SYNOPSIS
12
- $ yanser --help
13
-
14
-
15
-
16
- == LICENSE
17
-
18
- YANSER is copyrighted software by Andrei Beliankou, 2011.
19
- You may use, redistribute and change it under the terms
20
- provided in the LICENSE file.
21
-
data/lib/option_parser.rb DELETED
@@ -1,263 +0,0 @@
1
- # -*- coding: utf-8 -*-
2
- require 'optparse'
3
- require 'fileutils'
4
-
5
- require 'version'
6
-
7
- class OptionParser
8
- # OP expects cmd_args to be an array like ARGV
9
- # dummy output for temporary usage
10
- def self.parse(cmd_args)
11
- options = {}
12
- options[:query_params] = {}
13
-
14
- parser = OptionParser.new do |opts|
15
- opts.banner = 'Usage: yanser OPTIONS'
16
-
17
- opts.separator ''
18
- opts.separator 'Program specific options:'
19
-
20
- opts.on('--appid APPID',
21
- 'Provide an ApplicationID given by Yahoo,',
22
- 'to test Yanser you can use <YahooDemo> as the APPID,',
23
- 'think in this case on limitations placed by Yahoo.',
24
- 'This option is required!'
25
- ) do |appid|
26
- options[:query_params][:appid] = appid
27
- end
28
-
29
-
30
- opts.separator ''
31
- opts.separator ' Mandatory search arguments:'
32
-
33
- opts.on('-k', '--key-word KEYWORD',
34
- 'Provide a single keyword or a boolean expression.'
35
- ) do |keyword|
36
- options[:query_params][:query] = keyword
37
- # not a solution!!!
38
- options[:query_params][:search_in] = 'question'
39
-
40
- end
41
-
42
- opts.separator ''
43
-
44
- opts.on('-c', '--category CATEGORY',
45
- 'Provide a category name or ID.'
46
- ) do |category|
47
-
48
- if category =~ /^[[:digit:]]+$/
49
- options[:query_params][:category_id] = category
50
- else
51
- options[:query_params][:category_name] = category
52
- end
53
-
54
- end
55
-
56
- opts.separator ''
57
-
58
- opts.on('--user-id ID',
59
- 'Provide an user ID of questions you search for.',
60
- 'This way you can get question by a specific user.'
61
- ) do |user_id|
62
- options[:query_params][:user_id] = user_id
63
- end
64
-
65
- opts.separator ''
66
-
67
- opts.on('--question-id ID',
68
- 'Provide a question ID of the question you search for.',
69
- 'It returns a unique question.'
70
- ) do |question_id|
71
- options[:query_params][:question_id] = question_id
72
- end
73
-
74
- opts.separator ''
75
- opts.separator ' Optional search arguments:'
76
-
77
- opts.on('-r', '--region REGION',
78
- 'Provide a geographic region to search in for terms.',
79
- 'Possible values are: de, us, uk, ca, au, in, es, br,',
80
- ' ar, mx, e1, it, fr, sg.',
81
- 'This defaults to en.'
82
- ) do |region|
83
- # todo
84
- prove_region(region)
85
- options[:query_params][:region] = region
86
- end
87
-
88
- opts.separator ''
89
-
90
- opts.on('-o', '--output-dir DIR',
91
- 'Provide an output folder.',
92
- 'This directory will be created if it does not exist yet.'
93
- ) do |output_dir|
94
- options[:output_dir] = provide_dir(output_dir)
95
- end
96
-
97
- opts.separator ''
98
-
99
- opts.on('-l', '--limit NUMBER', Integer,
100
- 'Provide a number of answers you want to get from Yahoo.',
101
- 'This argument is not mandatory, if you want to get',
102
- 'all answers simply ommit this argument.'
103
- ) do |limit|
104
- options[:limit] = limit
105
- end
106
-
107
- opts.separator ''
108
- opts.on('-f', '--output-format FORMAT',
109
- 'Provide an output format: xml, json, rss, php.',
110
- 'It defaults to xml, and you can simply ommit this option.'
111
- ) do |f|
112
- raise NotImplementedError, 'Only default output format is implemented'
113
- end
114
-
115
- opts.separator ''
116
- opts.on('--prefix PREFIX',
117
- 'Provide a prefix for the output files. By default',
118
- 'the filename begins with the index of the retrieved',
119
- 'question. You can alter this by providing a prefix.',
120
- 'It can be useful if you want to put many query results',
121
- 'in the same output folder.'
122
- ) do |pref|
123
- raise NotImplementedError, 'No prefixes implemeted.'
124
- end
125
-
126
- opts.separator ""
127
- opts.separator "Common options:"
128
-
129
- opts.on_tail('-h', '--help', 'Show the help message.') do
130
- puts opts
131
- exit
132
- end
133
-
134
- opts.on_tail('-v', '--version', 'Show the program version.') do
135
- puts YANSER::VERSION
136
- exit
137
- end
138
- end
139
-
140
- # if no options provided print the help
141
- if cmd_args.empty?
142
- $stderr.printf "You have to provide some options.\n\n"
143
- puts parser
144
- exit 1
145
- end
146
-
147
- # Parse ARGV and provide the options hash.
148
- # Check if everything is correct and handle exceptions
149
- begin
150
- parser.parse!(cmd_args)
151
- # rescue all exceptions from OptionParser
152
- rescue => e
153
- $stderr.printf "#{e.message.capitalize}\n\n"
154
- puts parser
155
- exit 1
156
- end
157
-
158
- # Check to see if we got the required arguments needed.
159
- check_required_options(options)
160
-
161
- # Set the search method.
162
- options[:query_type] = set_query_type(options[:query_params])
163
-
164
- return options
165
- end # parse
166
-
167
- private
168
-
169
- # Check if the value of given region is correct.
170
- # Now 14 regions are supported by Yahoo! Answers.
171
- def self.prove_region(region)
172
- regions = ['de', 'us', 'uk', 'ca', 'au', 'in', 'es',
173
- 'br', 'ar', 'mx', 'e1', 'it', 'fr', 'sg']
174
-
175
- unless regions.include?(region)
176
- $stderr.puts "The provided search region #{region} is currently not supported by Yahoo!"
177
- exit 1
178
- end
179
- end
180
-
181
- # define one of the following query types: TermQuery|CategoryQuery|
182
- # QuestionQuery|UserQuery
183
- def self.set_query_type(params)
184
- case
185
- when (params[:category_id] || params[:category_name]) && ! params[:query]
186
- return 'CategoryQuery'
187
- when params[:query]
188
- return 'TermQuery'
189
- when params[:user_id]
190
- return 'UserQuery'
191
- when params[:question_id]
192
- return 'QuestionQuery'
193
- end
194
- end
195
-
196
- def self.check_required_options(options)
197
- required_opts = [:appid]
198
-
199
- required_opts.each do |opt|
200
- if options[:query_params].has_key?(opt)
201
- next
202
- else
203
- $stderr.puts "A required option --#{opt} is missing."
204
- exit 1
205
- end
206
- end
207
- end
208
-
209
- def self.provide_dir(dir)
210
- dir = File.expand_path(dir)
211
- # check for existence
212
- if File.directory?(dir)
213
- if File.writable?(dir)
214
- return dir
215
- else
216
- $stderr.puts 'The directory you have provided is not writable!'
217
- exit 1
218
- end
219
- else
220
- FileUtils.mkdir_p(dir)
221
- return dir
222
- end
223
- end # provide_dir
224
-
225
- end # OptionParser
226
-
227
- __END__
228
-
229
- -c, --category-id # Term & CategorySearch
230
- -q, --question-id # QuestionSearch
231
-
232
- -t, --time-interval
233
- -f, --output-format
234
- -p, --prefix #prefix for output files
235
-
236
-
237
-
238
-
239
- instance interface
240
-
241
- {
242
- :query_params => {
243
- :appid => 'YahooDemo' | 'SomeStringWithYourID',
244
- :callback => '',
245
- :category_id => '',
246
- :category_name => '',
247
- :date_range => '',
248
- :filter => '',
249
- :output => '',
250
- :query => '',
251
- :question_id => '',
252
- :region => 'de'|'us'|'uk'|'ca'|'au'|'in'|'es'|'br'|'ar'|'mx'|'e1'|'it'|'fr'|'sg', # default 'en'
253
- :results => Integer, # 0..50
254
- :search_in => "all" | "question" | "best_answer", # default 'all'
255
- :sort => 'relevance' | 'date_desc'| 'date_asc', # default 'relevance'
256
- :start => Integer, # <= 1000
257
- :type => "all" | "resolved" | "open" | "undecided", # default 'all'
258
- :user_id => ''
259
- },
260
- :query_type => 'TermQuery'|'QuestionQuery'|'UserQuery'|'CategoryQuery',
261
- :output_dir => 'some path',
262
- :prefix => 'some prefix' # prefix for output files
263
- }
data/lib/version.rb DELETED
@@ -1,3 +0,0 @@
1
- module YANSER
2
- VERSION = '0.0.1'
3
- end
data/lib/yanser.rb DELETED
@@ -1,121 +0,0 @@
1
- require 'yanapi'
2
-
3
- # :title: YANSER, Yahoo! ANSwers harvestER
4
- # :main: Yanser
5
- # Main processing class.
6
- # Yanser encapsulates the main routine and instantiates
7
- # all other classes.
8
- #--
9
- # Yanser takes the user's input and validates it.
10
- # It decides which search method to choose.
11
- # Then it collects all parameters and useful default values,
12
- # creates an XyzQuery with the starting point of 0 and gets the first results.
13
- # If more results were requested, Yanser creates a similar XyzQuery and gets
14
- # the next result set until the result limitation set by the user is met.
15
- #
16
- class Yanser
17
-
18
- # Yahoo! Answers does not return results starting beyond the 1000th question.
19
- # It makes no sense to step over.
20
- START_LIMIT = 1000
21
-
22
- # We query the web service every two seconds.
23
- QUERY_INTERVAL = 2
24
-
25
- # Yahoo! Answers returns maximum 50 results.
26
- MAX_RESULTS = 50
27
-
28
- def initialize(opts)
29
-
30
- # the minimal output of an OptionParser
31
- # {:query_type=>u|q|c|w, :query_params=>{appid, start, results, query}}
32
- # opts come from the OptionParser
33
- # they are supposed to be correct, no validation here
34
- @options = opts
35
-
36
- @options[:limit] = @options[:limit] || START_LIMIT + MAX_RESULTS
37
-
38
- @query_params = @options[:query_params]
39
-
40
- @output_type = @query_params[:output] || 'xml'
41
-
42
- @query_params[:start] = @query_params[:start] || 0
43
- end
44
-
45
- # TODO: implement some logging
46
- def start
47
-
48
- if @options[:query_type] == 'QuestionQuery'
49
- query(@query_params)
50
- elsif @options[:limit] < MAX_RESULTS
51
- @query_params[:results] = @options[:limit]
52
- query(@query_params)
53
- else
54
- @query_params[:results] = MAX_RESULTS
55
-
56
- while query(@query_params) do
57
- # we get the next start point here
58
- @query_params[:start] += MAX_RESULTS
59
-
60
- results_left = @options[:limit] - @query_params[:start]
61
- if results_left == 0
62
- break
63
- elsif results_left < MAX_RESULTS
64
- @query_params[:results] = results_left
65
- break
66
- end
67
- end # while
68
-
69
- end # if
70
-
71
- end # start
72
-
73
- private
74
-
75
- def query(params)
76
- q = create_query(params)
77
- tries = 0
78
- begin
79
- tries += 1
80
- result = q.get
81
- output(result)
82
- sleep(2)
83
- rescue YANAPI::EmptyResponse => e
84
- $sderr.puts e
85
- return false # do not iterate futher
86
- rescue => e # some errors to retry
87
- if (tries < 4)
88
- sleep(QUERY_INTERVAL**tries)
89
- retry
90
- else
91
- $stderr.puts e
92
- return false # do not iterate futher
93
- end
94
- end
95
- return true # we may iterate futher
96
- end
97
-
98
- def create_query(params)
99
- eval("YANAPI::#{@options[:query_type]}.new(params)")
100
- end
101
-
102
- def output(result)
103
- if @options[:output_dir]
104
- save(result)
105
- else
106
- puts result
107
- end
108
- end
109
-
110
- # save results to a dir
111
- # this dir exists since has been proved by OptionParser
112
- # not a good implementation
113
- # interface (filename, data)
114
- def save(result)
115
- filename = File.join(@options[:output_dir], "#{@query_params[:start]}.#{@output_type}")
116
- file = File.new(filename, 'w')
117
- file.puts result
118
- file.close
119
- end
120
-
121
- end # Yanser