semaphore_classification 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -14,6 +14,11 @@ To classify documents:
14
14
 
15
15
  Most likely you will specify a :document_uri when classifying documents, but if you do not, you will need to specify an :alternate_body
16
16
 
17
+ == Semaphore::Client.decode_term_id(<term_id>) Options
18
+
19
+ This method is used when you have the ID of a term and would like to retrieve
20
+ its name from the Search Enhancement Server.
21
+
17
22
  == Semaphore::Client.classify() Options
18
23
 
19
24
  === :document_uri (optional)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.2.0
@@ -15,27 +15,57 @@ module Semaphore
15
15
  :threshold => 48, :language => LANGUAGES[:english_marathon_stemmer], :generated_keys => true, :min_avg_article_page_size => 1.0,
16
16
  :character_cutoff => 500000, :document_score_limit => 0, :article_mode => :single
17
17
  }
18
+
19
+ TERM_QUERY = 'select.exe?TBDB=disp_taxonomy&TEMPLATE=service.xml&SERVICE=browse&ID='
20
+
18
21
  @@connection = nil
22
+ @@decode_term_ids = nil
19
23
 
20
24
  class << self
21
25
 
22
26
  def set_realm(realm, proxy=nil)
23
27
  @@connection = Connection.new(realm, proxy)
24
28
  end
29
+
30
+ def decode_term_ids=(value)
31
+ raise RealmNotSpecified if @@connection.nil?
32
+ @@decode_term_ids = value
33
+ @@connection.decode_term_ids = @@decode_term_ids
34
+ end
35
+
36
+ def decode_term_ids
37
+ @@decode_term_ids
38
+ end
25
39
 
26
40
  def classify(*args)
27
41
  options = extract_options!(args)
28
- raise InsufficientArgs if options[:alternate_body].empty? && options[:document_uri].empty?
42
+ raise InsufficientArgs if options[:alternate_body].nil? && options[:document_uri].nil?
29
43
 
30
44
  result = post @@default_options.merge(options)
31
45
  end
32
46
 
47
+ def decode_term_id(term_id)
48
+ raise RealmNotSpecified if @@connection.nil?
49
+ begin
50
+ raw_host = decode_host(@@connection.realm)
51
+ path = [raw_host, 'cgi-bin', "#{TERM_QUERY}#{term_id}"].join('/')
52
+ term_doc = Nokogiri::XML.parse(open(path))
53
+ rescue
54
+ raise SemaphoreError
55
+ end
56
+ term_doc.xpath('//BROWSE_TERM/TERM/NAME').inner_text
57
+ end
58
+
33
59
  private
34
60
 
35
61
  def post(data)
36
62
  raise RealmNotSpecified if @@connection.nil?
37
63
  @@connection.post data
38
64
  end
65
+
66
+ def decode_host(realm)
67
+ realm.split('/').delete_if {|i| i == 'index.html' }.join('/').gsub(':5058', '')
68
+ end
39
69
 
40
70
  def extract_options!(args)
41
71
  if args.last.is_a?(Hash)
@@ -3,9 +3,11 @@ module Semaphore
3
3
  class Connection
4
4
 
5
5
  attr_reader :realm
6
+ attr_accessor :decode_term_ids
6
7
 
7
8
  def initialize(realm, proxy=nil)
8
9
  @realm = realm
10
+ @decode_term_ids = nil
9
11
  @proxy = proxy
10
12
  end
11
13
 
@@ -68,7 +70,11 @@ module Semaphore
68
70
  begin
69
71
  doc = Nokogiri::XML.parse(response)
70
72
  doc.xpath('//META').each do |node|
71
- data << { :term => node['value'], :key => node['key'], :score => node['score'] } if node['name'] == "Generic"
73
+ if node['name'] == "Generic_ID"
74
+ term = { :term_id => node['value'].to_i, :score => node['score'].to_f }
75
+ term[:term] = Client.decode_term_id(term[:term_id]) if @decode_term_ids
76
+ data << term
77
+ end
72
78
  end
73
79
  rescue
74
80
  raise DecodeError, "content: <#{response.body}>"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: semaphore_classification
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
8
  - 2
10
- version: 0.1.2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Mauricio Gomes
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-27 00:00:00 -04:00
18
+ date: 2010-10-12 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency