semaphore_classification 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -14,6 +14,11 @@ To classify documents:
14
14
 
15
15
  Most likely you will specify a :document_uri when classifying documents, but if you do not, you will need to specify an :alternate_body.
16
16
 
17
+ == Semaphore::Client.decode_term_id(<term_id>) Options
18
+
19
+ This method is used when you have the ID of a term and would like to retrieve
20
+ its name from the Search Enhancement Server.
21
+
17
22
  == Semaphore::Client.classify() Options
18
23
 
19
24
  === :document_uri (optional)
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.2
1
+ 0.2.0
@@ -15,27 +15,57 @@ module Semaphore
15
15
  :threshold => 48, :language => LANGUAGES[:english_marathon_stemmer], :generated_keys => true, :min_avg_article_page_size => 1.0,
16
16
  :character_cutoff => 500000, :document_score_limit => 0, :article_mode => :single
17
17
  }
18
+
19
+ TERM_QUERY = 'select.exe?TBDB=disp_taxonomy&TEMPLATE=service.xml&SERVICE=browse&ID='
20
+
18
21
  @@connection = nil
22
+ @@decode_term_ids = nil
19
23
 
20
24
  class << self
21
25
 
22
26
  def set_realm(realm, proxy=nil)
23
27
  @@connection = Connection.new(realm, proxy)
24
28
  end
29
+
30
+ def decode_term_ids=(value)
31
+ raise RealmNotSpecified if @@connection.nil?
32
+ @@decode_term_ids = value
33
+ @@connection.decode_term_ids = @@decode_term_ids
34
+ end
35
+
36
+ def decode_term_ids
37
+ @@decode_term_ids
38
+ end
25
39
 
26
40
  def classify(*args)
27
41
  options = extract_options!(args)
28
- raise InsufficientArgs if options[:alternate_body].empty? && options[:document_uri].empty?
42
+ raise InsufficientArgs if options[:alternate_body].nil? && options[:document_uri].nil?
29
43
 
30
44
  result = post @@default_options.merge(options)
31
45
  end
32
46
 
47
+ def decode_term_id(term_id)
48
+ raise RealmNotSpecified if @@connection.nil?
49
+ begin
50
+ raw_host = decode_host(@@connection.realm)
51
+ path = [raw_host, 'cgi-bin', "#{TERM_QUERY}#{term_id}"].join('/')
52
+ term_doc = Nokogiri::XML.parse(open(path))
53
+ rescue
54
+ raise SemaphoreError
55
+ end
56
+ term_doc.xpath('//BROWSE_TERM/TERM/NAME').inner_text
57
+ end
58
+
33
59
  private
34
60
 
35
61
  def post(data)
36
62
  raise RealmNotSpecified if @@connection.nil?
37
63
  @@connection.post data
38
64
  end
65
+
66
+ def decode_host(realm)
67
+ realm.split('/').delete_if {|i| i == 'index.html' }.join('/').gsub(':5058', '')
68
+ end
39
69
 
40
70
  def extract_options!(args)
41
71
  if args.last.is_a?(Hash)
@@ -3,9 +3,11 @@ module Semaphore
3
3
  class Connection
4
4
 
5
5
  attr_reader :realm
6
+ attr_accessor :decode_term_ids
6
7
 
7
8
  def initialize(realm, proxy=nil)
8
9
  @realm = realm
10
+ @decode_term_ids = nil
9
11
  @proxy = proxy
10
12
  end
11
13
 
@@ -68,7 +70,11 @@ module Semaphore
68
70
  begin
69
71
  doc = Nokogiri::XML.parse(response)
70
72
  doc.xpath('//META').each do |node|
71
- data << { :term => node['value'], :key => node['key'], :score => node['score'] } if node['name'] == "Generic"
73
+ if node['name'] == "Generic_ID"
74
+ term = { :term_id => node['value'].to_i, :score => node['score'].to_f }
75
+ term[:term] = Client.decode_term_id(term[:term_id]) if @decode_term_ids
76
+ data << term
77
+ end
72
78
  end
73
79
  rescue
74
80
  raise DecodeError, "content: <#{response.body}>"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: semaphore_classification
3
3
  version: !ruby/object:Gem::Version
4
- hash: 31
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
8
  - 2
10
- version: 0.1.2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Mauricio Gomes
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-08-27 00:00:00 -04:00
18
+ date: 2010-10-12 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency