semaphore_classification 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +5 -0
- data/VERSION +1 -1
- data/lib/semaphore_classification/client.rb +31 -1
- data/lib/semaphore_classification/connection.rb +7 -1
- metadata +4 -4
data/README.rdoc
CHANGED
@@ -14,6 +14,11 @@ To classify documents:
|
|
14
14
|
|
15
15
|
Most likely you will specify a :document_uri when classifying documents, but if you do not you will need to specify an :alternate_body
|
16
16
|
|
17
|
+
== Semaphore::Client.decode_term_id(<term_id>) Options
|
18
|
+
|
19
|
+
This method is used when you have the ID of a term and would like to retrieve
|
20
|
+
its name from the Search Enhancement Server.
|
21
|
+
|
17
22
|
== Semaphore::Client.classify() Options
|
18
23
|
|
19
24
|
=== :document_uri (optional)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.2.0
|
data/lib/semaphore_classification/client.rb
CHANGED
@@ -15,27 +15,57 @@ module Semaphore
|
|
15
15
|
:threshold => 48, :language => LANGUAGES[:english_marathon_stemmer], :generated_keys => true, :min_avg_article_page_size => 1.0,
|
16
16
|
:character_cutoff => 500000, :document_score_limit => 0, :article_mode => :single
|
17
17
|
}
|
18
|
+
|
19
|
+
TERM_QUERY = 'select.exe?TBDB=disp_taxonomy&TEMPLATE=service.xml&SERVICE=browse&ID='
|
20
|
+
|
18
21
|
@@connection = nil
|
22
|
+
@@decode_term_ids = nil
|
19
23
|
|
20
24
|
class << self
|
21
25
|
|
22
26
|
def set_realm(realm, proxy=nil)
|
23
27
|
@@connection = Connection.new(realm, proxy)
|
24
28
|
end
|
29
|
+
|
30
|
+
def decode_term_ids=(value)
|
31
|
+
raise RealmNotSpecified if @@connection.nil?
|
32
|
+
@@decode_term_ids = value
|
33
|
+
@@connection.decode_term_ids = @@decode_term_ids
|
34
|
+
end
|
35
|
+
|
36
|
+
def decode_term_ids
|
37
|
+
@@decode_term_ids
|
38
|
+
end
|
25
39
|
|
26
40
|
def classify(*args)
|
27
41
|
options = extract_options!(args)
|
28
|
-
raise InsufficientArgs if options[:alternate_body].
|
42
|
+
raise InsufficientArgs if options[:alternate_body].nil? && options[:document_uri].nil?
|
29
43
|
|
30
44
|
result = post @@default_options.merge(options)
|
31
45
|
end
|
32
46
|
|
47
|
+
def decode_term_id(term_id)
|
48
|
+
raise RealmNotSpecified if @@connection.nil?
|
49
|
+
begin
|
50
|
+
raw_host = decode_host(@@connection.realm)
|
51
|
+
path = [raw_host, 'cgi-bin', "#{TERM_QUERY}#{term_id}"].join('/')
|
52
|
+
term_doc = Nokogiri::XML.parse(open(path))
|
53
|
+
rescue
|
54
|
+
raise SemaphoreError
|
55
|
+
end
|
56
|
+
term_doc.xpath('//BROWSE_TERM/TERM/NAME').inner_text
|
57
|
+
end
|
58
|
+
|
33
59
|
private
|
34
60
|
|
35
61
|
def post(data)
|
36
62
|
raise RealmNotSpecified if @@connection.nil?
|
37
63
|
@@connection.post data
|
38
64
|
end
|
65
|
+
|
66
|
+
def decode_host(realm)
|
67
|
+
realm.split('/').delete_if {|i| i == 'index.html' }.join('/').gsub(':5058', '')
|
68
|
+
end
|
39
69
|
|
40
70
|
def extract_options!(args)
|
41
71
|
if args.last.is_a?(Hash)
|
data/lib/semaphore_classification/connection.rb
CHANGED
@@ -3,9 +3,11 @@ module Semaphore
|
|
3
3
|
class Connection
|
4
4
|
|
5
5
|
attr_reader :realm
|
6
|
+
attr_accessor :decode_term_ids
|
6
7
|
|
7
8
|
def initialize(realm, proxy=nil)
|
8
9
|
@realm = realm
|
10
|
+
@decode_term_ids = nil
|
9
11
|
@proxy = proxy
|
10
12
|
end
|
11
13
|
|
@@ -68,7 +70,11 @@ module Semaphore
|
|
68
70
|
begin
|
69
71
|
doc = Nokogiri::XML.parse(response)
|
70
72
|
doc.xpath('//META').each do |node|
|
71
|
-
|
73
|
+
if node['name'] == "Generic_ID"
|
74
|
+
term = { :term_id => node['value'].to_i, :score => node['score'].to_f }
|
75
|
+
term[:term] = Client.decode_term_id(term[:term_id]) if @decode_term_ids
|
76
|
+
data << term
|
77
|
+
end
|
72
78
|
end
|
73
79
|
rescue
|
74
80
|
raise DecodeError, "content: <#{response.body}>"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: semaphore_classification
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
9
8
|
- 2
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Mauricio Gomes
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-10-12 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|