semaphore_classification 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +5 -0
- data/VERSION +1 -1
- data/lib/semaphore_classification/client.rb +31 -1
- data/lib/semaphore_classification/connection.rb +7 -1
- metadata +4 -4
data/README.rdoc
CHANGED
@@ -14,6 +14,11 @@ To classify documents:
|
|
14
14
|
|
15
15
|
Most likely you will specify a :document_uri when classifying documents, but if you do not, you will need to specify an :alternate_body.
|
16
16
|
|
17
|
+
== Semaphore::Client.decode_term_id(<term_id>) Options
|
18
|
+
|
19
|
+
This method is used when you have the ID of a term and would like to retrieve
|
20
|
+
its name from the Search Enhancement Server.
|
21
|
+
|
17
22
|
== Semaphore::Client.classify() Options
|
18
23
|
|
19
24
|
=== :document_uri (optional)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.2
|
1
|
+
0.2.0
|
@@ -15,27 +15,57 @@ module Semaphore
|
|
15
15
|
:threshold => 48, :language => LANGUAGES[:english_marathon_stemmer], :generated_keys => true, :min_avg_article_page_size => 1.0,
|
16
16
|
:character_cutoff => 500000, :document_score_limit => 0, :article_mode => :single
|
17
17
|
}
|
18
|
+
|
19
|
+
TERM_QUERY = 'select.exe?TBDB=disp_taxonomy&TEMPLATE=service.xml&SERVICE=browse&ID='
|
20
|
+
|
18
21
|
@@connection = nil
|
22
|
+
@@decode_term_ids = nil
|
19
23
|
|
20
24
|
class << self
|
21
25
|
|
22
26
|
def set_realm(realm, proxy=nil)
|
23
27
|
@@connection = Connection.new(realm, proxy)
|
24
28
|
end
|
29
|
+
|
30
|
+
def decode_term_ids=(value)
|
31
|
+
raise RealmNotSpecified if @@connection.nil?
|
32
|
+
@@decode_term_ids = value
|
33
|
+
@@connection.decode_term_ids = @@decode_term_ids
|
34
|
+
end
|
35
|
+
|
36
|
+
def decode_term_ids
|
37
|
+
@@decode_term_ids
|
38
|
+
end
|
25
39
|
|
26
40
|
def classify(*args)
|
27
41
|
options = extract_options!(args)
|
28
|
-
raise InsufficientArgs if options[:alternate_body].
|
42
|
+
raise InsufficientArgs if options[:alternate_body].nil? && options[:document_uri].nil?
|
29
43
|
|
30
44
|
result = post @@default_options.merge(options)
|
31
45
|
end
|
32
46
|
|
47
|
+
def decode_term_id(term_id)
|
48
|
+
raise RealmNotSpecified if @@connection.nil?
|
49
|
+
begin
|
50
|
+
raw_host = decode_host(@@connection.realm)
|
51
|
+
path = [raw_host, 'cgi-bin', "#{TERM_QUERY}#{term_id}"].join('/')
|
52
|
+
term_doc = Nokogiri::XML.parse(open(path))
|
53
|
+
rescue
|
54
|
+
raise SemaphoreError
|
55
|
+
end
|
56
|
+
term_doc.xpath('//BROWSE_TERM/TERM/NAME').inner_text
|
57
|
+
end
|
58
|
+
|
33
59
|
private
|
34
60
|
|
35
61
|
def post(data)
|
36
62
|
raise RealmNotSpecified if @@connection.nil?
|
37
63
|
@@connection.post data
|
38
64
|
end
|
65
|
+
|
66
|
+
def decode_host(realm)
|
67
|
+
realm.split('/').delete_if {|i| i == 'index.html' }.join('/').gsub(':5058', '')
|
68
|
+
end
|
39
69
|
|
40
70
|
def extract_options!(args)
|
41
71
|
if args.last.is_a?(Hash)
|
@@ -3,9 +3,11 @@ module Semaphore
|
|
3
3
|
class Connection
|
4
4
|
|
5
5
|
attr_reader :realm
|
6
|
+
attr_accessor :decode_term_ids
|
6
7
|
|
7
8
|
def initialize(realm, proxy=nil)
|
8
9
|
@realm = realm
|
10
|
+
@decode_term_ids = nil
|
9
11
|
@proxy = proxy
|
10
12
|
end
|
11
13
|
|
@@ -68,7 +70,11 @@ module Semaphore
|
|
68
70
|
begin
|
69
71
|
doc = Nokogiri::XML.parse(response)
|
70
72
|
doc.xpath('//META').each do |node|
|
71
|
-
|
73
|
+
if node['name'] == "Generic_ID"
|
74
|
+
term = { :term_id => node['value'].to_i, :score => node['score'].to_f }
|
75
|
+
term[:term] = Client.decode_term_id(term[:term_id]) if @decode_term_ids
|
76
|
+
data << term
|
77
|
+
end
|
72
78
|
end
|
73
79
|
rescue
|
74
80
|
raise DecodeError, "content: <#{response.body}>"
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: semaphore_classification
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 1
|
9
8
|
- 2
|
10
|
-
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Mauricio Gomes
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-10-12 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|