semantic_hacker 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.txt +49 -0
- data/Rakefile +18 -0
- data/bin/semantic_hacker +7 -0
- data/data/categories.yaml +2048 -0
- data/data/labels.yaml +1762 -0
- data/lib/semantic_hacker/api.rb +29 -0
- data/lib/semantic_hacker/category.rb +19 -0
- data/lib/semantic_hacker/concept.rb +14 -0
- data/lib/semantic_hacker/setup.rb +32 -0
- data/lib/semantic_hacker/signature.rb +19 -0
- data/lib/semantic_hacker.rb +99 -0
- data/semantic_hacker.gemspec +36 -0
- data/spec/semantic_hacker_spec.rb +6 -0
- data/spec/spec_helper.rb +15 -0
- data/test/fixtures/1760484413.html +339 -0
- data/test/fixtures/category.json +17 -0
- data/test/fixtures/concept.json +27 -0
- data/test/fixtures/signature.json +135 -0
- data/test/semantic_hacker_test.rb +32 -0
- data/test/test_helper.rb +38 -0
- data/version.txt +1 -0
- metadata +100 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
{
|
2
|
+
"about": {
|
3
|
+
"requestId": "EEDC78572D5F839FD115E0250FA00ADE",
|
4
|
+
"docId": "2627917C26F7F284C8364209B0B21354",
|
5
|
+
"systemType": "concept",
|
6
|
+
"configId": "odp_2007_l1_1.7k",
|
7
|
+
"contentType": "text/html",
|
8
|
+
"contentDigest": "C7DBE9045BF3F905EA8E7273EC1A4540",
|
9
|
+
"requestDate": "2010-05-27T19:19:46+00:00",
|
10
|
+
"systemVersion": "2.0",
|
11
|
+
"sourceUri": "http://washingtondc.craigslist.org/nva/apa/1760484413.html"
|
12
|
+
},
|
13
|
+
"conceptExtractor": {"conceptExtractorResponse": {"concepts": [
|
14
|
+
{
|
15
|
+
"weight": "0.042842045",
|
16
|
+
"label": "camden"
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"weight": "0.002559835",
|
20
|
+
"label": "lake"
|
21
|
+
},
|
22
|
+
{
|
23
|
+
"weight": "0.0018040233",
|
24
|
+
"label": "fair"
|
25
|
+
}
|
26
|
+
]}}
|
27
|
+
}
|
@@ -0,0 +1,135 @@
|
|
1
|
+
{
|
2
|
+
"about": {
|
3
|
+
"requestId": "96696110A319CDAE224908941C7AA941",
|
4
|
+
"docId": "2627917C26F7F284C8364209B0B21354",
|
5
|
+
"systemType": "signature",
|
6
|
+
"configId": "odp_2007_l1_1.7k",
|
7
|
+
"contentType": "text/html",
|
8
|
+
"contentDigest": "C7DBE9045BF3F905EA8E7273EC1A4540",
|
9
|
+
"requestDate": "2010-05-27T19:19:26+00:00",
|
10
|
+
"systemVersion": "2.0",
|
11
|
+
"sourceUri": "http://washingtondc.craigslist.org/nva/apa/1760484413.html"
|
12
|
+
},
|
13
|
+
"siggen": {"siggenResponse": {"signature": [
|
14
|
+
{
|
15
|
+
"weight": "0.28168735",
|
16
|
+
"index": "1096"
|
17
|
+
},
|
18
|
+
{
|
19
|
+
"weight": "0.26628992",
|
20
|
+
"index": "1184"
|
21
|
+
},
|
22
|
+
{
|
23
|
+
"weight": "0.2640163",
|
24
|
+
"index": "1572"
|
25
|
+
},
|
26
|
+
{
|
27
|
+
"weight": "0.26159292",
|
28
|
+
"index": "1098"
|
29
|
+
},
|
30
|
+
{
|
31
|
+
"weight": "0.21498927",
|
32
|
+
"index": "1177"
|
33
|
+
},
|
34
|
+
{
|
35
|
+
"weight": "0.21138062",
|
36
|
+
"index": "1130"
|
37
|
+
},
|
38
|
+
{
|
39
|
+
"weight": "0.21044247",
|
40
|
+
"index": "1182"
|
41
|
+
},
|
42
|
+
{
|
43
|
+
"weight": "0.19739439",
|
44
|
+
"index": "1129"
|
45
|
+
},
|
46
|
+
{
|
47
|
+
"weight": "0.19653957",
|
48
|
+
"index": "1135"
|
49
|
+
},
|
50
|
+
{
|
51
|
+
"weight": "0.18718655",
|
52
|
+
"index": "1216"
|
53
|
+
},
|
54
|
+
{
|
55
|
+
"weight": "0.18387607",
|
56
|
+
"index": "254"
|
57
|
+
},
|
58
|
+
{
|
59
|
+
"weight": "0.1741769",
|
60
|
+
"index": "305"
|
61
|
+
},
|
62
|
+
{
|
63
|
+
"weight": "0.16953631",
|
64
|
+
"index": "658"
|
65
|
+
},
|
66
|
+
{
|
67
|
+
"weight": "0.16824634",
|
68
|
+
"index": "1491"
|
69
|
+
},
|
70
|
+
{
|
71
|
+
"weight": "0.15728694",
|
72
|
+
"index": "1045"
|
73
|
+
},
|
74
|
+
{
|
75
|
+
"weight": "0.15663064",
|
76
|
+
"index": "909"
|
77
|
+
},
|
78
|
+
{
|
79
|
+
"weight": "0.15609612",
|
80
|
+
"index": "854"
|
81
|
+
},
|
82
|
+
{
|
83
|
+
"weight": "0.15229575",
|
84
|
+
"index": "932"
|
85
|
+
},
|
86
|
+
{
|
87
|
+
"weight": "0.15147352",
|
88
|
+
"index": "1475"
|
89
|
+
},
|
90
|
+
{
|
91
|
+
"weight": "0.14810044",
|
92
|
+
"index": "1199"
|
93
|
+
},
|
94
|
+
{
|
95
|
+
"weight": "0.14544998",
|
96
|
+
"index": "1188"
|
97
|
+
},
|
98
|
+
{
|
99
|
+
"weight": "0.14537416",
|
100
|
+
"index": "1036"
|
101
|
+
},
|
102
|
+
{
|
103
|
+
"weight": "0.14495832",
|
104
|
+
"index": "1086"
|
105
|
+
},
|
106
|
+
{
|
107
|
+
"weight": "0.14386787",
|
108
|
+
"index": "1133"
|
109
|
+
},
|
110
|
+
{
|
111
|
+
"weight": "0.14327037",
|
112
|
+
"index": "1301"
|
113
|
+
},
|
114
|
+
{
|
115
|
+
"weight": "0.14155875",
|
116
|
+
"index": "1230"
|
117
|
+
},
|
118
|
+
{
|
119
|
+
"weight": "0.14054182",
|
120
|
+
"index": "890"
|
121
|
+
},
|
122
|
+
{
|
123
|
+
"weight": "0.13867372",
|
124
|
+
"index": "1197"
|
125
|
+
},
|
126
|
+
{
|
127
|
+
"weight": "0.13715525",
|
128
|
+
"index": "1069"
|
129
|
+
},
|
130
|
+
{
|
131
|
+
"weight": "0.13608803",
|
132
|
+
"index": "1062"
|
133
|
+
}
|
134
|
+
]}}
|
135
|
+
}
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require 'test_helper'
|
2
|
+
|
3
|
+
# Exercises the category, concept and signature lookups against canned JSON
# fixtures registered through stub_get, so no test issues a live request.
class SemanticHackerTest < Test::Unit::TestCase

  should "have category method for category content" do
    # Serve category.json for the category endpoint.
    stub_get('/category?uri=http%3A%2F%2Fwashingtondc.craigslist.org%2Fnva%2Fapa%2F1760484413.html&format=json', 'category.json')

    category_result = SemanticHacker::Category.uri("http://washingtondc.craigslist.org/nva/apa/1760484413.html")

    category_result[:categories].size.should == 1
    category_result[:about].system_type.should == "category"
    category_result[:about].config_id.should == "odp_2007_l1_1.7k"
    category_result[:about].source_uri.should == "http://washingtondc.craigslist.org/nva/apa/1760484413.html"
  end

  should "have concept method for concept content" do
    # Serve concept.json (three concepts) for the concept endpoint.
    stub_get('/concept?uri=http%3A%2F%2Fwashingtondc.craigslist.org%2Fnva%2Fapa%2F1760484413.html&format=json', 'concept.json')

    concept_result = SemanticHacker::Concept.uri("http://washingtondc.craigslist.org/nva/apa/1760484413.html")

    concept_result[:concepts].size.should == 3
    concept_result[:about].system_type.should == "concept"
    concept_result[:about].config_id.should == "odp_2007_l1_1.7k"
    concept_result[:about].source_uri.should == "http://washingtondc.craigslist.org/nva/apa/1760484413.html"
  end

  should "have signature method for signature content" do
    # Serve signature.json (thirty weight/index pairs) for the signature endpoint.
    stub_get('/signature?uri=http%3A%2F%2Fwashingtondc.craigslist.org%2Fnva%2Fapa%2F1760484413.html&format=json', 'signature.json')

    signature_result = SemanticHacker::Signature.uri("http://washingtondc.craigslist.org/nva/apa/1760484413.html")

    signature_result[:signature].size.should == 30
    signature_result[:about].system_type.should == "signature"
    signature_result[:about].config_id.should == "odp_2007_l1_1.7k"
    signature_result[:about].source_uri.should == "http://washingtondc.craigslist.org/nva/apa/1760484413.html"
  end

end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'pathname'
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
gem 'shoulda', '>= 2.10.1'
|
6
|
+
gem 'jnunemaker-matchy', '0.4.0'
|
7
|
+
gem 'mocha', '0.9.4'
|
8
|
+
gem 'fakeweb', '>= 1.2.5'
|
9
|
+
|
10
|
+
require 'shoulda'
|
11
|
+
require 'matchy'
|
12
|
+
require 'mocha'
|
13
|
+
require 'fakeweb'
|
14
|
+
|
15
|
+
# Tell FakeWeb not to allow real network connections during the test run.
FakeWeb.allow_net_connect = false

# Resolve the sibling lib/ directory and load the library under test from it.
dir = Pathname(__FILE__).dirname.join('../lib').expand_path

require dir.join('semantic_hacker')
|
20
|
+
|
21
|
+
# Reopened with an empty body; nothing is added to the test case class here.
class Test::Unit::TestCase; end
|
23
|
+
|
24
|
+
# Reads a fixture from the "fixtures" directory located next to this file.
#
# filename - name of the fixture file, relative to the fixtures directory.
#
# Returns the file contents as a String, or '' when filename is the empty
# string (no file is read in that case).
# Raises whatever File.read raises when the file cannot be read
# (for example Errno::ENOENT for a missing fixture).
def fixture_file(filename)
  return '' if filename == ''

  # File.join lets the standard library handle path separators instead of
  # hand-built string concatenation.
  fixture_path = File.expand_path(File.join(File.dirname(__FILE__), 'fixtures', filename))
  File.read(fixture_path)
end
|
29
|
+
|
30
|
+
# Expands a request path into a full SemanticHacker API URL.
#
# url - either an absolute URL (a string beginning with "http"), which is
#       returned unchanged, or a path/query string that is appended to the
#       API base "http://api.semantichacker.com/55u2g9gs".
#       NOTE(review): the "55u2g9gs" segment looks like an API token baked
#       into the base URL - confirm against the library's configuration.
#
# Returns the absolute URL as a String.
def semantichacker_url(url)
  # \A anchors at the start of the whole string; ^ would also match after any
  # embedded newline and misclassify a relative path as absolute.
  url =~ /\Ahttp/ ? url : "http://api.semantichacker.com/55u2g9gs#{url}"
end
|
33
|
+
|
34
|
+
# Registers a canned GET response with FakeWeb.
#
# url      - absolute URL or API-relative path; expanded via semantichacker_url.
# filename - fixture whose contents become the response body (see fixture_file).
# status   - optional value passed through as the :status option; omitted
#            from the options when nil.
def stub_get(url, filename, status=nil)
  response = {:body => fixture_file(filename)}
  response[:status] = status unless status.nil?
  FakeWeb.register_uri(:get, semantichacker_url(url), response)
end
|
data/version.txt
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
metadata
ADDED
@@ -0,0 +1,100 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: semantic_hacker
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease: false
|
5
|
+
segments:
|
6
|
+
- 0
|
7
|
+
- 0
|
8
|
+
- 1
|
9
|
+
version: 0.0.1
|
10
|
+
platform: ruby
|
11
|
+
authors:
|
12
|
+
- Thomas Gallaway
|
13
|
+
autorequire:
|
14
|
+
bindir: bin
|
15
|
+
cert_chain: []
|
16
|
+
|
17
|
+
date: 2010-05-27 00:00:00 -04:00
|
18
|
+
default_executable: semantic_hacker
|
19
|
+
dependencies:
|
20
|
+
- !ruby/object:Gem::Dependency
|
21
|
+
name: bones
|
22
|
+
prerelease: false
|
23
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
24
|
+
requirements:
|
25
|
+
- - ">="
|
26
|
+
- !ruby/object:Gem::Version
|
27
|
+
segments:
|
28
|
+
- 3
|
29
|
+
- 4
|
30
|
+
- 1
|
31
|
+
version: 3.4.1
|
32
|
+
type: :development
|
33
|
+
version_requirements: *id001
|
34
|
+
description: "Semantic Signatures\xC2\xAE are a new way of representing and analyzing semantic information (meaning) in text. Semantic Signatures, produced by TextWise\xE2\x80\x99s Trainable Semantic Vectors (TSV) technology, provide a rich semantic representation of the multiple concepts and topics contained in a body of text. Semantic Signatures can be constructed for a wide range of texts including individual words, phrases, word lists (e.g. metadata), short passages (such as text advertisements or image labels), web pages, or full text documents (e.g. technical articles)."
|
35
|
+
email: atomist@atomlab.us
|
36
|
+
executables:
|
37
|
+
- semantic_hacker
|
38
|
+
extensions: []
|
39
|
+
|
40
|
+
extra_rdoc_files:
|
41
|
+
- History.txt
|
42
|
+
- README.txt
|
43
|
+
- bin/semantic_hacker
|
44
|
+
- version.txt
|
45
|
+
files:
|
46
|
+
- History.txt
|
47
|
+
- README.txt
|
48
|
+
- Rakefile
|
49
|
+
- bin/semantic_hacker
|
50
|
+
- data/categories.yaml
|
51
|
+
- data/labels.yaml
|
52
|
+
- lib/semantic_hacker.rb
|
53
|
+
- lib/semantic_hacker/api.rb
|
54
|
+
- lib/semantic_hacker/category.rb
|
55
|
+
- lib/semantic_hacker/concept.rb
|
56
|
+
- lib/semantic_hacker/setup.rb
|
57
|
+
- lib/semantic_hacker/signature.rb
|
58
|
+
- semantic_hacker.gemspec
|
59
|
+
- spec/semantic_hacker_spec.rb
|
60
|
+
- spec/spec_helper.rb
|
61
|
+
- test/fixtures/1760484413.html
|
62
|
+
- test/fixtures/category.json
|
63
|
+
- test/fixtures/concept.json
|
64
|
+
- test/fixtures/signature.json
|
65
|
+
- test/semantic_hacker_test.rb
|
66
|
+
- test/test_helper.rb
|
67
|
+
- version.txt
|
68
|
+
has_rdoc: true
|
69
|
+
homepage: http://www.atomlab.us/semantic_hacker
|
70
|
+
licenses: []
|
71
|
+
|
72
|
+
post_install_message:
|
73
|
+
rdoc_options:
|
74
|
+
- --main
|
75
|
+
- README.txt
|
76
|
+
require_paths:
|
77
|
+
- lib
|
78
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
segments:
|
83
|
+
- 0
|
84
|
+
version: "0"
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
segments:
|
90
|
+
- 0
|
91
|
+
version: "0"
|
92
|
+
requirements: []
|
93
|
+
|
94
|
+
rubyforge_project: semantic_hacker
|
95
|
+
rubygems_version: 1.3.6
|
96
|
+
signing_key:
|
97
|
+
specification_version: 3
|
98
|
+
summary: "Semantic Signatures\xC2\xAE are a new way of representing and analyzing semantic information (meaning) in text"
|
99
|
+
test_files:
|
100
|
+
- test/test_helper.rb
|