ankusa 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +6 -4
- data/Rakefile +2 -2
- data/docs/classes/Ankusa.html +8 -8
- data/docs/classes/Ankusa/Classifier.html +82 -271
- data/docs/classes/Ankusa/HBaseStorage.html +537 -0
- data/docs/classes/Ankusa/MemoryStorage.html +439 -0
- data/docs/classes/Ankusa/TextHash.html +84 -29
- data/docs/classes/String.html +172 -0
- data/docs/created.rid +1 -1
- data/docs/files/README_rdoc.html +6 -4
- data/docs/files/lib/ankusa/classifier_rb.html +1 -1
- data/docs/files/lib/ankusa/extensions_rb.html +108 -0
- data/docs/files/lib/ankusa/hasher_rb.html +1 -1
- data/docs/files/lib/ankusa/hbase_storage_rb.html +108 -0
- data/docs/files/lib/ankusa/{nbclass_rb.html → memory_storage_rb.html} +4 -4
- data/docs/files/lib/ankusa_rb.html +4 -2
- data/docs/fr_class_index.html +3 -1
- data/docs/fr_file_index.html +3 -1
- data/docs/fr_method_index.html +41 -17
- data/lib/ankusa.rb +3 -1
- data/lib/ankusa/classifier.rb +37 -86
- data/lib/ankusa/extensions.rb +13 -0
- data/lib/ankusa/hasher.rb +24 -10
- data/lib/ankusa/hbase_storage.rb +109 -0
- data/lib/ankusa/memory_storage.rb +61 -0
- metadata +13 -7
- data/docs/classes/Ankusa/NBClass.html +0 -168
- data/lib/ankusa/nbclass.rb +0 -15
@@ -0,0 +1,172 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Class: String</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href=".././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Class</strong></td>
|
53
|
+
<td class="class-name-in-header">String</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../files/lib/ankusa/extensions_rb.html">
|
59
|
+
lib/ankusa/extensions.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
<tr class="top-aligned-row">
|
66
|
+
<td><strong>Parent:</strong></td>
|
67
|
+
<td>
|
68
|
+
Object
|
69
|
+
</td>
|
70
|
+
</tr>
|
71
|
+
</table>
|
72
|
+
</div>
|
73
|
+
<!-- banner header -->
|
74
|
+
|
75
|
+
<div id="bodyContent">
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
<div id="contextContent">
|
80
|
+
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
<div id="method-list">
|
86
|
+
<h3 class="section-bar">Methods</h3>
|
87
|
+
|
88
|
+
<div class="name-list">
|
89
|
+
<a href="#M000001">numeric?</a>
|
90
|
+
<a href="#M000002">to_ascii</a>
|
91
|
+
</div>
|
92
|
+
</div>
|
93
|
+
|
94
|
+
</div>
|
95
|
+
|
96
|
+
|
97
|
+
<!-- if includes -->
|
98
|
+
|
99
|
+
<div id="section">
|
100
|
+
|
101
|
+
|
102
|
+
|
103
|
+
|
104
|
+
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
<!-- if method_list -->
|
109
|
+
<div id="methods">
|
110
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
111
|
+
|
112
|
+
<div id="method-M000001" class="method-detail">
|
113
|
+
<a name="M000001"></a>
|
114
|
+
|
115
|
+
<div class="method-heading">
|
116
|
+
<a href="#M000001" class="method-signature">
|
117
|
+
<span class="method-name">numeric?</span><span class="method-args">()</span>
|
118
|
+
</a>
|
119
|
+
</div>
|
120
|
+
|
121
|
+
<div class="method-description">
|
122
|
+
<p><a class="source-toggle" href="#"
|
123
|
+
onclick="toggleCode('M000001-source');return false;">[Source]</a></p>
|
124
|
+
<div class="method-source-code" id="M000001-source">
|
125
|
+
<pre>
|
126
|
+
<span class="ruby-comment cmt"># File lib/ankusa/extensions.rb, line 4</span>
|
127
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">numeric?</span>
|
128
|
+
<span class="ruby-keyword kw">true</span> <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Float</span>(<span class="ruby-keyword kw">self</span>) <span class="ruby-keyword kw">rescue</span> <span class="ruby-keyword kw">false</span>
|
129
|
+
<span class="ruby-keyword kw">end</span>
|
130
|
+
</pre>
|
131
|
+
</div>
|
132
|
+
</div>
|
133
|
+
</div>
|
134
|
+
|
135
|
+
<div id="method-M000002" class="method-detail">
|
136
|
+
<a name="M000002"></a>
|
137
|
+
|
138
|
+
<div class="method-heading">
|
139
|
+
<a href="#M000002" class="method-signature">
|
140
|
+
<span class="method-name">to_ascii</span><span class="method-args">()</span>
|
141
|
+
</a>
|
142
|
+
</div>
|
143
|
+
|
144
|
+
<div class="method-description">
|
145
|
+
<p><a class="source-toggle" href="#"
|
146
|
+
onclick="toggleCode('M000002-source');return false;">[Source]</a></p>
|
147
|
+
<div class="method-source-code" id="M000002-source">
|
148
|
+
<pre>
|
149
|
+
<span class="ruby-comment cmt"># File lib/ankusa/extensions.rb, line 8</span>
|
150
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">to_ascii</span>
|
151
|
+
<span class="ruby-comment cmt"># from http://www.jroller.com/obie/tags/unicode</span>
|
152
|
+
<span class="ruby-identifier">converter</span> = <span class="ruby-constant">Iconv</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">'ASCII//IGNORE//TRANSLIT'</span>, <span class="ruby-value str">'UTF-8'</span>)
|
153
|
+
<span class="ruby-identifier">converter</span>.<span class="ruby-identifier">iconv</span>(<span class="ruby-keyword kw">self</span>).<span class="ruby-identifier">unpack</span>(<span class="ruby-value str">'U*'</span>).<span class="ruby-identifier">select</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">cp</span><span class="ruby-operator">|</span> <span class="ruby-identifier">cp</span> <span class="ruby-operator"><</span> <span class="ruby-value">127</span> }.<span class="ruby-identifier">pack</span>(<span class="ruby-value str">'U*'</span>)
|
154
|
+
<span class="ruby-keyword kw">end</span>
|
155
|
+
</pre>
|
156
|
+
</div>
|
157
|
+
</div>
|
158
|
+
</div>
|
159
|
+
|
160
|
+
|
161
|
+
</div>
|
162
|
+
|
163
|
+
|
164
|
+
</div>
|
165
|
+
|
166
|
+
|
167
|
+
<div id="validator-badges">
|
168
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
169
|
+
</div>
|
170
|
+
|
171
|
+
</body>
|
172
|
+
</html>
|
data/docs/created.rid
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
Thu, 02 Dec 2010 16:24:45 -0500
|
data/docs/files/README_rdoc.html
CHANGED
@@ -56,7 +56,7 @@
|
|
56
56
|
</tr>
|
57
57
|
<tr class="top-aligned-row">
|
58
58
|
<td><strong>Last Update:</strong></td>
|
59
|
-
<td>
|
59
|
+
<td>Thu Dec 02 16:24:11 -0500 2010</td>
|
60
60
|
</tr>
|
61
61
|
</table>
|
62
62
|
</div>
|
@@ -87,12 +87,11 @@ been started as well. Then:
|
|
87
87
|
<pre>
|
88
88
|
require 'rubygems'
|
89
89
|
require 'ankusa'
|
90
|
-
require 'hbaserb'
|
91
90
|
|
92
91
|
# connect to HBase
|
93
|
-
|
92
|
+
storage = Ankusa::HBaseStorage.new 'localhost'
|
93
|
+
c = Ankusa::Classifier.new storage
|
94
94
|
|
95
|
-
c = Classifier.new client
|
96
95
|
c.train :spam, "This is some spammy text"
|
97
96
|
c.train :good, "This is not the bad stuff"
|
98
97
|
|
@@ -105,6 +104,9 @@ been started as well. Then:
|
|
105
104
|
|
106
105
|
# get a list of all classes
|
107
106
|
puts c.classes
|
107
|
+
|
108
|
+
# close connection
|
109
|
+
storage.close
|
108
110
|
</pre>
|
109
111
|
|
110
112
|
</div>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: extensions.rb</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>extensions.rb</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>lib/ankusa/extensions.rb
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Wed Dec 01 15:34:42 -0500 2010</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
|
72
|
+
<div id="requires-list">
|
73
|
+
<h3 class="section-bar">Required files</h3>
|
74
|
+
|
75
|
+
<div class="name-list">
|
76
|
+
iconv
|
77
|
+
</div>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
</div>
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
|
86
|
+
<!-- if includes -->
|
87
|
+
|
88
|
+
<div id="section">
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
<!-- if method_list -->
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
|
103
|
+
<div id="validator-badges">
|
104
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
105
|
+
</div>
|
106
|
+
|
107
|
+
</body>
|
108
|
+
</html>
|
@@ -0,0 +1,108 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>File: hbase_storage.rb</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="fileHeader">
|
50
|
+
<h1>hbase_storage.rb</h1>
|
51
|
+
<table class="header-table">
|
52
|
+
<tr class="top-aligned-row">
|
53
|
+
<td><strong>Path:</strong></td>
|
54
|
+
<td>lib/ankusa/hbase_storage.rb
|
55
|
+
</td>
|
56
|
+
</tr>
|
57
|
+
<tr class="top-aligned-row">
|
58
|
+
<td><strong>Last Update:</strong></td>
|
59
|
+
<td>Thu Dec 02 13:19:40 -0500 2010</td>
|
60
|
+
</tr>
|
61
|
+
</table>
|
62
|
+
</div>
|
63
|
+
<!-- banner header -->
|
64
|
+
|
65
|
+
<div id="bodyContent">
|
66
|
+
|
67
|
+
|
68
|
+
|
69
|
+
<div id="contextContent">
|
70
|
+
|
71
|
+
|
72
|
+
<div id="requires-list">
|
73
|
+
<h3 class="section-bar">Required files</h3>
|
74
|
+
|
75
|
+
<div class="name-list">
|
76
|
+
hbaserb
|
77
|
+
</div>
|
78
|
+
</div>
|
79
|
+
|
80
|
+
</div>
|
81
|
+
|
82
|
+
|
83
|
+
</div>
|
84
|
+
|
85
|
+
|
86
|
+
<!-- if includes -->
|
87
|
+
|
88
|
+
<div id="section">
|
89
|
+
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
|
95
|
+
|
96
|
+
|
97
|
+
<!-- if method_list -->
|
98
|
+
|
99
|
+
|
100
|
+
</div>
|
101
|
+
|
102
|
+
|
103
|
+
<div id="validator-badges">
|
104
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
105
|
+
</div>
|
106
|
+
|
107
|
+
</body>
|
108
|
+
</html>
|
@@ -5,7 +5,7 @@
|
|
5
5
|
|
6
6
|
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
7
|
<head>
|
8
|
-
<title>File:
|
8
|
+
<title>File: memory_storage.rb</title>
|
9
9
|
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
10
|
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
11
|
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
@@ -47,16 +47,16 @@
|
|
47
47
|
|
48
48
|
|
49
49
|
<div id="fileHeader">
|
50
|
-
<h1>
|
50
|
+
<h1>memory_storage.rb</h1>
|
51
51
|
<table class="header-table">
|
52
52
|
<tr class="top-aligned-row">
|
53
53
|
<td><strong>Path:</strong></td>
|
54
|
-
<td>lib/ankusa/
|
54
|
+
<td>lib/ankusa/memory_storage.rb
|
55
55
|
</td>
|
56
56
|
</tr>
|
57
57
|
<tr class="top-aligned-row">
|
58
58
|
<td><strong>Last Update:</strong></td>
|
59
|
-
<td>
|
59
|
+
<td>Thu Dec 02 10:10:26 -0500 2010</td>
|
60
60
|
</tr>
|
61
61
|
</table>
|
62
62
|
</div>
|