hal4r 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/COPYING +663 -0
- data/ChangeLog +11 -0
- data/README +49 -0
- data/Rakefile +23 -0
- data/bin/hal4r +11 -0
- data/lib/hal4r.rb +157 -0
- data/lib/hal4r/matrix.rb +82 -0
- data/lib/hal4r/vector.rb +51 -0
- data/lib/hal4r/version.rb +27 -0
- data/spec/hal4r/matrix_spec.rb +27 -0
- data/spec/hal4r/vector_spec.rb +15 -0
- data/spec/hal4r_spec.rb +282 -0
- data/spec/spec_helper.rb +3 -0
- metadata +135 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
= hal4r - Hyperspace analogue to language for Ruby
|
2
|
+
|
3
|
+
== VERSION
|
4
|
+
|
5
|
+
This documentation refers to hal4r version 0.0.1.
|
6
|
+
|
7
|
+
|
8
|
+
== DESCRIPTION
|
9
|
+
|
10
|
+
HAL[https://en.wikipedia.org/wiki/Hyperspace_Analogue_to_Language] processing
|
11
|
+
for Ruby. Requires {GSL bindings}[https://blackwinter.github.com/rb-gsl] for
|
12
|
+
the heavy lifting.
|
13
|
+
|
14
|
+
|
15
|
+
== LINKS
|
16
|
+
|
17
|
+
Documentation:: https://blackwinter.github.com/hal4r
|
18
|
+
Source code:: https://github.com/blackwinter/hal4r
|
19
|
+
RubyGem:: https://rubygems.org/gems/hal4r
|
20
|
+
Travis CI:: https://travis-ci.org/blackwinter/hal4r
|
21
|
+
|
22
|
+
|
23
|
+
== LITERATURE
|
24
|
+
|
25
|
+
* Lund, K., Burgess, C. & Atchley, R. A. (1995). "Semantic and associative priming in high-dimensional semantic space". <i>Proceedings of the seventeenth annual conference of the Cognitive Science Society</i>, 660–665. ISBN:0-8058-2159-7[https://books.google.de/books?id=CSU_Mj07G7UC].
|
26
|
+
* Lund, K. & Burgess, C. (1996). "Producing high-dimensional semantic spaces from lexical co-occurrence". <i>Behavior Research Methods, Instruments & Computers</i> 28 (2): 203–208. doi:10.3758/bf03204766[https://dx.doi.org/10.3758%2Fbf03204766].
|
27
|
+
|
28
|
+
|
29
|
+
== AUTHORS
|
30
|
+
|
31
|
+
* Jens Wille <mailto:jens.wille@gmail.com>
|
32
|
+
|
33
|
+
|
34
|
+
== LICENSE AND COPYRIGHT
|
35
|
+
|
36
|
+
Copyright (C) 2015 Jens Wille
|
37
|
+
|
38
|
+
hal4r is free software: you can redistribute it and/or modify it
|
39
|
+
under the terms of the GNU Affero General Public License as published by
|
40
|
+
the Free Software Foundation, either version 3 of the License, or (at your
|
41
|
+
option) any later version.
|
42
|
+
|
43
|
+
hal4r is distributed in the hope that it will be useful, but
|
44
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
45
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
46
|
+
License for more details.
|
47
|
+
|
48
|
+
You should have received a copy of the GNU Affero General Public License
|
49
|
+
along with hal4r. If not, see <http://www.gnu.org/licenses/>.
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# Build configuration: hands the gem specification to Hen, which generates
# the packaging tasks. Only the version constant is loaded from the library.
require_relative 'lib/hal4r/version'

begin
  require 'hen'

  Hen.lay! {{
    gem: {
      name:         %q{hal4r},
      version:      Hal4R::VERSION,
      summary:      %q{Hyperspace analogue to language for Ruby.},
      description:  %q{HAL processing for Ruby.},
      author:       %q{Jens Wille},
      email:        %q{jens.wille@gmail.com},
      license:      %q{AGPL-3.0},
      homepage:     :blackwinter,
      # lib/hal4r.rb requires 'nuggets/hash/idmap' when it is loaded, so the
      # nuggets gem is a runtime dependency alongside the GSL bindings.
      dependencies: %w[nuggets rb-gsl],

      required_ruby_version: '>= 1.9.3'
    }
  }}
rescue LoadError => err
  # Hen is only needed for packaging; degrade to a hint instead of failing.
  warn "Please install the `hen' gem. (#{err})"
end
|
data/bin/hal4r
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#! /usr/bin/env ruby

# Command-line front end: builds a Hal4R model from the given terms and
# prints it. Terms come from the arguments or, when none are given, from
# whitespace-separated words on standard input.

require 'hal4r'

# A trailing all-digit argument is the window size. Remove it first so that
# the usage check below sees only the actual terms; otherwise a lone window
# argument on a terminal would skip the check and block reading the TTY.
window_size = ARGV.pop.to_i if ARGV.last =~ /\A\d+\z/

abort "Usage: #{$0} <words...> [<window>]" if ARGV.empty? && $stdin.tty?

# Keep only input tokens that contain at least one word character.
terms = ARGV.empty? ? $stdin.readlines.flat_map(&:split).grep(/\w/) : ARGV

puts Hal4R.new(terms, window_size)
|
data/lib/hal4r.rb
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# hal4r -- Hyperspace analogue to language for Ruby #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# hal4r is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'forwardable'
|
28
|
+
require 'nuggets/hash/idmap'
|
29
|
+
|
30
|
+
# Hyperspace Analogue to Language (HAL) model: a term-by-term matrix of
# weighted co-occurrence counts, filled from a stream of terms seen through
# a sliding window of the most recent terms.
#
# Enumerable iteration (#each) yields every term's normalized vector as an
# array.
class Hal4R

  include Enumerable

  extend Forwardable

  # Window length used when none is given.
  DEFAULT_WINDOW_SIZE = 10

  # terms: the keys of the term => index map.
  def_delegator :@idmap, :keys, :terms

  # size / empty?: answered by the term => index map.
  def_delegators :@idmap, :size, :empty?

  # Sets up an empty model with the given window size (nil selects
  # DEFAULT_WINDOW_SIZE) and feeds it +terms+ in order.
  def initialize(terms = [], window_size = nil)
    reset(window_size)
    add(terms)
  end

  # Window length. NOTE: the writer only stores the number; the window array
  # itself is rebuilt by #reset.
  attr_accessor :window_size

  # Starts over with a fresh term map, a fresh matrix and a window of
  # +window_size+ empty (nil) slots. Defaults to the current window size;
  # nil selects DEFAULT_WINDOW_SIZE. Returns self.
  def reset(window_size = window_size())
    # Hash.idmap comes from nuggets -- presumably a hash that hands out
    # consecutive indexes (starting after -1) to keys on first lookup;
    # confirm against the nuggets documentation.
    @idmap, @matrix, @window = Hash.idmap(-1), Matrix.new,
      Array.new(@window_size = window_size || DEFAULT_WINDOW_SIZE)

    self
  end

  # Records one occurrence of +term+: for every term index currently in the
  # window, the new term's matrix row is incremented at that index by the
  # slot position plus one, so later (more recent) slots weigh more. The
  # term then enters the window at the end and the oldest slot is dropped.
  # Returns self.
  def <<(term)
    row = @matrix.get(term_index = @idmap[term])

    @window.each_with_index { |index, weight|
      row[index] += weight + 1 if index  # nil slots are still unfilled
    }

    @window << term_index
    @window.shift

    self
  end

  # Feeds each of +terms+ to #<< in order. Returns self.
  def add(terms)
    terms.each { |term| self << term }
    self
  end

  # Returns the Vector for +term+ (built by Matrix#vector from the term's
  # row and column), normalized when +norm+ is true. The index is looked up
  # with fetch, so a term missing from the map raises rather than returning
  # a default.
  def vector(term, norm = false)
    vector_i(@idmap.fetch(term), norm)
  end

  alias_method :[], :vector

  # Normalized vector for +term+.
  def norm(term)
    vector(term, true)
  end

  # Yields each known term's vector as an array, normalized when +norm+ is
  # true. Without a block returns an Enumerator; with a block returns self.
  def each_vector(norm = false)
    return enum_for(:each_vector, norm) unless block_given?

    @idmap.each_value { |index| yield vector_i(index, norm).to_a }

    self
  end

  # Like #each_vector, but always normalized.
  def each_norm(&block)
    each_vector(true, &block)
  end

  alias_method :each, :each_norm

  # Yields term1, term2 (sorted) and their Minkowski distance of order
  # +dimension+ for every pair of known terms. Without a block returns an
  # Enumerator; with a block returns self.
  def each_distance(norm = true, dimension = 2)
    return enum_for(:each_distance, norm, dimension) unless block_given?

    # sort! runs first, so the distance is computed on the sorted pair too.
    terms.combination(2) { |t|
      yield(*t.sort!, minkowski(*t, dimension, norm))
    }

    self
  end

  # Returns up to +num+ other terms, closest first by Minkowski distance of
  # order +dimension+ between normalized vectors.
  def related(term, num = window_size, dimension = 2)
    (terms - [term]).sort_by { |t| minkowski(term, t, dimension) }[0, num]
  end

  # Minkowski distance of order +dimension+ between the vectors of two
  # terms: (sum |a - b| ** dimension) ** (1 / dimension). Vectors are
  # normalized first unless +norm+ is false.
  def minkowski(term1, term2, dimension, norm = true)
    [term1, term2].map { |term| vector(term, norm).vector }
      .inject(:-).abs.to_f.pow(dimension).sum ** 1.fdiv(dimension)
  end

  alias_method :distance, :minkowski

  # Minkowski distance of order 2.
  def euclidean(term1, term2, norm = true)
    minkowski(term1, term2, 2, norm)
  end

  # Minkowski distance of order 1.
  def manhattan(term1, term2, norm = true)
    minkowski(term1, term2, 1, norm)
  end

  alias_method :cityblock, :manhattan

  # All term vectors as arrays; normalized unless +norm+ is false.
  def to_a(norm = true)
    norm ? each_norm.to_a : each_vector.to_a
  end

  # Renders the matrix as a text table. The first column lists the terms
  # (with an empty corner cell); it is followed by one column per matrix
  # column that is not null (isnull?), headed by that column's term. Cells
  # are right-aligned to the widest entry of their column and every line
  # ends with the record separator ($/).
  def to_s
    cols = [terms.unshift(nil)]

    @matrix.each_col.with_index do |col, index|
      cols << [@idmap.key(index), *col] unless col.isnull?
    end

    fmt = cols.map { |col|
      "%#{col.map { |val| val.to_s.length }.max}s"
    }.join(' ') << $/

    # Only as many lines as the first column has entries are printed, even
    # though the matrix columns may be longer.
    cols.first.each_index.map { |index|
      fmt % cols.map { |col| col[index] }
    }.join
  end

  # Short representation showing window size and number of terms.
  def inspect
    '#<%s:0x%x @window_size=%p, @size=%p>' % [
      self.class, object_id, window_size, size
    ]
  end

  private

  # Vector for the term at matrix +index+, limited to the number of known
  # terms.
  def vector_i(index, norm)
    @matrix.vector(index, size, norm)
  end

end
|
154
|
+
|
155
|
+
require_relative 'hal4r/matrix'
|
156
|
+
require_relative 'hal4r/vector'
|
157
|
+
require_relative 'hal4r/version'
|
data/lib/hal4r/matrix.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# hal4r -- Hyperspace analogue to language for Ruby #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# hal4r is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'forwardable'
|
28
|
+
require 'gsl'
|
29
|
+
|
30
|
+
class Hal4R

  # Growable wrapper around a GSL::Matrix::Int created with
  # GSL::Matrix::Int.zeros(size). It is enlarged in multiples of +step+
  # whenever a row beyond the current size is requested through #get.
  #
  # NOTE(review): Enumerable is included but no #each is defined in this
  # class; only #each_col is available for iteration.
  class Matrix

    include Enumerable

    extend Forwardable

    # Growth increment used when none is given.
    DEFAULT_STEP = 512

    # size: the underlying matrix's size1.
    def_delegator :@matrix, :size1, :size

    # Creates a matrix of initial size +step+ (DEFAULT_STEP when nil).
    def initialize(step = nil)
      @matrix = matrix(@step = step || DEFAULT_STEP)
    end

    # Growth increment for later expansions.
    attr_accessor :step

    # Returns row +index+ of the underlying matrix, enlarging the matrix
    # first when +index+ lies beyond the current size. The matrix grows by
    # as many whole steps as are needed to make +index+ valid, in a single
    # copy, so an index more than one step past the end is handled too.
    def get(index)
      expand(size + step * ((index - size) / step + 1)) unless index < size
      @matrix.row(index)
    end

    # Builds a Hal4R::Vector from the first +size+ entries of row +index+
    # followed by the first +size+ entries of column +index+ (GSL subrow /
    # subcolumn with offset 0). With +norm+ the values are converted to
    # float and normalized. Does not expand the matrix.
    def vector(index, size = size(), norm = false)
      vector = @matrix.subrow(index, 0, size)
        .concat(@matrix.subcolumn(index, 0, size))

      Vector.new(norm ? vector.to_f.normalize : vector)
    end

    # Iterates over the columns of the underlying matrix; returns an
    # Enumerator when no block is given.
    def each_col(&block)
      block ? @matrix.each_col(&block) : enum_for(:each_col)
    end

    # Short representation showing step and current size.
    def inspect
      '#<%s:0x%x @step=%p, @size=%p>' % [
        self.class, object_id, step, size
      ]
    end

    private

    # Replaces the matrix with a zero matrix of +new_size+ and copies the
    # current contents into its leading size x size block.
    def expand(new_size = size + step)
      @matrix = matrix(new_size).set(range = 0 .. size - 1, range, @matrix)
    end

    # New zero-filled integer matrix created with the given size.
    def matrix(size)
      GSL::Matrix::Int.zeros(size)
    end

  end

end
|
data/lib/hal4r/vector.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# hal4r -- Hyperspace analogue to language for Ruby #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# hal4r is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Hal4R

  # Thin Enumerable wrapper around an underlying vector object. The wrapped
  # object only needs to respond to #each.
  class Vector

    include Enumerable

    # Wraps +vector+ as is; no copy is made.
    def initialize(vector)
      @vector = vector
    end

    # The wrapped vector object.
    attr_reader :vector

    # Passes the block on to the wrapped vector's #each; returns an
    # Enumerator when no block is given.
    def each(&block)
      block ? vector.each(&block) : enum_for(:each)
    end

    # Short representation showing the wrapped vector.
    def inspect
      '#<%s:0x%x @vector=%p>' % [
        self.class, object_id, vector
      ]
    end

  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class Hal4R

  # Version number of the library, split into its three components.
  module Version

    MAJOR = 0
    MINOR = 0
    TINY  = 1

    class << self

      # Returns array representation.
      def to_a
        [MAJOR, MINOR, TINY]
      end

      # Short-cut for version string.
      def to_s
        to_a.join('.')
      end

    end

  end

  # Dotted version string built from the components above.
  VERSION = Version.to_s

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
describe Hal4R::Matrix do

  # A small step keeps the matrix tiny and makes the growth boundary easy to
  # hit: indexes 0...step are inside the initial matrix, step is just past it.
  step = 5

  subject { described_class.new(step) }

  example 'reads within the initial size without growing' do
    expect(subject.get(0)).to eq(0)
    expect(subject.vector(0)).to be_an_instance_of(Hal4R::Vector)
    # vector does not expand the matrix, so the first index past the end fails
    expect{subject.vector(step)}.to raise_error(GSL::ERROR::EINVAL, /row index is out of range/)
  end

  example 'grows when a row past the end is requested' do
    expect(subject.get(step)).to eq(0)
    expect(subject.vector(0)).to be_an_instance_of(Hal4R::Vector)
    expect(subject.vector(step)).to be_an_instance_of(Hal4R::Vector)
  end

  example 'returns an enumerator from each_col without a block' do
    expect(subject.each_col).to be_an_instance_of(Enumerator)
  end

  example 'yields one column view per matrix column' do
    expect{|b|subject.each_col(&b)}.to yield_successive_args(*[GSL::Vector::Int::Col::View] * subject.size)
  end

end
|