hal4r 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/COPYING +663 -0
- data/ChangeLog +11 -0
- data/README +49 -0
- data/Rakefile +23 -0
- data/bin/hal4r +11 -0
- data/lib/hal4r.rb +157 -0
- data/lib/hal4r/matrix.rb +82 -0
- data/lib/hal4r/vector.rb +51 -0
- data/lib/hal4r/version.rb +27 -0
- data/spec/hal4r/matrix_spec.rb +27 -0
- data/spec/hal4r/vector_spec.rb +15 -0
- data/spec/hal4r_spec.rb +282 -0
- data/spec/spec_helper.rb +3 -0
- metadata +135 -0
data/ChangeLog
ADDED
data/README
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
= hal4r - Hyperspace analogue to language for Ruby
|
2
|
+
|
3
|
+
== VERSION
|
4
|
+
|
5
|
+
This documentation refers to hal4r version 0.0.1.
|
6
|
+
|
7
|
+
|
8
|
+
== DESCRIPTION
|
9
|
+
|
10
|
+
HAL[https://en.wikipedia.org/wiki/Hyperspace_Analogue_to_Language] processing
|
11
|
+
for Ruby. Requires {GSL bindings}[https://blackwinter.github.com/rb-gsl] for
|
12
|
+
the heavy lifting.
|
13
|
+
|
14
|
+
|
15
|
+
== LINKS
|
16
|
+
|
17
|
+
Documentation:: https://blackwinter.github.com/hal4r
|
18
|
+
Source code:: https://github.com/blackwinter/hal4r
|
19
|
+
RubyGem:: https://rubygems.org/gems/hal4r
|
20
|
+
Travis CI:: https://travis-ci.org/blackwinter/hal4r
|
21
|
+
|
22
|
+
|
23
|
+
== LITERATURE
|
24
|
+
|
25
|
+
* Lund, K., Burgess, C. & Atchley, R. A. (1995). "Semantic and associative priming in high-dimensional semantic space". <i>Proceedings of the seventeenth annual conference of the Cognitive Science Society</i>, 660–665. ISBN:0-8058-2159-7[https://books.google.de/books?id=CSU_Mj07G7UC].
|
26
|
+
* Lund, K. & Burgess, C. (1996). "Producing high-dimensional semantic spaces from lexical co-occurrence". <i>Behavior Research Methods, Instruments & Computers</i> 28 (2): 203–208. doi:10.3758/bf03204766[https://dx.doi.org/10.3758%2Fbf03204766].
|
27
|
+
|
28
|
+
|
29
|
+
== AUTHORS
|
30
|
+
|
31
|
+
* Jens Wille <mailto:jens.wille@gmail.com>
|
32
|
+
|
33
|
+
|
34
|
+
== LICENSE AND COPYRIGHT
|
35
|
+
|
36
|
+
Copyright (C) 2015 Jens Wille
|
37
|
+
|
38
|
+
hal4r is free software: you can redistribute it and/or modify it
|
39
|
+
under the terms of the GNU Affero General Public License as published by
|
40
|
+
the Free Software Foundation, either version 3 of the License, or (at your
|
41
|
+
option) any later version.
|
42
|
+
|
43
|
+
hal4r is distributed in the hope that it will be useful, but
|
44
|
+
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
45
|
+
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
|
46
|
+
License for more details.
|
47
|
+
|
48
|
+
You should have received a copy of the GNU Affero General Public License
|
49
|
+
along with hal4r. If not, see <http://www.gnu.org/licenses/>.
|
data/Rakefile
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require_relative 'lib/hal4r/version'

begin
  require 'hen'

  # Gem specification passed to Hen.lay!.
  Hen.lay! {{
    gem: {
      name:         %q{hal4r},
      version:      Hal4R::VERSION,
      summary:      %q{Hyperspace analogue to language for Ruby.},
      description:  %q{HAL processing for Ruby.},
      author:       %q{Jens Wille},
      email:        %q{jens.wille@gmail.com},
      license:      %q{AGPL-3.0},
      homepage:     :blackwinter,

      # lib/hal4r.rb requires 'nuggets/hash/idmap' at load time, so nuggets
      # must be declared as a runtime dependency next to the GSL bindings;
      # otherwise a fresh install cannot load the library.
      dependencies: %w[nuggets rb-gsl],

      required_ruby_version: '>= 1.9.3'
    }
  }}
rescue LoadError => err
  # Hen is only needed for packaging tasks; report its absence without failing.
  warn "Please install the `hen' gem. (#{err})"
end
|
data/bin/hal4r
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#! /usr/bin/env ruby

require 'hal4r'

# With no arguments and no piped input there is nothing to process.
if ARGV.empty? && $stdin.tty?
  abort "Usage: #{$0} <words...> [<window>]"
end

# A trailing all-digit argument is consumed as the window size rather than
# being treated as a term; otherwise the window size stays nil.
window_size = (ARGV.last =~ /\A\d+\z/) ? ARGV.pop.to_i : nil

# Terms come from the remaining arguments, or from whitespace-separated
# tokens on standard input that contain at least one word character.
terms =
  if ARGV.empty?
    $stdin.readlines.flat_map(&:split).grep(/\w/)
  else
    ARGV
  end

puts Hal4R.new(terms, window_size)
|
data/lib/hal4r.rb
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# hal4r -- Hyperspace analogue to language for Ruby #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# hal4r is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'forwardable'
|
28
|
+
require 'nuggets/hash/idmap'
|
29
|
+
|
30
|
+
# Hyperspace analogue to language: feeds a stream of terms through a sliding
# window and accumulates weighted co-occurrence counts in a square matrix.
class Hal4R

  include Enumerable

  extend Forwardable

  # Window size used when none is given.
  DEFAULT_WINDOW_SIZE = 10

  # The known terms, in the order stored by the id map.
  def_delegator :@idmap, :keys, :terms

  def_delegators :@idmap, :size, :empty?

  attr_accessor :window_size

  # Starts from a clean state and then adds +terms+.
  def initialize(terms = [], window_size = nil)
    reset(window_size)
    add(terms)
  end

  # Discards all terms and counts. Uses the given +window_size+, falling
  # back to the current one and finally to DEFAULT_WINDOW_SIZE. Returns self.
  def reset(window_size = window_size())
    idmap  = Hash.idmap(-1)
    matrix = Matrix.new
    window = Array.new(@window_size = window_size || DEFAULT_WINDOW_SIZE)

    @idmap, @matrix, @window = idmap, matrix, window

    self
  end

  # Adds a single +term+. Every term currently in the window increments the
  # new term's row at that term's column; the increment is the window slot
  # position plus one, so slots nearer the end (more recent) weigh more.
  # Afterwards the term enters the window and the oldest slot is dropped.
  # Returns self.
  def <<(term)
    term_index = @idmap[term]
    row = @matrix.get(term_index)

    @window.each_with_index do |seen, position|
      row[seen] += position + 1 if seen
    end

    @window.push(term_index)
    @window.shift

    self
  end

  # Adds each of +terms+ in order. Returns self.
  def add(terms)
    terms.each do |term|
      self << term
    end

    self
  end

  # Returns the vector for +term+, normalized if +norm+ is true. Unknown
  # terms raise through Hash#fetch.
  def vector(term, norm = false)
    index = @idmap.fetch(term)
    vector_i(index, norm)
  end

  alias_method :[], :vector

  # Returns the normalized vector for +term+.
  def norm(term)
    vector(term, true)
  end

  # Yields each term's vector as an array, normalized if +norm+ is true.
  # Returns an enumerator without a block, self otherwise.
  def each_vector(norm = false)
    unless block_given?
      return enum_for(:each_vector, norm)
    end

    @idmap.each_value do |index|
      yield vector_i(index, norm).to_a
    end

    self
  end

  # Like #each_vector, with normalization switched on.
  def each_norm(&block)
    each_vector(true, &block)
  end

  alias_method :each, :each_norm

  # Yields term1, term2 and their Minkowski distance for every sorted pair
  # of distinct terms. Returns an enumerator without a block, self otherwise.
  def each_distance(norm = true, dimension = 2)
    unless block_given?
      return enum_for(:each_distance, norm, dimension)
    end

    terms.combination(2) do |pair|
      pair.sort!
      yield(*pair, minkowski(*pair, dimension, norm))
    end

    self
  end

  # Returns up to +num+ other terms, closest to +term+ first.
  def related(term, num = window_size, dimension = 2)
    others = terms - [term]
    ranked = others.sort_by { |other| minkowski(term, other, dimension) }
    ranked[0, num]
  end

  # Minkowski distance of order +dimension+ between the vectors of +term1+
  # and +term2+ (normalized unless +norm+ is false).
  def minkowski(term1, term2, dimension, norm = true)
    first, second = [term1, term2].map { |term| vector(term, norm).vector }
    (first - second).abs.to_f.pow(dimension).sum ** 1.fdiv(dimension)
  end

  alias_method :distance, :minkowski

  # Minkowski distance of order 2.
  def euclidean(term1, term2, norm = true)
    minkowski(term1, term2, 2, norm)
  end

  # Minkowski distance of order 1.
  def manhattan(term1, term2, norm = true)
    minkowski(term1, term2, 1, norm)
  end

  alias_method :cityblock, :manhattan

  # All vectors as arrays, normalized unless +norm+ is false.
  def to_a(norm = true)
    enum = norm ? each_norm : each_vector
    enum.to_a
  end

  # Renders a right-aligned table: the first column lists the terms, and
  # each further column is a matrix column headed by its term. Columns for
  # which +isnull?+ holds are left out.
  def to_s
    columns = [[nil, *terms]]

    @matrix.each_col.with_index do |col, index|
      next if col.isnull?
      columns << [@idmap.key(index), *col]
    end

    widths = columns.map { |column| column.map { |val| val.to_s.length }.max }
    line_format = widths.map { |width| "%#{width}s" }.join(' ') << $/

    # Only as many lines as the term column has entries are printed.
    lines = columns.first.each_index.map do |line|
      line_format % columns.map { |column| column[line] }
    end

    lines.join
  end

  def inspect
    format('#<%s:0x%x @window_size=%p, @size=%p>',
      self.class, object_id, window_size, size)
  end

  private

  # Vector lookup by matrix index, limited to the current number of terms.
  def vector_i(index, norm)
    @matrix.vector(index, size, norm)
  end

end
|
154
|
+
|
155
|
+
require_relative 'hal4r/matrix'
|
156
|
+
require_relative 'hal4r/vector'
|
157
|
+
require_relative 'hal4r/version'
|
data/lib/hal4r/matrix.rb
ADDED
@@ -0,0 +1,82 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# hal4r -- Hyperspace analogue to language for Ruby #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# hal4r is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
require 'forwardable'
|
28
|
+
require 'gsl'
|
29
|
+
|
30
|
+
class Hal4R

  # Square integer matrix backed by GSL::Matrix::Int that grows on demand.
  #
  # NOTE(review): Enumerable is included but no #each is defined in this
  # class, so Enumerable methods are not usable as-is -- confirm intent.
  class Matrix

    include Enumerable

    extend Forwardable

    # Initial size and default growth increment (rows and columns).
    DEFAULT_STEP = 512

    # Current number of rows of the underlying matrix.
    def_delegator :@matrix, :size1, :size

    # Creates a zero matrix of +step+ x +step+ (DEFAULT_STEP if +step+ is nil).
    def initialize(step = nil)
      @matrix = matrix(@step = step || DEFAULT_STEP)
    end

    attr_accessor :step

    # Returns row +index+, growing the matrix first when +index+ lies beyond
    # the current size. Growth is by +step+, or by as much as is needed to
    # make +index+ valid when a single step would not reach it (previously
    # such an index was still out of range after expanding once).
    def get(index)
      expand(size + [step, index - size + 1].max) unless index < size
      @matrix.row(index)
    end

    # Returns a Hal4R::Vector built from the first +size+ entries of row
    # +index+ followed by the first +size+ entries of column +index+,
    # converted to float and normalized if +norm+ is true. Does not grow the
    # matrix; an out-of-range +index+ raises from GSL.
    def vector(index, size = size(), norm = false)
      vector = @matrix.subrow(index, 0, size)
        .concat(@matrix.subcolumn(index, 0, size))

      Vector.new(norm ? vector.to_f.normalize : vector)
    end

    # Iterates over the columns of the underlying matrix; returns an
    # enumerator when no block is given.
    def each_col(&block)
      block ? @matrix.each_col(&block) : enum_for(:each_col)
    end

    def inspect
      '#<%s:0x%x @step=%p, @size=%p>' % [
        self.class, object_id, step, size
      ]
    end

    private

    # Replaces the matrix with a zero matrix of +new_size+ and copies the
    # current contents into its top-left corner.
    def expand(new_size = size + step)
      @matrix = matrix(new_size).set(range = 0 .. size - 1, range, @matrix)
    end

    # Builds a +size+ x +size+ integer zero matrix.
    def matrix(size)
      GSL::Matrix::Int.zeros(size)
    end

  end

end
|
data/lib/hal4r/vector.rb
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# hal4r -- Hyperspace analogue to language for Ruby #
|
5
|
+
# #
|
6
|
+
# Copyright (C) 2015 Jens Wille #
|
7
|
+
# #
|
8
|
+
# Authors: #
|
9
|
+
# Jens Wille <jens.wille@gmail.com> #
|
10
|
+
# #
|
11
|
+
# hal4r is free software; you can redistribute it and/or modify it under the #
|
12
|
+
# terms of the GNU Affero General Public License as published by the Free #
|
13
|
+
# Software Foundation; either version 3 of the License, or (at your option) #
|
14
|
+
# any later version. #
|
15
|
+
# #
|
16
|
+
# hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
|
17
|
+
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
|
18
|
+
# FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
|
19
|
+
# more details. #
|
20
|
+
# #
|
21
|
+
# You should have received a copy of the GNU Affero General Public License #
|
22
|
+
# along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
|
23
|
+
# #
|
24
|
+
###############################################################################
|
25
|
+
#++
|
26
|
+
|
27
|
+
class Hal4R

  # Thin enumerable wrapper around an underlying vector object.
  class Vector

    include Enumerable

    # The wrapped vector, exactly as passed to the constructor.
    attr_reader :vector

    def initialize(vector)
      @vector = vector
    end

    # Delegates iteration to the wrapped vector; without a block an
    # enumerator over this object is returned instead.
    def each(&block)
      return enum_for(:each) unless block

      vector.each(&block)
    end

    def inspect
      format('#<%s:0x%x @vector=%p>', self.class, object_id, vector)
    end

  end

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
class Hal4R

  # Version components of the library and helpers to render them.
  module Version

    MAJOR = 0
    MINOR = 0
    TINY  = 1

    # Returns array representation.
    def self.to_a
      [MAJOR, MINOR, TINY]
    end

    # Short-cut for version string.
    def self.to_s
      to_a.join('.')
    end

  end

  # Dotted version string built from the components above.
  VERSION = Version.to_s

end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
describe Hal4R::Matrix do

  step = 5

  subject { described_class.new(step) }

  # Reading a row inside the initial size leaves the matrix at its initial
  # size, so a vector at index +step+ is still out of range.
  it do
    expect(subject.get(0)).to eq(0)
    expect(subject.vector(0)).to be_an_instance_of(Hal4R::Vector)

    expect {
      subject.vector(step)
    }.to raise_error(GSL::ERROR::EINVAL, /row index is out of range/)
  end

  # Reading the row at index +step+ grows the matrix, after which a vector
  # at that index can be built.
  it do
    expect(subject.get(step)).to eq(0)

    [0, step].each do |index|
      expect(subject.vector(index)).to be_an_instance_of(Hal4R::Vector)
    end
  end

  # Without a block, each_col hands back an enumerator.
  it do
    expect(subject.each_col).to be_an_instance_of(Enumerator)
  end

  # With a block, each_col yields one column view per matrix row/column.
  it do
    column_views = Array.new(subject.size) { GSL::Vector::Int::Col::View }

    expect { |probe|
      subject.each_col(&probe)
    }.to yield_successive_args(*column_views)
  end

end
|