hal4r 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,11 @@
1
+ # markup: rd
2
+
3
+ = Revision history for hal4r
4
+
5
+ == 0.0.1 [2015-07-23]
6
+
7
+ * First release.
8
+
9
+ == 0.0.0 [2015-07-15]
10
+
11
+ * Birthday :-)
data/README ADDED
@@ -0,0 +1,49 @@
1
+ = hal4r - Hyperspace analogue to language for Ruby
2
+
3
+ == VERSION
4
+
5
+ This documentation refers to hal4r version 0.0.1.
6
+
7
+
8
+ == DESCRIPTION
9
+
10
+ HAL[https://en.wikipedia.org/wiki/Hyperspace_Analogue_to_Language] processing
11
+ for Ruby. Requires {GSL bindings}[https://blackwinter.github.com/rb-gsl] for
12
+ the heavy lifting.
13
+
14
+
15
+ == LINKS
16
+
17
+ Documentation:: https://blackwinter.github.com/hal4r
18
+ Source code:: https://github.com/blackwinter/hal4r
19
+ RubyGem:: https://rubygems.org/gems/hal4r
20
+ Travis CI:: https://travis-ci.org/blackwinter/hal4r
21
+
22
+
23
+ == LITERATURE
24
+
25
+ * Lund, K., Burgess, C. & Atchley, R. A. (1995). "Semantic and associative priming in high-dimensional semantic space". <i>Proceedings of the seventeenth annual conference of the Cognitive Science Society</i>, 660–665. ISBN:0-8058-2159-7[https://books.google.de/books?id=CSU_Mj07G7UC].
26
+ * Lund, K. & Burgess, C. (1996). "Producing high-dimensional semantic spaces from lexical co-occurrence". <i>Behavior Research Methods, Instruments & Computers</i> 28 (2): 203–208. doi:10.3758/bf03204766[https://dx.doi.org/10.3758%2Fbf03204766].
27
+
28
+
29
+ == AUTHORS
30
+
31
+ * Jens Wille <mailto:jens.wille@gmail.com>
32
+
33
+
34
+ == LICENSE AND COPYRIGHT
35
+
36
+ Copyright (C) 2015 Jens Wille
37
+
38
+ hal4r is free software: you can redistribute it and/or modify it
39
+ under the terms of the GNU Affero General Public License as published by
40
+ the Free Software Foundation, either version 3 of the License, or (at your
41
+ option) any later version.
42
+
43
+ hal4r is distributed in the hope that it will be useful, but
44
+ WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
45
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public
46
+ License for more details.
47
+
48
+ You should have received a copy of the GNU Affero General Public License
49
+ along with hal4r. If not, see <http://www.gnu.org/licenses/>.
@@ -0,0 +1,23 @@
1
# Rake configuration: hands the gem specification to the `hen' helper.
require_relative 'lib/hal4r/version'

begin
  require 'hen'

  # The block returns the option hash that hen turns into the gemspec.
  Hen.lay! {{
    gem: {
      name:         %q{hal4r},
      version:      Hal4R::VERSION,
      summary:      %q{Hyperspace analogue to language for Ruby.},
      description:  %q{HAL processing for Ruby.},
      author:       %q{Jens Wille},
      email:        %q{jens.wille@gmail.com},
      license:      %q{AGPL-3.0},
      homepage:     :blackwinter,
      # lib/hal4r.rb requires 'nuggets/hash/idmap' at load time, so the
      # nuggets gem has to be declared next to the GSL bindings; otherwise
      # a fresh install fails with a LoadError on `require "hal4r"'.
      dependencies: %w[nuggets rb-gsl],

      required_ruby_version: '>= 1.9.3'
    }
  }}
rescue LoadError => err
  # hen is only needed for packaging tasks; report instead of aborting rake.
  warn "Please install the `hen' gem. (#{err})"
end
@@ -0,0 +1,11 @@
1
#! /usr/bin/env ruby

# Command line front end: builds a Hal4R model from the given words and
# prints its string representation.

require 'hal4r'

# Nothing on the command line and nothing piped in: show usage and quit.
if ARGV.empty? && $stdin.tty?
  abort "Usage: #{$0} <words...> [<window>]"
end

# A trailing all-digit argument is taken as the window size and removed
# from the word list; otherwise the library default applies (nil).
window_size = nil
window_size = ARGV.pop.to_i if ARGV.last =~ /\A\d+\z/

# Words come from the remaining arguments or, if there are none, from the
# whitespace-separated tokens on standard input that contain a word
# character.
terms =
  if ARGV.empty?
    $stdin.readlines.flat_map(&:split).grep(/\w/)
  else
    ARGV
  end

puts Hal4R.new(terms, window_size)
@@ -0,0 +1,157 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # hal4r -- Hyperspace analogue to language for Ruby #
5
+ # #
6
+ # Copyright (C) 2015 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <jens.wille@gmail.com> #
10
+ # #
11
+ # hal4r is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'forwardable'
28
+ require 'nuggets/hash/idmap'
29
+
30
# Incrementally built term co-occurrence model. Terms are fed in one at a
# time; each new term is related to the terms still inside a sliding window
# of the most recently added terms.
class Hal4R

  include Enumerable

  extend Forwardable

  # Window size used when none is given.
  DEFAULT_WINDOW_SIZE = 10

  # Number of slots in the sliding window (also the default result count of
  # #related). Assigning it does not resize the current window; #reset does.
  attr_accessor :window_size

  # terms:: the keys of the term/index map, i.e. all terms seen so far.
  def_delegator :@idmap, :keys, :terms

  # size / empty?:: answered by the term/index map.
  def_delegators :@idmap, :size, :empty?

  # Creates a model with the given window size (+nil+ selects
  # DEFAULT_WINDOW_SIZE) and feeds it the initial +terms+.
  def initialize(terms = [], window_size = nil)
    reset(window_size)
    add(terms)
  end

  # Discards all state and starts over with an empty term map, a fresh
  # matrix and a window of +window_size+ empty slots. Without an argument
  # the current window size is kept. Returns +self+.
  def reset(window_size = window_size())
    # Hash.idmap comes from nuggets; presumably it hands out consecutive
    # indexes to previously unseen keys -- confirm against its docs.
    idmap  = Hash.idmap(-1)
    matrix = Matrix.new

    @window_size = window_size || DEFAULT_WINDOW_SIZE
    @idmap, @matrix, @window = idmap, matrix, Array.new(@window_size)

    self
  end

  # Appends a single +term+. For every term currently in the window the
  # cell in the new term's row at that earlier term's column grows by the
  # earlier term's 1-based window position (oldest slot counts 1, the most
  # recent slot counts the window size). Returns +self+.
  def <<(term)
    term_index = @idmap[term]
    row = @matrix.get(term_index)

    @window.each_with_index do |seen, position|
      next unless seen  # slot not filled yet

      row[seen] += position + 1
    end

    # Slide the window: newest term enters at the end, oldest slot leaves.
    @window.push(term_index)
    @window.shift

    self
  end

  # Appends every element of +terms+ in order. Returns +self+.
  def add(terms)
    terms.each do |term|
      self << term
    end

    self
  end

  # Returns the Hal4R::Vector for a known +term+, normalized if +norm+ is
  # true. Raises whatever Hash#fetch raises for an unknown term.
  def vector(term, norm = false)
    index = @idmap.fetch(term)
    vector_i(index, norm)
  end

  alias_method :[], :vector

  # Returns the normalized vector for +term+.
  def norm(term)
    vector(term, true)
  end

  # Yields each term's vector as an array, in term map order. Returns an
  # enumerator when no block is given, +self+ otherwise.
  def each_vector(norm = false)
    if block_given?
      @idmap.each_value do |index|
        yield vector_i(index, norm).to_a
      end

      self
    else
      enum_for(:each_vector, norm)
    end
  end

  # Same as #each_vector with normalization switched on.
  def each_norm(&block)
    each_vector(true, &block)
  end

  alias_method :each, :each_norm

  # Yields +term1+, +term2+ (sorted) and their Minkowski distance for every
  # pair of distinct terms. Returns an enumerator when no block is given,
  # +self+ otherwise.
  def each_distance(norm = true, dimension = 2)
    unless block_given?
      return enum_for(:each_distance, norm, dimension)
    end

    terms.combination(2) do |pair|
      first, second = pair.sort!
      yield first, second, minkowski(first, second, dimension, norm)
    end

    self
  end

  # Returns up to +num+ other terms ordered by increasing distance from
  # +term+ (normalized vectors, Minkowski distance of order +dimension+).
  def related(term, num = window_size, dimension = 2)
    candidates = terms - [term]
    ranked = candidates.sort_by { |other| minkowski(term, other, dimension) }
    ranked[0, num]
  end

  # Minkowski distance of order +dimension+ between the vectors of +term1+
  # and +term2+: the +dimension+-th root of the summed +dimension+-th powers
  # of the absolute component differences.
  def minkowski(term1, term2, dimension, norm = true)
    left  = vector(term1, norm).vector
    right = vector(term2, norm).vector

    difference = (left - right).abs.to_f
    difference.pow(dimension).sum ** 1.fdiv(dimension)
  end

  alias_method :distance, :minkowski

  # Minkowski distance of order 2.
  def euclidean(term1, term2, norm = true)
    minkowski(term1, term2, 2, norm)
  end

  # Minkowski distance of order 1.
  def manhattan(term1, term2, norm = true)
    minkowski(term1, term2, 1, norm)
  end

  alias_method :cityblock, :manhattan

  # Returns all vectors as arrays, normalized unless +norm+ is false.
  def to_a(norm = true)
    (norm ? each_norm : each_vector).to_a
  end

  # Renders a right-aligned text table: the first column lists the terms,
  # followed by one labelled column per matrix column that is not all zero.
  def to_s
    columns = [[nil, *terms]]

    @matrix.each_col.with_index do |col, index|
      next if col.isnull?

      columns << [@idmap.key(index), *col]
    end

    widths = columns.map { |column|
      column.map { |value| value.to_s.length }.max
    }

    row_format = widths.map { |width| "%#{width}s" }.join(' ') << $/

    # Only as many rows as the term column has entries are printed.
    columns.first.each_index.map { |row|
      row_format % columns.map { |column| column[row] }
    }.join
  end

  # Short representation showing window size and number of terms.
  def inspect
    format('#<%s:0x%x @window_size=%p, @size=%p>',
      self.class, object_id, window_size, size)
  end

  private

  # Vector for the term stored at +index+, limited to the current number of
  # terms.
  def vector_i(index, norm)
    @matrix.vector(index, size, norm)
  end

end
154
+
155
+ require_relative 'hal4r/matrix'
156
+ require_relative 'hal4r/vector'
157
+ require_relative 'hal4r/version'
@@ -0,0 +1,82 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # hal4r -- Hyperspace analogue to language for Ruby #
5
+ # #
6
+ # Copyright (C) 2015 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <jens.wille@gmail.com> #
10
+ # #
11
+ # hal4r is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
+ require 'forwardable'
28
+ require 'gsl'
29
+
30
class Hal4R

  # Integer matrix backed by GSL::Matrix::Int that is enlarged on demand
  # when a row beyond its current size is requested.
  class Matrix

    # NOTE(review): Enumerable is mixed in, but this class defines no #each;
    # Enumerable methods will fail unless #each is supplied elsewhere.
    include Enumerable

    extend Forwardable

    # Growth increment (and initial size) used when none is given.
    DEFAULT_STEP = 512

    # size:: the underlying matrix's +size1+.
    def_delegator :@matrix, :size1, :size

    # Number of rows/columns added per regular expansion.
    attr_accessor :step

    # Creates a zero-filled matrix of size +step+ (+nil+ selects
    # DEFAULT_STEP).
    def initialize(step = nil)
      @matrix = matrix(@step = step || DEFAULT_STEP)
    end

    # Returns row +index+ of the underlying matrix, enlarging the matrix
    # first if +index+ lies beyond its current size.
    #
    # The matrix grows by +step+ as usual, but always far enough to contain
    # +index+; a single fixed-size step would leave any index at or beyond
    # <tt>size + step</tt> (or any index at all when +step+ is not positive)
    # out of range.
    def get(index)
      expand([size + step, index + 1].max) unless index < size
      @matrix.row(index)
    end

    # Returns a Hal4R::Vector made of the first +size+ entries of row
    # +index+ followed by the first +size+ entries of column +index+
    # (arguments passed to subrow/subcolumn as index, 0, size), normalized
    # as floats if +norm+ is true. Does not enlarge the matrix.
    def vector(index, size = size(), norm = false)
      vector = @matrix.subrow(index, 0, size)
        .concat(@matrix.subcolumn(index, 0, size))

      Vector.new(norm ? vector.to_f.normalize : vector)
    end

    # Iterates over the columns of the underlying matrix; returns an
    # enumerator when no block is given.
    def each_col(&block)
      block ? @matrix.each_col(&block) : enum_for(:each_col)
    end

    # Short representation showing step and current size.
    def inspect
      '#<%s:0x%x @step=%p, @size=%p>' % [
        self.class, object_id, step, size
      ]
    end

    private

    # Replaces the underlying matrix with a zero-filled one of +new_size+
    # and copies the existing contents into its leading rows and columns.
    def expand(new_size = size + step)
      @matrix = matrix(new_size).set(range = 0 .. size - 1, range, @matrix)
    end

    # Allocates a zero-filled integer matrix via GSL::Matrix::Int.zeros
    # (presumably size x size -- the code above treats it as square).
    def matrix(size)
      GSL::Matrix::Int.zeros(size)
    end

  end

end
@@ -0,0 +1,51 @@
1
+ #--
2
+ ###############################################################################
3
+ # #
4
+ # hal4r -- Hyperspace analogue to language for Ruby #
5
+ # #
6
+ # Copyright (C) 2015 Jens Wille #
7
+ # #
8
+ # Authors: #
9
+ # Jens Wille <jens.wille@gmail.com> #
10
+ # #
11
+ # hal4r is free software; you can redistribute it and/or modify it under the #
12
+ # terms of the GNU Affero General Public License as published by the Free #
13
+ # Software Foundation; either version 3 of the License, or (at your option) #
14
+ # any later version. #
15
+ # #
16
+ # hal4r is distributed in the hope that it will be useful, but WITHOUT ANY #
17
+ # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS #
18
+ # FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License for #
19
+ # more details. #
20
+ # #
21
+ # You should have received a copy of the GNU Affero General Public License #
22
+ # along with hal4r. If not, see <http://www.gnu.org/licenses/>. #
23
+ # #
24
+ ###############################################################################
25
+ #++
26
+
27
class Hal4R

  # Thin Enumerable wrapper around an underlying vector object.
  class Vector

    include Enumerable

    # The wrapped object, exactly as handed to ::new.
    attr_reader :vector

    # Wraps +vector+ without copying it.
    def initialize(vector)
      @vector = vector
    end

    # Iterates over the wrapped object's elements; returns an enumerator
    # when called without a block.
    def each(&block)
      return enum_for(:each) unless block

      vector.each(&block)
    end

    # Short representation showing the wrapped object.
    def inspect
      format('#<%s:0x%x @vector=%p>', self.class, object_id, vector)
    end

  end

end
@@ -0,0 +1,27 @@
1
class Hal4R

  # Version information, split into its numeric components.
  module Version

    MAJOR = 0
    MINOR = 0
    TINY  = 1

    # Returns the components as an array: [MAJOR, MINOR, TINY].
    def self.to_a
      [MAJOR, MINOR, TINY]
    end

    # Returns the dot-separated version string.
    def self.to_s
      '%d.%d.%d' % to_a
    end

  end

  # The version string, e.g. for use in the gem specification.
  VERSION = Version.to_s

end
@@ -0,0 +1,27 @@
1
describe Hal4R::Matrix do

  # Small growth step so that expansion is triggered by low indexes.
  let(:step) { 5 }

  subject { described_class.new(step) }

  # Access within the initial size; #vector alone does not enlarge.
  example do
    expect(subject.get(0)).to eq(0)
    expect(subject.vector(0)).to be_an_instance_of(Hal4R::Vector)

    expect {
      subject.vector(step)
    }.to raise_error(GSL::ERROR::EINVAL, /row index is out of range/)
  end

  # #get beyond the initial size enlarges the matrix, after which #vector
  # works for that index as well.
  example do
    expect(subject.get(step)).to eq(0)

    [0, step].each do |index|
      expect(subject.vector(index)).to be_an_instance_of(Hal4R::Vector)
    end
  end

  # Without a block an enumerator is returned.
  example do
    expect(subject.each_col).to be_an_instance_of(Enumerator)
  end

  # With a block one column view is yielded per matrix column.
  example do
    views = Array.new(subject.size, GSL::Vector::Int::Col::View)

    expect { |probe|
      subject.each_col(&probe)
    }.to yield_successive_args(*views)
  end

end