ruby-nuggets 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +2 -0
- data/README +1 -1
- data/lib/nuggets/argv/option.rb +3 -0
- data/lib/nuggets/argv/option_mixin.rb +85 -0
- data/lib/nuggets/lsi.rb +198 -0
- data/lib/nuggets/midos.rb +16 -3
- data/lib/nuggets/version.rb +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1310c734144767d70dee0cd8479b0077f54f5a45
|
4
|
+
data.tar.gz: 6e76159514c5a08df644c83baf042175705a5497
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cb2f5ba60d908871b3802a0d4795a3b390a2cb5a83f004e484677a8940288d2bf9d1728d191629040629b531a90073b57ee872de80a99612f02a97101764329
|
7
|
+
data.tar.gz: 981e031f0731040b3b08857ca9a772fe48c5af564b47a0367ce9c80aa777b6ae7283b0394ceaee04696bdf214781c010784430576efbd3f05f844852713ebad5
|
data/ChangeLog
CHANGED
data/README
CHANGED
data/lib/nuggets/argv/option.rb
ADDED
data/lib/nuggets/argv/option_mixin.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
+
# language. #
|
6
|
+
# #
|
7
|
+
# Copyright (C) 2007-2013 Jens Wille #
|
8
|
+
# #
|
9
|
+
# Authors: #
|
10
|
+
# Jens Wille <jens.wille@gmail.com> #
|
11
|
+
# #
|
12
|
+
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
+
# under the terms of the GNU Affero General Public License as published by #
|
14
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
+
# option) any later version. #
|
16
|
+
# #
|
17
|
+
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
+
# for more details. #
|
21
|
+
# #
|
22
|
+
# You should have received a copy of the GNU Affero General Public License #
|
23
|
+
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
+
# #
|
25
|
+
###############################################################################
|
26
|
+
#++
|
27
|
+
|
28
|
+
module Nuggets
  module Argv
    module OptionMixin

      # call-seq:
      #   ARGV.switch(short[, long]) -> true | false
      #
      # Tells whether the receiver contains the switch <tt>--long</tt> (tried
      # first, when +long+ is given) or <tt>-short</tt>.
      def switch(*args)
        found = __key(*args) { |flag| include?(flag) }
        found ? true : false
      end

      # call-seq:
      #   ARGV.option(short[, long]) -> aString
      #   ARGV.option(short[, long]) { |value| ... } -> anObject
      #
      # Looks up the option <tt>--long</tt> / <tt>-short</tt> and returns the
      # element that directly follows it. With a block, that element is passed
      # to the block and the block's result is returned instead. Returns +nil+
      # if the option is not present. The receiver is left untouched.
      def option(*args, &block)
        __opt(block, *args) { |pos| at(pos + 1) }
      end

      # call-seq:
      #   ARGV.switch!(short[, long]) -> true | false
      #
      # Like #switch, but also deletes the matching switch from the receiver.
      def switch!(*args)
        removed = __key(*args) { |flag| delete(flag) }
        removed ? true : false
      end

      # call-seq:
      #   ARGV.option!(short[, long]) -> aString
      #   ARGV.option!(short[, long]) { |value| ... } -> anObject
      #
      # Like #option, but also removes both the option and its value from the
      # receiver.
      def option!(*args, &block)
        __opt(block, *args) do |pos|
          delete_at(pos)  # drops the option itself ...
          delete_at(pos)  # ... after which its value sits at the same position
        end
      end

      private

      # Yields the long form (<tt>--long</tt>) first, if +long+ was given, and
      # falls back to the short form (<tt>-short</tt>) when that attempt was
      # skipped or produced a falsy result. Returns the first truthy block
      # result, or else the result for the short form.
      def __key(short, long = nil)  # :yield: key
        hit = yield("--#{long}") if long
        hit || yield("-#{short}")
      end

      # Finds the position of the option described by +args+ and hands it to
      # the given block, which is expected to produce the option's value. The
      # value is then run through +block+ (a Proc or +nil+) if there is one.
      # Returns +nil+ without yielding when the option is not present.
      def __opt(block, *args)
        pos = __key(*args) { |flag| index(flag) }
        return unless pos

        value = yield(pos)
        block ? block.call(value) : value
      end

    end
  end
end
|
data/lib/nuggets/lsi.rb
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
+
# language. #
|
6
|
+
# #
|
7
|
+
# Copyright (C) 2007-2013 Jens Wille #
|
8
|
+
# #
|
9
|
+
# Authors: #
|
10
|
+
# Jens Wille <jens.wille@gmail.com> #
|
11
|
+
# #
|
12
|
+
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
+
# under the terms of the GNU Affero General Public License as published by #
|
14
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
+
# option) any later version. #
|
16
|
+
# #
|
17
|
+
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
+
# for more details. #
|
21
|
+
# #
|
22
|
+
# You should have received a copy of the GNU Affero General Public License #
|
23
|
+
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
+
# #
|
25
|
+
###############################################################################
|
26
|
+
#++
|
27
|
+
|
28
|
+
require 'gsl'
|
29
|
+
|
30
|
+
module Nuggets

  # A keyed collection of documents (Doc) whose term vectors are decomposed
  # and rebuilt through GSL in #build. The GSL API is only touched by #build
  # and Doc#raw_vector; everything else works on plain Ruby objects.
  class LSI

    include Enumerable

    # Threshold used by #each_norm when the +nul+ option is given as +true+.
    DEFAULT_EPSILON = Float::EPSILON * 10

    # Builds a throwaway index from +items+ and, if #build returned a truthy
    # value, forwards to #each_norm for all documents with +options+ and the
    # given block. Returns +nil+ when nothing was built.
    def self.each_norm(items, options = {}, &block)
      lsi = new(items)
      lsi.each_norm(nil, options, &block) if lsi.build
    end

    # +items+ is enumerated as key/value pairs; each pair is stored via #[]=.
    def initialize(items = {})
      reset
      items.each { |k, v| self[k] = v }
    end

    # Returns the Doc stored under +key+, if any.
    def [](key)
      @hash[key]
    end

    # Wraps +value+ in a Doc that shares this index's term list and stores it
    # under +key+.
    def []=(key, value)
      @hash[key] = Doc.new(key, value, @list)
    end

    # Stores +value+ under +key+ (the key doubles as the value by default) and
    # returns +self+ so calls can be chained.
    def add(key, value = key)
      self[key] = value
      self
    end

    # Adds +value+ keyed by its +object_id+.
    def <<(value)
      add(value.object_id, value)
    end

    # Number of stored documents.
    def size
      @hash.size
    end

    # Keys of the stored documents.
    def keys
      @hash.keys
    end

    # The stored Doc objects.
    def docs
      @hash.values
    end

    # Iterates over key/Doc pairs.
    def each(&block)
      @hash.each(&block)
    end

    # Yields +doc+, term and value for every component of the normalized
    # vector of the document stored under +key+ (or of all documents when
    # +key+ is +nil+). Documents without a normalized vector are skipped.
    # Returns an Enumerator when no block is given.
    #
    # Options:
    #
    # min:: minimum value to consider
    # abs:: minimum absolute value to consider
    # nul:: exclude null values (true or Float)
    # new:: exclude original terms / only yield new ones
    def each_norm(key = nil, options = {})
      # Without this guard a block-less call would hit +yield+ and raise
      # LocalJumpError.
      return enum_for(:each_norm, key, options) unless block_given?

      min, abs, nul, only_new = options.values_at(:min, :abs, :nul, :new)
      nul = DEFAULT_EPSILON if nul == true

      terms = @invlist

      (key ? [self[key]] : docs).each { |doc|
        next unless doc && (norm = doc.norm)

        i = 0

        norm.each { |v|
          yield doc, terms[i], v unless (min && v < min) ||
                                        (abs && v.abs < abs) ||
                                        (nul && v.abs < nul) ||
                                        (only_new && doc.include?(i))
          i += 1
        }
      }
    end

    # Returns the keys of up to +num+ documents ranked by the product of
    # their normalized vectors with that of the document stored under +key+
    # (largest product first). Returns +nil+ if +key+ is unknown or its
    # document has no normalized vector. Other documents that have no
    # normalized vector (e.g. added since the last #build) are skipped
    # instead of raising NoMethodError.
    def related(key, num = 5)
      doc  = self[key]
      norm = doc && doc.norm
      return unless norm

      # Negated so that the ascending sort below puts the largest products
      # first.
      norm *= -1

      scored = []

      each { |k, other|
        next if k == key

        other_norm = other.norm
        next unless other_norm

        scored << [norm * other_norm.col, k]
      }

      scored.sort![0, num].map! { |_, k| k }
    end

    # Runs the decomposition over all documents, provided there is more than
    # one. +cutoff+ is passed on to the private #cutoff step. Returns the
    # number of documents, or +nil+ if nothing was built.
    def build(cutoff = 0.75)
      build!(docs, @list, cutoff) if size > 1
    end

    # Drops all documents and terms. The term list assigns each new term the
    # next free index on first access.
    def reset
      @hash, @list, @invlist = {}, Hash.new { |h, k| h[k] = h.size }, {}
    end

    private

    # Builds the matrix from the documents' raw vectors, takes its SV_decomp
    # (assigned as u, v, s), thins out +s+ via #cutoff and writes the
    # recombined columns back to +docs+. Returns the number of documents.
    def build!(docs, list, cutoff)
      u, v, s = GSL::Matrix.alloc(*vectors(docs, list)).trans.SV_decomp
      reduce(u, v, cutoff(s, cutoff), docs)
      size
    end

    # Refreshes the index => term lookup used by #each_norm and returns one
    # (transformed) raw vector per document.
    def vectors(docs, list)
      @invlist, size = list.invert, list.size
      docs.map { |doc| transform(doc.raw_vector(size)) }
    end

    # FIXME: "first-order association transform" ???
    #
    # Currently a pass-through. The original body returned +vec+
    # unconditionally and left the statements below unreachable (they also
    # refer to +sum+, which is only assigned in the commented-out guard).
    # Kept here as a note of the intended weighting:
    #
    #   return vec unless (sum = vec.sum) > 1
    #
    #   vec.each { |v| q -= (w = v / sum) * Math.log(w) if v > 0 }
    #   vec.map! { |v| Math.log(v + 1) / q }
    def transform(vec, q = 0)
      vec
    end

    # Zeroes, in place, every entry of +s+ that is smaller than the entry
    # found at position <tt>-(s.size * c).round</tt> of the sorted values.
    # Returns +s+.
    def cutoff(s, c)
      w, i = s.sort[-(s.size * c).round], 0
      s.each { |v| s[i] = 0 if v < w; i += 1 }
      s
    end

    # Recombines +u+, the diagonal matrix of +s+ and the transpose of +v+,
    # and assigns column +n+ of the result (as a row) to the n-th document
    # in +d+.
    def reduce(u, v, s, d, i = -1)
      (u * GSL::Matrix.diagonal(s) * v.trans).each_col { |c|
        d[i += 1].vector = c.row
      }
    end

    # A single document: a mapping of term index => weight, plus the vector
    # and normalized vector assigned to it via #vector=.
    class Doc

      # Separator used to split string input into terms.
      TOKEN_RE = %r{\s+}

      # +value+ may be a Hash of term => weight, or anything #build_array can
      # turn into a list of terms (which are then counted). +list+ maps terms
      # to indexes.
      def initialize(key, value, list)
        @key = key
        @map = value.is_a?(Hash) ?
          value.inject({}) { |h, (k, v)| h[list[k]] = v; h } :
          build_hash(value, list)
      end

      attr_reader :key, :vector, :norm

      # Allocates a GSL vector of the given +size+ and fills in this
      # document's weights at their term indexes.
      def raw_vector(size)
        vec = GSL::Vector.alloc(size)
        @map.each { |k, v| vec[k] = v }
        vec
      end

      # Stores +vec+ along with its normalized form.
      def vector=(vec)
        @vector, @norm = vec, vec.normalize
      end

      # Whether the term with index +k+ occurred in the original input.
      def include?(k)
        @map.include?(k)
      end

      private

      # Counts the occurrences of each term in +value+, keyed by term index.
      def build_hash(value, list, hash = Hash.new(0))
        build_array(value).each { |i| hash[list[i]] += 1 }
        hash
      end

      # Reads +value+ if it responds to +read+, then splits it on +re+ if it
      # responds to +split+; anything else is returned as is.
      def build_array(value, re = TOKEN_RE)
        value = value.read if value.respond_to?(:read)
        value = value.split(re) if value.respond_to?(:split)
        value
      end

    end

  end

end
|
data/lib/nuggets/midos.rb
CHANGED
@@ -42,9 +42,22 @@ module Nuggets
|
|
42
42
|
# Line break indicator
|
43
43
|
DEFAULT_NL = '^'
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
# Default encoding for ::parse_file.
|
46
|
+
DEFAULT_ENCODING = 'iso-8859-1'
|
47
|
+
|
48
|
+
class << self
|
49
|
+
|
50
|
+
# Creates a new instance from +args+ and has it parse +input+, passing the
# block along. Returns whatever the instance's #parse returned when a block
# was given, and that object's +records+ otherwise.
def parse(input, *args, &block)
  result = new(*args).parse(input, &block)
  return result if block_given?

  result.records
end
|
54
|
+
|
55
|
+
# Opens +file+ with the given +encoding+ (DEFAULT_ENCODING when +nil+ or
# omitted) and passes the open handle, the remaining +args+ and the block
# on to ::parse. Returns the result of ::parse.
def parse_file(file, encoding = nil, *args, &block)
  encoding ||= DEFAULT_ENCODING

  File.open(file, :encoding => encoding) do |input|
    parse(input, *args, &block)
  end
end
|
60
|
+
|
48
61
|
end
|
49
62
|
|
50
63
|
def initialize(options = {})
|
data/lib/nuggets/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-nuggets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Some extensions to the Ruby programming language.
|
14
14
|
email: jens.wille@gmail.com
|
@@ -23,6 +23,8 @@ files:
|
|
23
23
|
- lib/nuggets/all.rb
|
24
24
|
- lib/nuggets/all_mixins.rb
|
25
25
|
- lib/nuggets/ansicolor2css.rb
|
26
|
+
- lib/nuggets/argv/option.rb
|
27
|
+
- lib/nuggets/argv/option_mixin.rb
|
26
28
|
- lib/nuggets/array/boost.rb
|
27
29
|
- lib/nuggets/array/boost_mixin.rb
|
28
30
|
- lib/nuggets/array/combination.rb
|
@@ -105,6 +107,7 @@ files:
|
|
105
107
|
- lib/nuggets/log_parser.rb
|
106
108
|
- lib/nuggets/log_parser/apache.rb
|
107
109
|
- lib/nuggets/log_parser/rails.rb
|
110
|
+
- lib/nuggets/lsi.rb
|
108
111
|
- lib/nuggets/midos.rb
|
109
112
|
- lib/nuggets/mysql.rb
|
110
113
|
- lib/nuggets/net/success.rb
|
@@ -224,7 +227,7 @@ rdoc_options:
|
|
224
227
|
- --line-numbers
|
225
228
|
- --all
|
226
229
|
- --title
|
227
|
-
- ruby-nuggets Application documentation (v0.9.3)
|
230
|
+
- ruby-nuggets Application documentation (v0.9.4)
|
228
231
|
- --main
|
229
232
|
- README
|
230
233
|
require_paths:
|