ruby-nuggets 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ChangeLog +2 -0
- data/README +1 -1
- data/lib/nuggets/argv/option.rb +3 -0
- data/lib/nuggets/argv/option_mixin.rb +85 -0
- data/lib/nuggets/lsi.rb +198 -0
- data/lib/nuggets/midos.rb +16 -3
- data/lib/nuggets/version.rb +1 -1
- metadata +6 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1310c734144767d70dee0cd8479b0077f54f5a45
|
4
|
+
data.tar.gz: 6e76159514c5a08df644c83baf042175705a5497
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cb2f5ba60d908871b3802a0d4795a3b390a2cb5a83f004e484677a8940288d2bf9d1728d191629040629b531a90073b57ee872de80a99612f02a97101764329
|
7
|
+
data.tar.gz: 981e031f0731040b3b08857ca9a772fe48c5af564b47a0367ce9c80aa777b6ae7283b0394ceaee04696bdf214781c010784430576efbd3f05f844852713ebad5
|
data/ChangeLog
CHANGED
data/README
CHANGED
data/lib/nuggets/argv/option.rb
ADDED
data/lib/nuggets/argv/option_mixin.rb
ADDED
@@ -0,0 +1,85 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
+
# language. #
|
6
|
+
# #
|
7
|
+
# Copyright (C) 2007-2013 Jens Wille #
|
8
|
+
# #
|
9
|
+
# Authors: #
|
10
|
+
# Jens Wille <jens.wille@gmail.com> #
|
11
|
+
# #
|
12
|
+
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
+
# under the terms of the GNU Affero General Public License as published by #
|
14
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
+
# option) any later version. #
|
16
|
+
# #
|
17
|
+
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
+
# for more details. #
|
21
|
+
# #
|
22
|
+
# You should have received a copy of the GNU Affero General Public License #
|
23
|
+
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
+
# #
|
25
|
+
###############################################################################
|
26
|
+
#++
|
27
|
+
|
28
|
+
module Nuggets
  module Argv

    # Minimal command-line option helpers, meant to be mixed into ARGV (or
    # any other Array of argument strings). Short options are looked up as
    # <tt>-short</tt>, long options as <tt>--long</tt>; when a long name is
    # given it is tried first and the short name only serves as fallback.
    module OptionMixin

      # call-seq:
      #   ARGV.switch(short[, long]) -> true | false
      #
      # Tells whether the switch +short+ (or +long+) occurs in ARGV. ARGV is
      # left untouched.
      def switch(*args)
        found = __key(*args) { |flag| include?(flag) }
        found ? true : false
      end

      # call-seq:
      #   ARGV.option(short[, long])                   -> aString
      #   ARGV.option(short[, long]) { |value| ... }   -> anObject
      #
      # Looks up the option +short+ (or +long+) and returns the argument that
      # directly follows it; returns +nil+ if the option is absent. With a
      # block, the value is passed to the block and the block's result is
      # returned instead.
      def option(*args, &block)
        __opt(block, *args) { |pos| at(pos + 1) }
      end

      # call-seq:
      #   ARGV.switch!(short[, long]) -> true | false
      #
      # Like #switch, but also deletes the matching switch from ARGV.
      def switch!(*args)
        removed = __key(*args) { |flag| delete(flag) }
        removed ? true : false
      end

      # call-seq:
      #   ARGV.option!(short[, long])                   -> aString
      #   ARGV.option!(short[, long]) { |value| ... }   -> anObject
      #
      # Like #option, but also deletes both the option and its value from
      # ARGV.
      def option!(*args, &block)
        __opt(block, *args) { |pos|
          delete_at(pos)  # the option itself ...
          delete_at(pos)  # ... then the value that moved into its slot
        }
      end

      private

      # Yields the long form (<tt>--long</tt>) first, if +long+ was given,
      # and falls back to the short form (<tt>-short</tt>) whenever the block
      # came back falsy. Returns the first truthy block result.
      def __key(short, long = nil) # :yield: key
        hit = long && yield("--#{long}")
        hit || yield("-#{short}")
      end

      # Finds the position of the option described by +args+, yields that
      # position to obtain the option's value and hands the value to +block+
      # (if any). Returns +nil+ without yielding when the option is missing.
      def __opt(block, *args)
        position = __key(*args) { |flag| index(flag) }
        return unless position

        value = yield(position)
        block ? block.call(value) : value
      end

    end
  end
end
|
data/lib/nuggets/lsi.rb
ADDED
@@ -0,0 +1,198 @@
|
|
1
|
+
#--
|
2
|
+
###############################################################################
|
3
|
+
# #
|
4
|
+
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
+
# language. #
|
6
|
+
# #
|
7
|
+
# Copyright (C) 2007-2013 Jens Wille #
|
8
|
+
# #
|
9
|
+
# Authors: #
|
10
|
+
# Jens Wille <jens.wille@gmail.com> #
|
11
|
+
# #
|
12
|
+
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
+
# under the terms of the GNU Affero General Public License as published by #
|
14
|
+
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
+
# option) any later version. #
|
16
|
+
# #
|
17
|
+
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
+
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
+
# for more details. #
|
21
|
+
# #
|
22
|
+
# You should have received a copy of the GNU Affero General Public License #
|
23
|
+
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
+
# #
|
25
|
+
###############################################################################
|
26
|
+
#++
|
27
|
+
|
28
|
+
require 'gsl'
|
29
|
+
|
30
|
+
module Nuggets

  # A small latent semantic indexing helper on top of GSL. Documents are
  # registered under a key, turned into term-count vectors over a shared term
  # list, and #build replaces each document's vector with a reduced-rank
  # reconstruction obtained from a singular value decomposition.
  #
  # Requires the +gsl+ library for #build (and everything depending on it).
  class LSI

    include Enumerable

    # Threshold used for the +nul+ option of #each_norm when it's just +true+.
    DEFAULT_EPSILON = Float::EPSILON * 10

    # Convenience wrapper: creates an index from +items+, builds it and
    # passes +options+ and the block on to #each_norm for all documents.
    # Returns +nil+ if the index could not be built (see #build).
    def self.each_norm(items, options = {}, &block)
      lsi = new(items)
      lsi.each_norm(nil, options, &block) if lsi.build
    end

    # Creates a new index and registers every key/value pair of +items+ (see
    # #[]= for the accepted values).
    def initialize(items = {})
      reset
      items.each { |k, v| self[k] = v }
    end

    # Returns the Doc registered under +key+, or +nil+.
    def [](key)
      @hash[key]
    end

    # Registers +value+ under +key+, replacing any previous document for that
    # key. +value+ may be a Hash of term => weight, an IO-like object (it's
    # read), a String (split on whitespace) or an Enumerable of terms.
    def []=(key, value)
      @hash[key] = Doc.new(key, value, @list)
    end

    # Registers +value+ under +key+ (+value+ defaults to +key+ itself) and
    # returns the index, so calls can be chained.
    def add(key, value = key)
      self[key] = value
      self
    end

    # Registers +value+ using its +object_id+ as key. Returns the index.
    def <<(value)
      add(value.object_id, value)
    end

    # Number of registered documents.
    def size
      @hash.size
    end

    # Keys of all registered documents.
    def keys
      @hash.keys
    end

    # All registered Doc objects.
    def docs
      @hash.values
    end

    # Iterates over all <tt>key, doc</tt> pairs.
    def each(&block)
      @hash.each(&block)
    end

    # Yields <tt>doc, term, value</tt> for every component of the normalized
    # vector of the document registered under +key+, or of all documents if
    # +key+ is +nil+. Documents without a norm (i.e. not built yet) are
    # skipped. Returns an Enumerator if no block is given.
    #
    # Options:
    #
    # min:: minimum value to consider
    # abs:: minimum absolute value to consider
    # nul:: exclude null values (true or Float)
    # new:: exclude original terms / only yield new ones
    def each_norm(key = nil, options = {})
      # Without this guard the first matching component would raise a
      # LocalJumpError.
      return enum_for(:each_norm, key, options) unless block_given?

      min, abs, nul, only_new = options.values_at(:min, :abs, :nul, :new)
      nul = DEFAULT_EPSILON if nul == true

      # Maps term index => term; refreshed on every build.
      terms = @invlist

      (key ? [self[key]] : docs).each { |doc|
        norm = doc && doc.norm
        next unless norm

        i = 0

        norm.each { |v|
          yield doc, terms[i], v unless (min && v < min) ||
                                        (abs && v.abs < abs) ||
                                        (nul && v.abs < nul) ||
                                        (only_new && doc.include?(i))
          i += 1
        }
      }
    end

    # Returns the keys of (at most) +num+ documents, ordered by descending
    # product of their normalized vector with that of the document registered
    # under +key+. Returns +nil+ if +key+ is unknown or its document has not
    # been built yet. Other documents that have not been built (e.g. added
    # after the last #build) are ignored.
    def related(key, num = 5)
      doc = self[key]
      norm = doc && doc.norm
      return unless norm

      # Negated, so that the ascending sort below puts the largest products
      # first.
      negated = norm * -1
      scored = []

      each { |k, other|
        next if k == key

        other_norm = other.norm
        scored << [negated * other_norm.col, k] if other_norm
      }

      scored.sort![0, num].map! { |_, k| k }
    end

    # Builds the index from the documents registered so far, keeping roughly
    # the +cutoff+ fraction of largest singular values. Does nothing and
    # returns +nil+ unless there's more than one document; returns the number
    # of documents otherwise.
    def build(cutoff = 0.75)
      build!(docs, @list, cutoff) if size > 1
    end

    # Forgets all documents and terms.
    def reset
      # @list assigns consecutive indexes to terms on first lookup.
      @hash, @list, @invlist = {}, Hash.new { |h, k| h[k] = h.size }, {}
    end

    private

    # Decomposes the transposed document matrix, zeroes the smaller singular
    # values and stores the reconstructed vectors in +docs+.
    # NOTE(review): relies on GSL's SV_decomp returning U, V and S in that
    # order -- confirm against the rb-gsl documentation.
    def build!(docs, list, cutoff)
      u, v, s = GSL::Matrix.alloc(*vectors(docs, list)).trans.SV_decomp
      reduce(u, v, cutoff(s, cutoff), docs)
      size
    end

    # Refreshes the index => term mapping and returns one raw vector per
    # document, each as long as the current term list.
    def vectors(docs, list)
      @invlist, size = list.invert, list.size
      docs.map { |doc| transform(doc.raw_vector(size)) }
    end

    # Currently the identity: returns +vec+ unchanged.
    #
    # FIXME: "first-order association transform" ???
    #
    # The intended transform was never enabled; the original sketch is kept
    # here for reference only (it was unreachable and relied on +sum+, which
    # was only assigned in a commented-out guard):
    #
    #   return vec unless (sum = vec.sum) > 1
    #
    #   vec.each { |v| q -= (w = v / sum) * Math.log(w) if v > 0 }
    #   vec.map! { |v| Math.log(v + 1) / q }
    def transform(vec, q = 0)
      vec
    end

    # Zeroes, in place, every value in +s+ smaller than the
    # <tt>(s.size * c).round</tt>-th largest one. Returns +s+.
    def cutoff(s, c)
      w, i = s.sort[-(s.size * c).round], 0
      s.each { |v| s[i] = 0 if v < w; i += 1 }
      s
    end

    # Multiplies +u+, the diagonal matrix of +s+ and the transpose of +v+,
    # then assigns the resulting columns, in order, to the documents in +d+.
    def reduce(u, v, s, d, i = -1)
      (u * GSL::Matrix.diagonal(s) * v.trans).each_col { |c|
        d[i += 1].vector = c.row
      }
    end

    # A single document: its key, its term index => weight mapping and, once
    # the index has been built, its reduced vector and normalized vector.
    class Doc

      # Separator used to tokenize String values.
      TOKEN_RE = %r{\s+}

      # +list+ is the index's shared term => index mapping; unknown terms get
      # their index assigned by looking them up. A Hash +value+ is taken as
      # term => weight, anything else is tokenized and counted.
      def initialize(key, value, list)
        @key = key
        @map = !value.is_a?(Hash) ? build_hash(value, list) :
          value.inject({}) { |h, (k, v)| h[list[k]] = v; h }
      end

      attr_reader :key, :vector, :norm

      # Returns a GSL vector of length +size+ holding this document's weights
      # at their term indexes.
      def raw_vector(size)
        vec = GSL::Vector.alloc(size)
        @map.each { |k, v| vec[k] = v }
        vec
      end

      # Stores +vec+ along with its normalized form.
      def vector=(vec)
        @vector, @norm = vec, vec.normalize
      end

      # Whether the term with index +k+ occurred in the original document.
      def include?(k)
        @map.include?(k)
      end

      private

      # Counts the occurrences of each term of +value+, keyed by term index.
      def build_hash(value, list, hash = Hash.new(0))
        build_array(value).each { |i| hash[list[i]] += 1 }
        hash
      end

      # Reads +value+ if it's IO-like and splits it on +re+ if it's
      # String-like; anything else is returned as is.
      def build_array(value, re = TOKEN_RE)
        value = value.read if value.respond_to?(:read)
        value = value.split(re) if value.respond_to?(:split)
        value
      end

    end

  end

end
|
data/lib/nuggets/midos.rb
CHANGED
@@ -42,9 +42,22 @@ module Nuggets
|
|
42
42
|
# Line break indicator
|
43
43
|
DEFAULT_NL = '^'
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
45
|
+
# Default encoding for ::parse_file.
DEFAULT_ENCODING = 'iso-8859-1'

class << self

  # Creates a parser from +args+ and lets it parse +input+, passing the
  # block along. Returns whatever the parser's +parse+ returned if a block
  # was given, and that object's +records+ otherwise.
  def parse(input, *args, &block)
    result = new(*args).parse(input, &block)
    block ? result : result.records
  end

  # Opens +file+ with the given +encoding+ (DEFAULT_ENCODING if omitted or
  # +nil+) and hands the open file, +args+ and the block to ::parse. Returns
  # the result of ::parse.
  def parse_file(file, encoding = nil, *args, &block)
    encoding ||= DEFAULT_ENCODING

    File.open(file, :encoding => encoding) do |io|
      parse(io, *args, &block)
    end
  end

end
|
49
62
|
|
50
63
|
def initialize(options = {})
|
data/lib/nuggets/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-nuggets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.3
|
4
|
+
version: 0.9.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2013-
|
11
|
+
date: 2013-06-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Some extensions to the Ruby programming language.
|
14
14
|
email: jens.wille@gmail.com
|
@@ -23,6 +23,8 @@ files:
|
|
23
23
|
- lib/nuggets/all.rb
|
24
24
|
- lib/nuggets/all_mixins.rb
|
25
25
|
- lib/nuggets/ansicolor2css.rb
|
26
|
+
- lib/nuggets/argv/option.rb
|
27
|
+
- lib/nuggets/argv/option_mixin.rb
|
26
28
|
- lib/nuggets/array/boost.rb
|
27
29
|
- lib/nuggets/array/boost_mixin.rb
|
28
30
|
- lib/nuggets/array/combination.rb
|
@@ -105,6 +107,7 @@ files:
|
|
105
107
|
- lib/nuggets/log_parser.rb
|
106
108
|
- lib/nuggets/log_parser/apache.rb
|
107
109
|
- lib/nuggets/log_parser/rails.rb
|
110
|
+
- lib/nuggets/lsi.rb
|
108
111
|
- lib/nuggets/midos.rb
|
109
112
|
- lib/nuggets/mysql.rb
|
110
113
|
- lib/nuggets/net/success.rb
|
@@ -224,7 +227,7 @@ rdoc_options:
|
|
224
227
|
- --line-numbers
|
225
228
|
- --all
|
226
229
|
- --title
|
227
|
-
- ruby-nuggets Application documentation (v0.9.3)
|
230
|
+
- ruby-nuggets Application documentation (v0.9.4)
|
228
231
|
- --main
|
229
232
|
- README
|
230
233
|
require_paths:
|