ruby-nuggets 0.9.7 → 0.9.8.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README +5 -5
- data/Rakefile +11 -1
- data/lib/nuggets/cli.rb +6 -242
- data/lib/nuggets/hash/zip.rb +5 -0
- data/lib/nuggets/{midos/reader.rb → hash/zip_mixin.rb} +101 -58
- data/lib/nuggets/lsi.rb +6 -277
- data/lib/nuggets/midos.rb +6 -89
- data/lib/nuggets/version.rb +2 -2
- data/spec/nuggets/object/singleton_class_spec.rb +1 -1
- metadata +76 -7
- data/lib/nuggets/midos/base.rb +0 -81
- data/lib/nuggets/midos/writer.rb +0 -252
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20e3a6fc4cdbc194e6fae2ce281cba22be27d19b
|
4
|
+
data.tar.gz: f96c5db576f411f940de1effc136ba022699a13f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6b4aa7f85cb7cd2ca92ffe24cac338dcb45dd7264b63e5beb855a7d62b45da42c4779111164a08f6bea6a109c71495e4363c92b2f57cbb3f8b15ef7e8856f78
|
7
|
+
data.tar.gz: aff5162458b1fa163a0a8142ddf16720ec0a08c975535d3e2a1f0c312015e0979197a25451aba6807a08c44463bbfdf344f2b582498393a6d59e2e2bb6581546
|
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to ruby-nuggets version 0.9.7
|
5
|
+
This documentation refers to ruby-nuggets version 0.9.8
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -33,10 +33,10 @@ cause other libraries to misbehave. Use at your own risk!
|
|
33
33
|
|
34
34
|
== LINKS
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
Documentation:: https://blackwinter.github.io/ruby-nuggets/
|
37
|
+
Source code:: https://github.com/blackwinter/ruby-nuggets
|
38
|
+
RubyGem:: https://rubygems.org/gems/ruby-nuggets
|
39
|
+
Travis CI:: https://travis-ci.org/blackwinter/ruby-nuggets
|
40
40
|
|
41
41
|
|
42
42
|
== AUTHORS
|
data/Rakefile
CHANGED
@@ -12,7 +12,17 @@ begin
|
|
12
12
|
:email => %q{jens.wille@gmail.com},
|
13
13
|
:license => %q{AGPL-3.0},
|
14
14
|
:homepage => :blackwinter,
|
15
|
-
:dependencies => %w[]
|
15
|
+
:dependencies => %w[],
|
16
|
+
|
17
|
+
:development_dependencies => [
|
18
|
+
#'amatch', # enumerable/agrep
|
19
|
+
'mime-types', # content_type
|
20
|
+
'open4', # ruby
|
21
|
+
#'rbzip2', # rdf/compression
|
22
|
+
#'rdf', # rdf/{prefix,uri,compression}
|
23
|
+
#'rdf-turtle', # rdf/turtle
|
24
|
+
#'ruby-filemagic' # content_type
|
25
|
+
]
|
16
26
|
}
|
17
27
|
}}
|
18
28
|
rescue LoadError => err
|
data/lib/nuggets/cli.rb
CHANGED
@@ -1,244 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
-
# language. #
|
6
|
-
# #
|
7
|
-
# Copyright (C) 2007-2011 Jens Wille #
|
8
|
-
# #
|
9
|
-
# Authors: #
|
10
|
-
# Jens Wille <jens.wille@gmail.com> #
|
11
|
-
# #
|
12
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
-
# under the terms of the GNU Affero General Public License as published by #
|
14
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
-
# option) any later version. #
|
16
|
-
# #
|
17
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
-
# for more details. #
|
21
|
-
# #
|
22
|
-
# You should have received a copy of the GNU Affero General Public License #
|
23
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
-
# #
|
25
|
-
###############################################################################
|
26
|
-
#++
|
1
|
+
begin
|
2
|
+
require 'cyclops'
|
3
|
+
module Nuggets; CLI = ::Cyclops; end
|
27
4
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
require 'highline'
|
32
|
-
|
33
|
-
module Nuggets
|
34
|
-
class CLI
|
35
|
-
|
36
|
-
class << self
|
37
|
-
|
38
|
-
def usage(prog)
|
39
|
-
"Usage: #{prog} [-h|--help] [options]"
|
40
|
-
end
|
41
|
-
|
42
|
-
def version
|
43
|
-
parent_const_get(:VERSION)
|
44
|
-
end
|
45
|
-
|
46
|
-
def defaults
|
47
|
-
{}
|
48
|
-
end
|
49
|
-
|
50
|
-
def execute(*args)
|
51
|
-
new.execute(*args)
|
52
|
-
end
|
53
|
-
|
54
|
-
private
|
55
|
-
|
56
|
-
def parent_const_get(const, range = 0...-1)
|
57
|
-
name.split('::').inject([::Object]) { |memo, name|
|
58
|
-
memo << memo.last.const_get(name)
|
59
|
-
}.reverse[range].each { |mod|
|
60
|
-
return mod.const_get(const) if mod.const_defined?(const)
|
61
|
-
}
|
62
|
-
|
63
|
-
raise ::NameError, "uninitialized constant #{self}::#{const}"
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
67
|
-
|
68
|
-
attr_reader :options, :config, :defaults
|
69
|
-
attr_reader :stdin, :stdout, :stderr
|
70
|
-
|
71
|
-
attr_accessor :prog
|
72
|
-
|
73
|
-
def initialize(defaults = nil, *args)
|
74
|
-
@defaults, @prog = defaults || self.class.defaults, $0
|
75
|
-
|
76
|
-
init(*args)
|
77
|
-
|
78
|
-
# prevent backtrace on ^C
|
79
|
-
trap(:INT) { exit 130 }
|
80
|
-
end
|
81
|
-
|
82
|
-
def progname
|
83
|
-
::File.basename(prog)
|
84
|
-
end
|
85
|
-
|
86
|
-
def usage
|
87
|
-
self.class.usage(prog)
|
88
|
-
end
|
89
|
-
|
90
|
-
def version
|
91
|
-
self.class.version
|
92
|
-
end
|
93
|
-
|
94
|
-
def execute(arguments = ::ARGV, *inouterr)
|
95
|
-
reset(*inouterr)
|
96
|
-
parse_options(arguments)
|
97
|
-
run(arguments)
|
98
|
-
rescue => err
|
99
|
-
raise if $VERBOSE
|
100
|
-
abort "#{err.backtrace.first}: #{err} (#{err.class})"
|
101
|
-
ensure
|
102
|
-
options.each_value { |value|
|
103
|
-
value.close if value.is_a?(::Zlib::GzipWriter)
|
104
|
-
}
|
105
|
-
end
|
106
|
-
|
107
|
-
def run(arguments)
|
108
|
-
raise ::NotImplementedError, 'must be implemented by subclass'
|
109
|
-
end
|
110
|
-
|
111
|
-
def reset(stdin = ::STDIN, stdout = ::STDOUT, stderr = ::STDERR)
|
112
|
-
@stdin, @stdout, @stderr = stdin, stdout, stderr
|
113
|
-
@options, @config = {}, {}
|
114
|
-
end
|
115
|
-
|
116
|
-
private
|
117
|
-
|
118
|
-
def init(*args)
|
119
|
-
reset
|
120
|
-
end
|
121
|
-
|
122
|
-
def ask(question, &block)
|
123
|
-
::HighLine.new(stdin, stdout).ask(question, &block)
|
124
|
-
end
|
125
|
-
|
126
|
-
def puts(*msg)
|
127
|
-
stdout.puts(*msg)
|
128
|
-
end
|
129
|
-
|
130
|
-
def warn(*msg)
|
131
|
-
stderr.puts(*msg)
|
132
|
-
end
|
133
|
-
|
134
|
-
def quit(msg = nil, include_usage = msg != false)
|
135
|
-
out = []
|
136
|
-
|
137
|
-
out << "#{progname}: #{msg}" if msg
|
138
|
-
out << usage if include_usage
|
139
|
-
|
140
|
-
abort out.any? && out.join("\n\n")
|
141
|
-
end
|
142
|
-
|
143
|
-
def abort(msg = nil, status = 1)
|
144
|
-
warn(msg) if msg
|
145
|
-
exit(status)
|
146
|
-
end
|
147
|
-
|
148
|
-
def shut(msg = nil, status = 0)
|
149
|
-
puts(msg) if msg
|
150
|
-
exit(status)
|
151
|
-
end
|
152
|
-
|
153
|
-
def exit(status = 0)
|
154
|
-
::Kernel.exit(status)
|
155
|
-
end
|
156
|
-
|
157
|
-
def open_file_or_std(file, write = false)
|
158
|
-
if file == '-'
|
159
|
-
write ? stdout : stdin
|
160
|
-
else
|
161
|
-
gz = file =~ /\.gz\z/i
|
162
|
-
|
163
|
-
if write
|
164
|
-
gz ? ::Zlib::GzipWriter.open(file) : ::File.open(file, 'w')
|
165
|
-
else
|
166
|
-
quit "No such file: #{file}" unless ::File.readable?(file)
|
167
|
-
(gz ? ::Zlib::GzipReader : ::File).open(file)
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
def load_config(file = options[:config] || default = defaults[:config])
|
173
|
-
return unless file
|
174
|
-
|
175
|
-
if ::File.readable?(file)
|
176
|
-
@config = ::YAML.load_file(file)
|
177
|
-
else
|
178
|
-
quit "No such file: #{file}" unless default
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
def merge_config(args = [config, defaults])
|
183
|
-
args.each { |hash| hash && hash.each { |key, value|
|
184
|
-
options[key] = value unless options.key?(key)
|
185
|
-
} }
|
186
|
-
end
|
187
|
-
|
188
|
-
def parse_options(arguments)
|
189
|
-
option_parser.parse!(arguments)
|
190
|
-
|
191
|
-
load_config
|
192
|
-
merge_config
|
193
|
-
end
|
194
|
-
|
195
|
-
def option_parser
|
196
|
-
::OptionParser.new { |opts|
|
197
|
-
opts.banner = usage
|
198
|
-
|
199
|
-
pre_opts(opts)
|
200
|
-
|
201
|
-
opts.separator ''
|
202
|
-
opts.separator 'Options:'
|
203
|
-
|
204
|
-
opts(opts)
|
205
|
-
|
206
|
-
opts.separator ''
|
207
|
-
opts.separator 'Generic options:'
|
208
|
-
|
209
|
-
generic_opts(opts)
|
210
|
-
post_opts(opts)
|
211
|
-
}.extend(Nuggets::CLI::OptionParserExtension)
|
212
|
-
end
|
213
|
-
|
214
|
-
def pre_opts(opts)
|
215
|
-
end
|
216
|
-
|
217
|
-
def opts(opts)
|
218
|
-
end
|
219
|
-
|
220
|
-
def generic_opts(opts)
|
221
|
-
opts.on('-h', '--help', 'Print this help message and exit') {
|
222
|
-
shut opts
|
223
|
-
}
|
224
|
-
|
225
|
-
opts.on('--version', 'Print program version and exit') {
|
226
|
-
shut "#{progname} v#{version}"
|
227
|
-
}
|
228
|
-
end
|
229
|
-
|
230
|
-
def post_opts(opts)
|
231
|
-
end
|
232
|
-
|
233
|
-
module OptionParserExtension
|
234
|
-
|
235
|
-
KEY_POOL = ('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a
|
236
|
-
|
237
|
-
def keys
|
238
|
-
{ :used => keys = top.short.keys, :free => KEY_POOL - keys }
|
239
|
-
end
|
240
|
-
|
241
|
-
end
|
242
|
-
|
243
|
-
end
|
5
|
+
warn "#{__FILE__}: Nuggets::CLI is deprecated, use Cyclops instead."
|
6
|
+
rescue LoadError => err
|
7
|
+
warn "#{__FILE__}: Nuggets::CLI is no longer available; install `cyclops' instead. (#{err})"
|
244
8
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
#--
|
4
2
|
###############################################################################
|
5
3
|
# #
|
@@ -27,89 +25,134 @@
|
|
27
25
|
###############################################################################
|
28
26
|
#++
|
29
27
|
|
28
|
+
require 'zlib'
|
29
|
+
|
30
30
|
module Nuggets
|
31
|
-
|
32
|
-
|
31
|
+
class Hash
|
32
|
+
module ZipMixin
|
33
|
+
|
34
|
+
def zip(*args, &block)
|
35
|
+
ZipHash.new(*args, &block)
|
36
|
+
end
|
33
37
|
|
34
|
-
|
38
|
+
def zipval(*args, &block)
|
39
|
+
ZipValHash.new(*args, &block)
|
40
|
+
end
|
35
41
|
|
36
|
-
|
42
|
+
def zipkey(*args, &block)
|
43
|
+
ZipKeyHash.new(*args, &block)
|
44
|
+
end
|
37
45
|
|
38
|
-
|
39
|
-
|
40
|
-
|
46
|
+
class ZipHash < ::Hash
|
47
|
+
|
48
|
+
def [](key)
|
49
|
+
unzipval(super(zipkey(key)))
|
41
50
|
end
|
42
51
|
|
43
|
-
def
|
44
|
-
|
52
|
+
def []=(key, value)
|
53
|
+
super(zipkey(key), zipval(value))
|
45
54
|
end
|
46
55
|
|
47
|
-
|
56
|
+
def fetch(key, *args)
|
57
|
+
unzipval(super(zipkey(key), *args))
|
58
|
+
end
|
48
59
|
|
49
|
-
|
60
|
+
def store(key, value)
|
61
|
+
super(zipkey(key), zipval(value))
|
62
|
+
end
|
50
63
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
64
|
+
private
|
65
|
+
|
66
|
+
def zipval(value)
|
67
|
+
value.is_a?(ZipVal) ? value : ZipVal.new(value)
|
68
|
+
end
|
69
|
+
|
70
|
+
def unzipval(value)
|
71
|
+
value.is_a?(ZipVal) ? value.to_s : value
|
72
|
+
end
|
73
|
+
|
74
|
+
def zipkey(key)
|
75
|
+
key.is_a?(ZipKey) ? key : ZipKey.new(key)
|
76
|
+
end
|
77
|
+
|
78
|
+
def unzipkey(key)
|
79
|
+
key.is_a?(ZipKey) ? key.to_s : key
|
80
|
+
end
|
55
81
|
|
56
|
-
def vs=(vs)
|
57
|
-
@vs = vs.is_a?(::Regexp) ? vs : %r{\s*#{::Regexp.escape(vs)}\s*}
|
58
82
|
end
|
59
83
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
84
|
+
class ZipValHash < ZipHash
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
def zipkey(key)
|
89
|
+
key
|
65
90
|
end
|
66
91
|
|
67
|
-
|
68
|
-
|
92
|
+
def unzipkey(key)
|
93
|
+
key
|
94
|
+
end
|
69
95
|
|
70
|
-
|
71
|
-
|
96
|
+
end
|
97
|
+
|
98
|
+
class ZipKeyHash < ZipHash
|
72
99
|
|
73
|
-
|
74
|
-
block[key ? id : auto_id.call, record]
|
75
|
-
id, record = nil, {}
|
76
|
-
else
|
77
|
-
k, v = line.split(fs, 2)
|
100
|
+
private
|
78
101
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
else
|
83
|
-
v.gsub!(nl, "\n")
|
84
|
-
v = v.split(vs) if v.index(vs)
|
85
|
-
end
|
102
|
+
def zipval(value)
|
103
|
+
value
|
104
|
+
end
|
86
105
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
}
|
106
|
+
def unzipval(value)
|
107
|
+
value
|
108
|
+
end
|
91
109
|
|
92
|
-
self
|
93
110
|
end
|
94
111
|
|
95
|
-
|
112
|
+
class ZipVal
|
96
113
|
|
97
|
-
|
98
|
-
return block unless $VERBOSE && k = @key
|
114
|
+
include Comparable
|
99
115
|
|
100
|
-
|
116
|
+
def initialize(value)
|
117
|
+
@value = zip(value)
|
118
|
+
end
|
101
119
|
|
102
|
-
|
103
|
-
|
104
|
-
|
120
|
+
def <=>(other)
|
121
|
+
to_s <=> other.to_s if self.class.equal?(other.class)
|
122
|
+
end
|
105
123
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
124
|
+
def to_s
|
125
|
+
unzip(@value)
|
126
|
+
end
|
127
|
+
|
128
|
+
def inspect
|
129
|
+
!((s = to_s).length > 64 || s.include?($/)) ? to_s :
|
130
|
+
'#<%s:0x%x length=%p>' % [self.class, object_id, @value.length]
|
131
|
+
end
|
132
|
+
|
133
|
+
def hash
|
134
|
+
to_s.hash
|
135
|
+
end
|
136
|
+
|
137
|
+
def eql?(other)
|
138
|
+
(self <=> other) == 0
|
139
|
+
end
|
140
|
+
|
141
|
+
alias_method :==, :eql?
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
def zip(string)
|
146
|
+
Zlib::Deflate.deflate(string)
|
147
|
+
end
|
148
|
+
|
149
|
+
def unzip(string)
|
150
|
+
Zlib::Inflate.inflate(string)
|
151
|
+
end
|
152
|
+
|
153
|
+
end
|
110
154
|
|
111
|
-
|
112
|
-
}
|
155
|
+
class ZipKey < ZipVal
|
113
156
|
end
|
114
157
|
|
115
158
|
end
|
data/lib/nuggets/lsi.rb
CHANGED
@@ -1,279 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
-
# language. #
|
6
|
-
# #
|
7
|
-
# Copyright (C) 2007-2013 Jens Wille #
|
8
|
-
# #
|
9
|
-
# Authors: #
|
10
|
-
# Jens Wille <jens.wille@gmail.com> #
|
11
|
-
# #
|
12
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
-
# under the terms of the GNU Affero General Public License as published by #
|
14
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
-
# option) any later version. #
|
16
|
-
# #
|
17
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
-
# for more details. #
|
21
|
-
# #
|
22
|
-
# You should have received a copy of the GNU Affero General Public License #
|
23
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
-
# #
|
25
|
-
###############################################################################
|
26
|
-
#++
|
27
|
-
|
28
|
-
require 'forwardable'
|
29
|
-
require 'gsl'
|
30
|
-
|
31
|
-
module Nuggets
|
32
|
-
|
33
|
-
class LSI
|
34
|
-
|
35
|
-
include ::Enumerable
|
36
|
-
|
37
|
-
extend ::Forwardable
|
38
|
-
|
39
|
-
DEFAULT_EPSILON = ::Float::EPSILON * 10
|
40
|
-
|
41
|
-
DEFAULT_PRECISION = 2
|
42
|
-
|
43
|
-
DEFAULT_TRANSFORM = :tfidf
|
44
|
-
|
45
|
-
DEFAULT_CUTOFF = 0.75
|
46
|
-
|
47
|
-
class << self
|
48
|
-
|
49
|
-
def build(items, options = {})
|
50
|
-
lsi = new(items)
|
51
|
-
lsi if lsi.build(options)
|
52
|
-
end
|
53
|
-
|
54
|
-
def each_norm(items, options = {}, build_options = {}, &block)
|
55
|
-
lsi = new(items)
|
56
|
-
lsi.each_norm(nil, options, &block) if lsi.build(build_options)
|
57
|
-
end
|
58
|
-
|
59
|
-
end
|
60
|
-
|
61
|
-
def initialize(items = {})
|
62
|
-
reset
|
63
|
-
items.each { |k, v| self[k] = v || k }
|
64
|
-
end
|
65
|
-
|
66
|
-
def_delegators :@hash, :[], :each, :include?, :key, :keys, :size
|
67
|
-
|
68
|
-
def_delegator :@hash, :values, :docs
|
69
|
-
def_delegator :@hash, :values_at, :docs_at
|
70
|
-
|
71
|
-
def_delegator :@list, :keys, :terms
|
72
|
-
|
73
|
-
alias_method :doc, :[]
|
74
|
-
|
75
|
-
def []=(key, value)
|
76
|
-
@hash[key] = Doc.new(key, value, @list, @freq)
|
77
|
-
end
|
78
|
-
|
79
|
-
def add(key, value = key)
|
80
|
-
self[key] = value
|
81
|
-
self
|
82
|
-
end
|
83
|
-
|
84
|
-
def <<(value)
|
85
|
-
add(value.object_id, value)
|
86
|
-
end
|
87
|
-
|
88
|
-
# min:: minimum value to consider
|
89
|
-
# abs:: minimum absolute value to consider
|
90
|
-
# nul:: exclude null values (true or Float)
|
91
|
-
# new:: exclude original terms / only yield new ones
|
92
|
-
def each_term(key = nil, options = {})
|
93
|
-
return enum_for(:each_term, key, options) unless block_given?
|
94
|
-
|
95
|
-
min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
|
96
|
-
nul = DEFAULT_EPSILON if nul == true
|
97
|
-
|
98
|
-
list, norm = @invlist, options[:norm]
|
99
|
-
|
100
|
-
(key ? [self[key]] : docs).each { |doc|
|
101
|
-
if doc && vec = norm ? doc.norm : doc.vector
|
102
|
-
vec.enum_for(:each).with_index { |v, i|
|
103
|
-
yield doc, list[i], v unless v.nan? ||
|
104
|
-
(min && v < min) ||
|
105
|
-
(abs && v.abs < abs) ||
|
106
|
-
(nul && v.abs < nul) ||
|
107
|
-
(new && doc.include?(i))
|
108
|
-
}
|
109
|
-
end
|
110
|
-
}
|
111
|
-
end
|
112
|
-
|
113
|
-
def each_norm(key = nil, options = {}, &block)
|
114
|
-
each_term(key, options.merge(:norm => true), &block)
|
115
|
-
end
|
116
|
-
|
117
|
-
def related(key, num = 5)
|
118
|
-
if doc = self[key] and norm = doc.norm
|
119
|
-
temp = sort_by { |k, v| -norm * v.norm.col }
|
120
|
-
temp.map! { |k,| k }.delete(key)
|
121
|
-
temp[0, num]
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def related_score(key, num = 5)
|
126
|
-
if doc = self[key] and norm = doc.norm
|
127
|
-
temp = map { |k, v| [k, norm * v.norm.col] }.sort_by { |_, i| -i }
|
128
|
-
temp.delete(temp.assoc(key))
|
129
|
-
temp[0, num]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
def build(options = {})
|
134
|
-
build!(docs, @list, options.is_a?(::Hash) ?
|
135
|
-
options : { :cutoff => options }) if size > 1
|
136
|
-
end
|
137
|
-
|
138
|
-
def reset
|
139
|
-
@hash, @list, @freq, @invlist =
|
140
|
-
{}, ::Hash.new { |h, k| h[k] = h.size }, ::Hash.new(0), {}
|
141
|
-
end
|
142
|
-
|
143
|
-
def inspect
|
144
|
-
'%s@%d/%d' % [self.class, size, @list.size]
|
145
|
-
end
|
146
|
-
|
147
|
-
def to_a(norm = true)
|
148
|
-
(norm ? map { |_, doc| doc.norm.to_a } :
|
149
|
-
map { |_, doc| doc.vector.to_a }).transpose
|
150
|
-
end
|
151
|
-
|
152
|
-
private
|
153
|
-
|
154
|
-
def build!(docs, list, options)
|
155
|
-
Doc.transform = options.fetch(:transform, DEFAULT_TRANSFORM)
|
156
|
-
|
157
|
-
@invlist = list.invert
|
158
|
-
|
159
|
-
# TODO: GSL::ERROR::EUNIMPL: Ruby/GSL error code 24, svd of
|
160
|
-
# MxN matrix, M<N, is not implemented (file svd.c, line 61)
|
161
|
-
u, v, s = matrix(docs, list.size, size = docs.size).SV_decomp
|
162
|
-
|
163
|
-
(u * reduce(s, options.fetch(:cutoff, DEFAULT_CUTOFF)) * v.trans).
|
164
|
-
enum_for(:each_col).with_index { |c, i| docs[i].vector = c.row }
|
165
|
-
|
166
|
-
size
|
167
|
-
end
|
168
|
-
|
169
|
-
def matrix(d = docs, m = @list.size, n = d.size)
|
170
|
-
x = ::GSL::Matrix.alloc(m, n)
|
171
|
-
d.each_with_index { |i, j| x.set_col(j, i.transformed_vector(m, n)) }
|
172
|
-
x
|
173
|
-
end
|
174
|
-
|
175
|
-
# k == nil:: keep all
|
176
|
-
# k >= 1:: keep this many
|
177
|
-
# k < 1:: keep (at most) this proportion
|
178
|
-
def reduce(s, k, m = s.size)
|
179
|
-
if k && k < m
|
180
|
-
k > 0 ? s[k = (k < 1 ? m * k : k).floor, m - k] = 0 : s.set_zero
|
181
|
-
end
|
182
|
-
|
183
|
-
s.to_m_diagonal
|
184
|
-
end
|
185
|
-
|
186
|
-
class Doc
|
187
|
-
|
188
|
-
include ::Enumerable
|
189
|
-
|
190
|
-
extend ::Forwardable
|
191
|
-
|
192
|
-
TOKEN_RE = %r{\s+}
|
193
|
-
|
194
|
-
class << self
|
195
|
-
|
196
|
-
attr_reader :transform
|
197
|
-
|
198
|
-
def transform=(transform)
|
199
|
-
method = :transformed_vector
|
200
|
-
|
201
|
-
case transform
|
202
|
-
when ::Proc then define_method(method, &transform)
|
203
|
-
when ::UnboundMethod then define_method(method, transform)
|
204
|
-
else alias_method(method, "#{transform ||= :raw}_vector")
|
205
|
-
end
|
206
|
-
|
207
|
-
@transform = transform.to_sym
|
208
|
-
end
|
209
|
-
|
210
|
-
end
|
211
|
-
|
212
|
-
def initialize(key, value, list, freq)
|
213
|
-
@key, @list, @freq, @total = key, list, freq, 1
|
214
|
-
|
215
|
-
@map = !value.is_a?(::Hash) ? build_hash(value, list) :
|
216
|
-
value.inject({}) { |h, (k, v)| h[list[k]] = v; h }
|
217
|
-
|
218
|
-
@map.each_key { |k| freq[k] += 1 }
|
219
|
-
|
220
|
-
self.vector = raw_vector
|
221
|
-
end
|
222
|
-
|
223
|
-
attr_reader :key, :vector, :norm
|
224
|
-
|
225
|
-
def_delegators :@map, :each, :include?
|
226
|
-
|
227
|
-
def_delegator :raw_vector, :sum, :size
|
228
|
-
|
229
|
-
def raw_vector(size = @list.size, *)
|
230
|
-
vec = ::GSL::Vector.calloc(size)
|
231
|
-
each { |k, v| vec[k] = v }
|
232
|
-
vec
|
233
|
-
end
|
234
|
-
|
235
|
-
# TODO: "first-order association transform" ???
|
236
|
-
def foat_vector(*args)
|
237
|
-
vec, q = raw_vector(*args), 0
|
238
|
-
return vec unless (s = vec.sum) > 1
|
239
|
-
|
240
|
-
vec.each { |v| q -= (w = v / s) * ::Math.log(w) if v > 0 }
|
241
|
-
vec.map { |v| ::Math.log(v + 1) / q }
|
242
|
-
end
|
243
|
-
|
244
|
-
def tfidf_vector(*args)
|
245
|
-
vec, f = raw_vector(*args), @freq
|
246
|
-
s, d = vec.sum, @total = args.fetch(1, @total).to_f
|
247
|
-
|
248
|
-
vec.enum_for(:map).with_index { |v, i|
|
249
|
-
v > 0 ? ::Math.log(d / f[i]) * v / s : v }
|
250
|
-
end
|
251
|
-
|
252
|
-
self.transform = DEFAULT_TRANSFORM
|
253
|
-
|
254
|
-
def vector=(vec)
|
255
|
-
@vector, @norm = vec, vec.normalize
|
256
|
-
end
|
257
|
-
|
258
|
-
def inspect
|
259
|
-
'%s@%p/%d' % [self.class, key, size]
|
260
|
-
end
|
261
|
-
|
262
|
-
private
|
263
|
-
|
264
|
-
def build_hash(value, list, hash = ::Hash.new(0))
|
265
|
-
build_enum(value).each { |i| hash[list[i]] += 1 }
|
266
|
-
hash
|
267
|
-
end
|
268
|
-
|
269
|
-
def build_enum(value, re = TOKEN_RE)
|
270
|
-
value = value.read if value.respond_to?(:read)
|
271
|
-
value = value.split(re) if value.respond_to?(:split)
|
272
|
-
value
|
273
|
-
end
|
274
|
-
|
275
|
-
end
|
276
|
-
|
277
|
-
end
|
1
|
+
begin
|
2
|
+
require 'lsi4r'
|
3
|
+
module Nuggets; LSI = ::Lsi4R; end
|
278
4
|
|
5
|
+
warn "#{__FILE__}: Nuggets::LSI is deprecated, use Lsi4R instead."
|
6
|
+
rescue LoadError => err
|
7
|
+
warn "#{__FILE__}: Nuggets::LSI is no longer available; install `lsi4r' instead. (#{err})"
|
279
8
|
end
|
data/lib/nuggets/midos.rb
CHANGED
@@ -1,91 +1,8 @@
|
|
1
|
-
|
1
|
+
begin
|
2
|
+
require 'midos'
|
3
|
+
module Nuggets; Midos = ::Midos; end
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
#
|
6
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
7
|
-
# language. #
|
8
|
-
# #
|
9
|
-
# Copyright (C) 2007-2014 Jens Wille #
|
10
|
-
# #
|
11
|
-
# Authors: #
|
12
|
-
# Jens Wille <jens.wille@gmail.com> #
|
13
|
-
# #
|
14
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
15
|
-
# under the terms of the GNU Affero General Public License as published by #
|
16
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
17
|
-
# option) any later version. #
|
18
|
-
# #
|
19
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
20
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
21
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
22
|
-
# for more details. #
|
23
|
-
# #
|
24
|
-
# You should have received a copy of the GNU Affero General Public License #
|
25
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
26
|
-
# #
|
27
|
-
###############################################################################
|
28
|
-
#++
|
29
|
-
|
30
|
-
require 'nuggets/midos/base'
|
31
|
-
require 'nuggets/midos/reader'
|
32
|
-
require 'nuggets/midos/writer'
|
33
|
-
|
34
|
-
module Nuggets
|
35
|
-
module Midos
|
36
|
-
|
37
|
-
# Record separator
|
38
|
-
DEFAULT_RS = '&&&'
|
39
|
-
|
40
|
-
# Field separator
|
41
|
-
DEFAULT_FS = ':'
|
42
|
-
|
43
|
-
# Value separator
|
44
|
-
DEFAULT_VS = '|'
|
45
|
-
|
46
|
-
# Line break indicator
|
47
|
-
DEFAULT_NL = '^'
|
48
|
-
|
49
|
-
# Line ending
|
50
|
-
DEFAULT_LE = "\r\n"
|
51
|
-
|
52
|
-
# Default file encoding
|
53
|
-
DEFAULT_ENCODING = 'iso-8859-1'
|
54
|
-
|
55
|
-
class << self
|
56
|
-
|
57
|
-
def filter(source, target, source_options = {}, target_options = source_options)
|
58
|
-
writer, size = Writer.new(target_options.merge(:io => target)), 0
|
59
|
-
|
60
|
-
Reader.parse(source, source_options) { |*args|
|
61
|
-
writer << args and size += 1 if yield(*args)
|
62
|
-
}
|
63
|
-
|
64
|
-
size
|
65
|
-
end
|
66
|
-
|
67
|
-
def filter_file(source_file, target_file, source_options = {}, target_options = source_options, &block)
|
68
|
-
open_file(source_file, source_options) { |source|
|
69
|
-
open_file(target_file, target_options, 'w') { |target|
|
70
|
-
filter(source, target, source_options, target_options, &block)
|
71
|
-
}
|
72
|
-
}
|
73
|
-
end
|
74
|
-
|
75
|
-
def convert(*args)
|
76
|
-
filter(*args) { |*| true }
|
77
|
-
end
|
78
|
-
|
79
|
-
def convert_file(*args)
|
80
|
-
filter_file(*args) { |*| true }
|
81
|
-
end
|
82
|
-
|
83
|
-
def open_file(file, options = {}, mode = 'r', &block)
|
84
|
-
encoding = options[:encoding] ||= DEFAULT_ENCODING
|
85
|
-
::File.open(file, mode, :encoding => encoding, &block)
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
89
|
-
|
90
|
-
end
|
5
|
+
warn "#{__FILE__}: Nuggets::Midos is deprecated, use Midos instead."
|
6
|
+
rescue LoadError => err
|
7
|
+
warn "#{__FILE__}: Nuggets::Midos is no longer available; install `midos' instead. (#{err})"
|
91
8
|
end
|
data/lib/nuggets/version.rb
CHANGED
@@ -23,7 +23,7 @@ describe Object, 'when extended by', Nuggets::Object::SingletonClassMixin do
|
|
23
23
|
|
24
24
|
example do
|
25
25
|
nil.singleton_class.should == NilClass
|
26
|
-
NilClass.should be_a_singleton_class
|
26
|
+
#NilClass.should be_a_singleton_class
|
27
27
|
NilClass.singleton_object.should be_equal(nil)
|
28
28
|
end
|
29
29
|
|
metadata
CHANGED
@@ -1,15 +1,85 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-nuggets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.7
|
4
|
+
version: 0.9.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
12
|
-
dependencies:
|
11
|
+
date: 2014-04-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mime-types
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: open4
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hen
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
13
83
|
description: Some extensions to the Ruby programming language.
|
14
84
|
email: jens.wille@gmail.com
|
15
85
|
executables: []
|
@@ -99,6 +169,8 @@ files:
|
|
99
169
|
- lib/nuggets/hash/seen_mixin.rb
|
100
170
|
- lib/nuggets/hash/unroll.rb
|
101
171
|
- lib/nuggets/hash/unroll_mixin.rb
|
172
|
+
- lib/nuggets/hash/zip.rb
|
173
|
+
- lib/nuggets/hash/zip_mixin.rb
|
102
174
|
- lib/nuggets/i18n.rb
|
103
175
|
- lib/nuggets/integer/factorial.rb
|
104
176
|
- lib/nuggets/integer/length.rb
|
@@ -120,9 +192,6 @@ files:
|
|
120
192
|
- lib/nuggets/log_parser/rails.rb
|
121
193
|
- lib/nuggets/lsi.rb
|
122
194
|
- lib/nuggets/midos.rb
|
123
|
-
- lib/nuggets/midos/base.rb
|
124
|
-
- lib/nuggets/midos/reader.rb
|
125
|
-
- lib/nuggets/midos/writer.rb
|
126
195
|
- lib/nuggets/mysql.rb
|
127
196
|
- lib/nuggets/net/success.rb
|
128
197
|
- lib/nuggets/numeric/between.rb
|
@@ -240,7 +309,7 @@ metadata: {}
|
|
240
309
|
post_install_message:
|
241
310
|
rdoc_options:
|
242
311
|
- "--title"
|
243
|
-
- ruby-nuggets Application documentation (v0.9.7)
|
312
|
+
- ruby-nuggets Application documentation (v0.9.8.1)
|
244
313
|
- "--charset"
|
245
314
|
- UTF-8
|
246
315
|
- "--line-numbers"
|
data/lib/nuggets/midos/base.rb
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#--
|
4
|
-
###############################################################################
|
5
|
-
# #
|
6
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
7
|
-
# language. #
|
8
|
-
# #
|
9
|
-
# Copyright (C) 2007-2014 Jens Wille #
|
10
|
-
# #
|
11
|
-
# Authors: #
|
12
|
-
# Jens Wille <jens.wille@gmail.com> #
|
13
|
-
# #
|
14
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
15
|
-
# under the terms of the GNU Affero General Public License as published by #
|
16
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
17
|
-
# option) any later version. #
|
18
|
-
# #
|
19
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
20
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
21
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
22
|
-
# for more details. #
|
23
|
-
# #
|
24
|
-
# You should have received a copy of the GNU Affero General Public License #
|
25
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
26
|
-
# #
|
27
|
-
###############################################################################
|
28
|
-
#++
|
29
|
-
|
30
|
-
module Nuggets
  module Midos

    # Shared base class for Midos IO helpers (Writer inherits from it).
    #
    # Holds the record key, the separator settings and the IO object, and
    # manages the generator used to assign automatic record ids.
    #
    # Note: only a reader is declared for +vs+, while #initialize assigns
    # through <tt>self.vs=</tt>. Subclasses therefore have to provide their
    # own +vs=+ writer; Base cannot be instantiated on its own.
    class Base

      class << self

        private

        # Opens +file+ through Midos.open_file (passing +options+ and +mode+)
        # and, inside the block it yields to, either
        #
        # * calls the class method +method+ with +options+ (plus the opened
        #   IO under <tt>:io</tt>), any extra +args+ and the given block, or
        # * when +method+ is +nil+, instantiates the class with those
        #   arguments and passes the instance to the block.
        #
        # Returns whatever Midos.open_file returns.
        #
        # Raises ArgumentError if neither +method+ nor a block is given.
        def file_method(method, mode, file, options = {}, *args, &block)
          # Fail before the file is touched: without this guard the missing
          # block is only noticed (as NoMethodError on nil) after
          # Midos.open_file has already opened +file+ in +mode+.
          raise ::ArgumentError, 'no block given' unless method || block

          Midos.open_file(file, options, mode) { |io|
            args.unshift(options.merge(:io => io))
            method ? send(method, *args, &block) : block[new(*args)]
          }
        end

        # Removes and returns a trailing Hash from +args+ (modifying +args+
        # in place), or returns an empty Hash if the last element is not
        # a Hash.
        def extract_options!(args)
          args.last.is_a?(::Hash) ? args.pop : {}
        end

      end

      # Recognized +options+:
      #
      # <tt>:key</tt>::     record key (no default)
      # <tt>:rs</tt>::      defaults to DEFAULT_RS
      # <tt>:fs</tt>::      defaults to DEFAULT_FS
      # <tt>:vs</tt>::      defaults to DEFAULT_VS
      # <tt>:nl</tt>::      defaults to DEFAULT_NL
      # <tt>:le</tt>::      defaults to DEFAULT_LE
      # <tt>:io</tt>::      defaults to the subclass's DEFAULT_IO
      # <tt>:auto_id</tt>:: callable that *returns* the id generator; the
      #                     block, if given, serves the same purpose when
      #                     this option is absent
      #
      # A +nil+ or +false+ option value falls back to the default, too.
      def initialize(options = {}, &block)
        self.key = options[:key]

        self.rs = options[:rs] || DEFAULT_RS
        self.fs = options[:fs] || DEFAULT_FS
        self.vs = options[:vs] || DEFAULT_VS
        self.nl = options[:nl] || DEFAULT_NL
        self.le = options[:le] || DEFAULT_LE
        self.io = options[:io] || self.class::DEFAULT_IO

        @auto_id_block = options[:auto_id] || block
        reset
      end

      attr_accessor :key, :rs, :fs, :nl, :le, :io, :auto_id

      attr_reader :vs

      # (Re-)creates the id generator: the result of calling the configured
      # +auto_id+ callable, or a fresh counter starting at 1 if none was
      # configured.
      def reset
        @auto_id = @auto_id_block ? @auto_id_block.call : default_auto_id
      end

      private

      # Returns a lambda that yields <tt>n + 1</tt>, <tt>n + 2</tt>, ... on
      # successive calls.
      def default_auto_id(n = 0)
        lambda { n += 1 }
      end

    end
  end
end
|
data/lib/nuggets/midos/writer.rb
DELETED
@@ -1,252 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#--
|
4
|
-
###############################################################################
|
5
|
-
# #
|
6
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
7
|
-
# language. #
|
8
|
-
# #
|
9
|
-
# Copyright (C) 2007-2014 Jens Wille #
|
10
|
-
# #
|
11
|
-
# Authors: #
|
12
|
-
# Jens Wille <jens.wille@gmail.com> #
|
13
|
-
# #
|
14
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
15
|
-
# under the terms of the GNU Affero General Public License as published by #
|
16
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
17
|
-
# option) any later version. #
|
18
|
-
# #
|
19
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
20
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
21
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
22
|
-
# for more details. #
|
23
|
-
# #
|
24
|
-
# You should have received a copy of the GNU Affero General Public License #
|
25
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
26
|
-
# #
|
27
|
-
###############################################################################
|
28
|
-
#++
|
29
|
-
|
30
|
-
require 'nuggets/hash/idmap'
|
31
|
-
|
32
|
-
module Nuggets
  module Midos

    # Writes records to an IO object (<tt>$stdout</tt> by default), one
    # field per line, using the separators configured on Base.
    class Writer < Base

      DEFAULT_IO = $stdout

      class << self

        # Creates a writer from the trailing options Hash in +args+ (if
        # any) and the block, then writes the remaining +args+ with it.
        # Returns the writer.
        def write(*args, &block)
          new(extract_options!(args), &block).write(*args)
        end

        # Like ::write, but with the IO opened from the file given as
        # first argument (mode <tt>'w'</tt>).
        def write_file(*args, &block)
          file_method(:write, 'w', *args, &block)
        end

        # Opens the file given as first argument (mode <tt>'w'</tt>) and
        # passes a new writer on that IO to the block.
        def open(*args, &block)
          file_method(nil, 'w', *args, &block)
        end

      end

      # Sets the value separator.
      #
      # Raises TypeError unless +vs+ is a String.
      def vs=(vs)
        vs.is_a?(::String) ? @vs = vs : raise(::TypeError,
          "wrong argument type #{vs.class} (expected String)")
      end

      # Writes all +records+. A Hash is treated as a mapping from id to
      # record; any other collection is written with automatic ids. Extra
      # +args+ are passed on to #write_i (i.e. an alternative IO).
      #
      # Returns +self+.
      def write(records, *args)
        if records.is_a?(::Hash)
          records.each { |id, record| write_i(id, record, *args) }
        else
          records.each { |record| write_i(nil, record, *args) }
        end

        self
      end

      # Writes a single +record+: either a Hash (written with an automatic
      # id) or an <tt>[id, record]</tt> pair. Extra +args+ are passed on to
      # #write_i.
      #
      # Returns +self+.
      def put(record, *args)
        if record.is_a?(::Hash)
          write_i(nil, record, *args)
        else
          write_i(*args.unshift(*record))
        end

        self
      end

      alias_method :<<, :put

      private

      # Writes +record+ to +io+ (the writer's own IO unless given).
      #
      # * Empty records are skipped.
      # * If a key is configured and +record+ lacks it, the key is set to
      #   +id+ or, failing that, the next automatic id. NOTE: this modifies
      #   the +record+ object passed in.
      # * Fields with a +nil+ or +false+ value are skipped.
      # * A field with a key is written as key, field separator, value and
      #   line ending; Array values are joined with the value separator and
      #   newlines inside the value are replaced with +nl+.
      # * A field without a key (+nil+) has each of its values written
      #   verbatim on a line of its own.
      # * The record is terminated by the record separator followed by two
      #   line endings.
      def write_i(id, record, io = self.io)
        # The default must be spelled <tt>self.io</tt>: a bare <tt>io = io</tt>
        # is a circular argument reference, which Ruby >= 2.2 resolves to the
        # (still nil) parameter itself rather than to the #io accessor.
        return if record.empty?

        if @key && !record.key?(@key)
          record[@key] = id || @auto_id.call
        end

        record.each { |k, v|
          if v
            if k
              v = v.is_a?(::Array) ? v.join(@vs) : v.to_s
              io << k << @fs << v.gsub("\n", @nl) << @le
            else
              Array(v).each { |w| io << w.to_s << @le }
            end
          end
        }

        io << @rs << @le << @le
      end

      # Writer that wraps the records in a prologue and an epilogue record
      # (see #instruct!).
      class Thesaurus < self

        PROLOGUE = {
          :PAR => '1011111111110000000010001000000000000010',
          :DAT => '00000000',
          :DES => 'DE',
          :TOP => 'TP~TP',
          :KLA => 'CC~CC',
          :OBR => 'BT~BT',
          :UTR => 'NT~NT',
          :SYN => 'UF~USE',
          :FRU => 'PT~PT für',
          :VER => 'RT~RT',
          :SP1 => 'ENG~ENG für',
          :SP2 => 'FRA~FRA für',
          :SP3 => 'SPA~SPA für',
          :SP4 => 'ITA~ITA für',
          :SP5 => 'GRI~GRI für',
          :SP6 => 'RUS~RUS für',
          :SP7 => 'POL~POL für',
          :SP8 => 'UNG~UNG für',
          :SP9 => 'TSC~TSC für',
          :SN1 => 'SN1',
          :SN2 => 'SN2',
          :SN3 => 'SN3',
          :SN4 => 'SN4',
          :SN5 => 'SN5',
          :DA1 => 'DATE1',
          :DA2 => 'DATE2',
          :DA3 => 'DATE3',
          :DA4 => 'DATE4',
          :KLD => 'MIDOS Thesaurus',
          :KOM => ' / ',
          :KO1 => 'UF',
          :KO2 => 'USE',
          :TLE => ' 32000 Zeichen',
          :PAW => '',
          :ART => '00000',
          :REL => ' 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18 19 20 21 22 23 24 25'
        }

        EPILOGUE = {
          :DE => '*****NICHTDESKRIPTORRELATIONEN*****'
        }

        # Prologue entries whose first <tt>~</tt>-separated part names a
        # record field that is resolved by default (see #resolve_from_to).
        RESOLVE_FROM = [:OBR, :UTR, :VER]

        # Prologue entry naming the record field that references are
        # resolved to.
        RESOLVE_TO = :DES

        # Prologue entry that receives the <tt>:name</tt> option.
        NAME = :KLD

        class << self

          # Like Writer.write, but wrapped in prologue and epilogue.
          def write(*args, &block)
            new(extract_options!(args), &block).instruct! { |mth| mth.write(*args) }
          end

          # Like Writer.open, but wrapped in prologue and epilogue.
          def open(*args, &block)
            super { |mth| mth.instruct!(&block) }
          end

        end

        # Takes the same +options+ as Writer plus <tt>:name</tt>, which is
        # stored under the NAME entry of +prologue+ unless that is already
        # set (NOTE: this modifies the +prologue+ Hash passed in).
        # +prologue+ and +epilogue+ override the class's PROLOGUE and
        # EPILOGUE defaults entry by entry.
        def initialize(options = {}, prologue = {}, epilogue = {}, &block)
          super(options, &block)

          prologue[self.class::NAME] ||= options[:name]

          @prologue = self.class::PROLOGUE.merge(prologue)
          @epilogue = self.class::EPILOGUE.merge(epilogue)
        end

        attr_reader :prologue, :epilogue

        # Writes the prologue, yields the writer, then writes the epilogue.
        # +args+ are passed on to #put. Returns +self+.
        def instruct!(*args)
          put(prologue, *args)
          yield self
          put(epilogue, *args)
        end

        private

        # Fills +hash+ with a copy of each record in +records+ (an id to
        # record mapping), passing every value through #resolve. +args+ are
        # handed to #resolve_from_to. Not called within this file.
        def merge_records(hash, records, *args)
          args = [hash, records, *resolve_from_to(*args)]

          records.each { |id, record|
            new_record = hash[id] = {}
            record.each { |key, value| new_record[key] = resolve(key, value, *args) }
          }
        end

        # Returns the <tt>[from, to]</tt> pair used by #resolve. A +from+ of
        # +nil+ or +true+ selects the fields named by the RESOLVE_FROM
        # prologue entries; +to+ defaults to the RESOLVE_TO prologue entry.
        def resolve_from_to(from = nil, to = prologue[RESOLVE_TO])
          if from.nil? || from == true
            from = prologue.values_at(*RESOLVE_FROM).map { |v| v.split('~').first }
          end

          [from, to]
        end

        # For fields listed in +from+, maps each id in +value+ to the +to+
        # field of the referenced record; other values pass through
        # unchanged.
        def resolve(key, value, hash, records, from = nil, to = nil)
          from && from.include?(key) ? value.map { |id| records[id][to] } : value
        end

      end

      # Thesaurus variant with a different prologue and no epilogue, which
      # resolves values through an id map instead of record fields.
      class ThesaurusX < Thesaurus

        PROLOGUE = {
          'MTX-PARAMETER' => '',
          :BEZ            => 'MIDOS Thesaurus',
          :KOM            => ' / ',
          :TXL            => 0,
          :REL            => '',
          nil             => %w[
            TT1|Topterm|TT1||||||
            BT1|Oberbegriff|BT1||||||
            NT1|Unterbegriff|NT1||||||
            RT1|Verwandter\ Begriff|RT1||||||
            SY1|Synonym1|SY1|SY1FOR|||||
          ]
        }

        EPILOGUE = {}

        NAME = :BEZ

        private

        # Merges the records as in Thesaurus while collecting every resolved
        # value in an id map stored under <tt>hash[:__list__]</tt>; afterwards
        # that map is replaced by a single keyless entry listing
        # <tt>"key|DE|id"</tt> lines.
        #
        # NOTE(review): relies on Hash.idmap from nuggets/hash/idmap, which
        # presumably assigns an id to each key on first lookup -- confirm.
        def merge_records(hash, *)
          idmap = hash[:__list__] = ::Hash.idmap

          super

          idmap.replace(nil => idmap.map { |key, id| "#{key}|DE|#{id}" })
        end

        def resolve_from_to(*)
          # nothing to do
        end

        # Maps each entry of +value+ through the id map, regardless of
        # +key+.
        def resolve(key, value, hash, *)
          value.map { |id| hash[:__list__][id] }
        end

      end

    end
  end
end
|