ruby-nuggets 0.9.7 → 0.9.8.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README +5 -5
- data/Rakefile +11 -1
- data/lib/nuggets/cli.rb +6 -242
- data/lib/nuggets/hash/zip.rb +5 -0
- data/lib/nuggets/{midos/reader.rb → hash/zip_mixin.rb} +101 -58
- data/lib/nuggets/lsi.rb +6 -277
- data/lib/nuggets/midos.rb +6 -89
- data/lib/nuggets/version.rb +2 -2
- data/spec/nuggets/object/singleton_class_spec.rb +1 -1
- metadata +76 -7
- data/lib/nuggets/midos/base.rb +0 -81
- data/lib/nuggets/midos/writer.rb +0 -252
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 20e3a6fc4cdbc194e6fae2ce281cba22be27d19b
|
4
|
+
data.tar.gz: f96c5db576f411f940de1effc136ba022699a13f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d6b4aa7f85cb7cd2ca92ffe24cac338dcb45dd7264b63e5beb855a7d62b45da42c4779111164a08f6bea6a109c71495e4363c92b2f57cbb3f8b15ef7e8856f78
|
7
|
+
data.tar.gz: aff5162458b1fa163a0a8142ddf16720ec0a08c975535d3e2a1f0c312015e0979197a25451aba6807a08c44463bbfdf344f2b582498393a6d59e2e2bb6581546
|
data/README
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
== VERSION
|
4
4
|
|
5
|
-
This documentation refers to ruby-nuggets version 0.9.7
|
5
|
+
This documentation refers to ruby-nuggets version 0.9.8
|
6
6
|
|
7
7
|
|
8
8
|
== DESCRIPTION
|
@@ -33,10 +33,10 @@ cause other libraries to misbehave. Use at your own risk!
|
|
33
33
|
|
34
34
|
== LINKS
|
35
35
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
36
|
+
Documentation:: https://blackwinter.github.io/ruby-nuggets/
|
37
|
+
Source code:: https://github.com/blackwinter/ruby-nuggets
|
38
|
+
RubyGem:: https://rubygems.org/gems/ruby-nuggets
|
39
|
+
Travis CI:: https://travis-ci.org/blackwinter/ruby-nuggets
|
40
40
|
|
41
41
|
|
42
42
|
== AUTHORS
|
data/Rakefile
CHANGED
@@ -12,7 +12,17 @@ begin
|
|
12
12
|
:email => %q{jens.wille@gmail.com},
|
13
13
|
:license => %q{AGPL-3.0},
|
14
14
|
:homepage => :blackwinter,
|
15
|
-
:dependencies => %w[]
|
15
|
+
:dependencies => %w[],
|
16
|
+
|
17
|
+
:development_dependencies => [
|
18
|
+
#'amatch', # enumerable/agrep
|
19
|
+
'mime-types', # content_type
|
20
|
+
'open4', # ruby
|
21
|
+
#'rbzip2', # rdf/compression
|
22
|
+
#'rdf', # rdf/{prefix,uri,compression}
|
23
|
+
#'rdf-turtle', # rdf/turtle
|
24
|
+
#'ruby-filemagic' # content_type
|
25
|
+
]
|
16
26
|
}
|
17
27
|
}}
|
18
28
|
rescue LoadError => err
|
data/lib/nuggets/cli.rb
CHANGED
@@ -1,244 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
-
# language. #
|
6
|
-
# #
|
7
|
-
# Copyright (C) 2007-2011 Jens Wille #
|
8
|
-
# #
|
9
|
-
# Authors: #
|
10
|
-
# Jens Wille <jens.wille@gmail.com> #
|
11
|
-
# #
|
12
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
-
# under the terms of the GNU Affero General Public License as published by #
|
14
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
-
# option) any later version. #
|
16
|
-
# #
|
17
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
-
# for more details. #
|
21
|
-
# #
|
22
|
-
# You should have received a copy of the GNU Affero General Public License #
|
23
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
-
# #
|
25
|
-
###############################################################################
|
26
|
-
#++
|
1
|
+
begin
|
2
|
+
require 'cyclops'
|
3
|
+
module Nuggets; CLI = ::Cyclops; end
|
27
4
|
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
require 'highline'
|
32
|
-
|
33
|
-
module Nuggets
|
34
|
-
class CLI
|
35
|
-
|
36
|
-
class << self
|
37
|
-
|
38
|
-
def usage(prog)
|
39
|
-
"Usage: #{prog} [-h|--help] [options]"
|
40
|
-
end
|
41
|
-
|
42
|
-
def version
|
43
|
-
parent_const_get(:VERSION)
|
44
|
-
end
|
45
|
-
|
46
|
-
def defaults
|
47
|
-
{}
|
48
|
-
end
|
49
|
-
|
50
|
-
def execute(*args)
|
51
|
-
new.execute(*args)
|
52
|
-
end
|
53
|
-
|
54
|
-
private
|
55
|
-
|
56
|
-
def parent_const_get(const, range = 0...-1)
|
57
|
-
name.split('::').inject([::Object]) { |memo, name|
|
58
|
-
memo << memo.last.const_get(name)
|
59
|
-
}.reverse[range].each { |mod|
|
60
|
-
return mod.const_get(const) if mod.const_defined?(const)
|
61
|
-
}
|
62
|
-
|
63
|
-
raise ::NameError, "uninitialized constant #{self}::#{const}"
|
64
|
-
end
|
65
|
-
|
66
|
-
end
|
67
|
-
|
68
|
-
attr_reader :options, :config, :defaults
|
69
|
-
attr_reader :stdin, :stdout, :stderr
|
70
|
-
|
71
|
-
attr_accessor :prog
|
72
|
-
|
73
|
-
def initialize(defaults = nil, *args)
|
74
|
-
@defaults, @prog = defaults || self.class.defaults, $0
|
75
|
-
|
76
|
-
init(*args)
|
77
|
-
|
78
|
-
# prevent backtrace on ^C
|
79
|
-
trap(:INT) { exit 130 }
|
80
|
-
end
|
81
|
-
|
82
|
-
def progname
|
83
|
-
::File.basename(prog)
|
84
|
-
end
|
85
|
-
|
86
|
-
def usage
|
87
|
-
self.class.usage(prog)
|
88
|
-
end
|
89
|
-
|
90
|
-
def version
|
91
|
-
self.class.version
|
92
|
-
end
|
93
|
-
|
94
|
-
def execute(arguments = ::ARGV, *inouterr)
|
95
|
-
reset(*inouterr)
|
96
|
-
parse_options(arguments)
|
97
|
-
run(arguments)
|
98
|
-
rescue => err
|
99
|
-
raise if $VERBOSE
|
100
|
-
abort "#{err.backtrace.first}: #{err} (#{err.class})"
|
101
|
-
ensure
|
102
|
-
options.each_value { |value|
|
103
|
-
value.close if value.is_a?(::Zlib::GzipWriter)
|
104
|
-
}
|
105
|
-
end
|
106
|
-
|
107
|
-
def run(arguments)
|
108
|
-
raise ::NotImplementedError, 'must be implemented by subclass'
|
109
|
-
end
|
110
|
-
|
111
|
-
def reset(stdin = ::STDIN, stdout = ::STDOUT, stderr = ::STDERR)
|
112
|
-
@stdin, @stdout, @stderr = stdin, stdout, stderr
|
113
|
-
@options, @config = {}, {}
|
114
|
-
end
|
115
|
-
|
116
|
-
private
|
117
|
-
|
118
|
-
def init(*args)
|
119
|
-
reset
|
120
|
-
end
|
121
|
-
|
122
|
-
def ask(question, &block)
|
123
|
-
::HighLine.new(stdin, stdout).ask(question, &block)
|
124
|
-
end
|
125
|
-
|
126
|
-
def puts(*msg)
|
127
|
-
stdout.puts(*msg)
|
128
|
-
end
|
129
|
-
|
130
|
-
def warn(*msg)
|
131
|
-
stderr.puts(*msg)
|
132
|
-
end
|
133
|
-
|
134
|
-
def quit(msg = nil, include_usage = msg != false)
|
135
|
-
out = []
|
136
|
-
|
137
|
-
out << "#{progname}: #{msg}" if msg
|
138
|
-
out << usage if include_usage
|
139
|
-
|
140
|
-
abort out.any? && out.join("\n\n")
|
141
|
-
end
|
142
|
-
|
143
|
-
def abort(msg = nil, status = 1)
|
144
|
-
warn(msg) if msg
|
145
|
-
exit(status)
|
146
|
-
end
|
147
|
-
|
148
|
-
def shut(msg = nil, status = 0)
|
149
|
-
puts(msg) if msg
|
150
|
-
exit(status)
|
151
|
-
end
|
152
|
-
|
153
|
-
def exit(status = 0)
|
154
|
-
::Kernel.exit(status)
|
155
|
-
end
|
156
|
-
|
157
|
-
def open_file_or_std(file, write = false)
|
158
|
-
if file == '-'
|
159
|
-
write ? stdout : stdin
|
160
|
-
else
|
161
|
-
gz = file =~ /\.gz\z/i
|
162
|
-
|
163
|
-
if write
|
164
|
-
gz ? ::Zlib::GzipWriter.open(file) : ::File.open(file, 'w')
|
165
|
-
else
|
166
|
-
quit "No such file: #{file}" unless ::File.readable?(file)
|
167
|
-
(gz ? ::Zlib::GzipReader : ::File).open(file)
|
168
|
-
end
|
169
|
-
end
|
170
|
-
end
|
171
|
-
|
172
|
-
def load_config(file = options[:config] || default = defaults[:config])
|
173
|
-
return unless file
|
174
|
-
|
175
|
-
if ::File.readable?(file)
|
176
|
-
@config = ::YAML.load_file(file)
|
177
|
-
else
|
178
|
-
quit "No such file: #{file}" unless default
|
179
|
-
end
|
180
|
-
end
|
181
|
-
|
182
|
-
def merge_config(args = [config, defaults])
|
183
|
-
args.each { |hash| hash && hash.each { |key, value|
|
184
|
-
options[key] = value unless options.key?(key)
|
185
|
-
} }
|
186
|
-
end
|
187
|
-
|
188
|
-
def parse_options(arguments)
|
189
|
-
option_parser.parse!(arguments)
|
190
|
-
|
191
|
-
load_config
|
192
|
-
merge_config
|
193
|
-
end
|
194
|
-
|
195
|
-
def option_parser
|
196
|
-
::OptionParser.new { |opts|
|
197
|
-
opts.banner = usage
|
198
|
-
|
199
|
-
pre_opts(opts)
|
200
|
-
|
201
|
-
opts.separator ''
|
202
|
-
opts.separator 'Options:'
|
203
|
-
|
204
|
-
opts(opts)
|
205
|
-
|
206
|
-
opts.separator ''
|
207
|
-
opts.separator 'Generic options:'
|
208
|
-
|
209
|
-
generic_opts(opts)
|
210
|
-
post_opts(opts)
|
211
|
-
}.extend(Nuggets::CLI::OptionParserExtension)
|
212
|
-
end
|
213
|
-
|
214
|
-
def pre_opts(opts)
|
215
|
-
end
|
216
|
-
|
217
|
-
def opts(opts)
|
218
|
-
end
|
219
|
-
|
220
|
-
def generic_opts(opts)
|
221
|
-
opts.on('-h', '--help', 'Print this help message and exit') {
|
222
|
-
shut opts
|
223
|
-
}
|
224
|
-
|
225
|
-
opts.on('--version', 'Print program version and exit') {
|
226
|
-
shut "#{progname} v#{version}"
|
227
|
-
}
|
228
|
-
end
|
229
|
-
|
230
|
-
def post_opts(opts)
|
231
|
-
end
|
232
|
-
|
233
|
-
module OptionParserExtension
|
234
|
-
|
235
|
-
KEY_POOL = ('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a
|
236
|
-
|
237
|
-
def keys
|
238
|
-
{ :used => keys = top.short.keys, :free => KEY_POOL - keys }
|
239
|
-
end
|
240
|
-
|
241
|
-
end
|
242
|
-
|
243
|
-
end
|
5
|
+
warn "#{__FILE__}: Nuggets::CLI is deprecated, use Cyclops instead."
|
6
|
+
rescue LoadError => err
|
7
|
+
warn "#{__FILE__}: Nuggets::CLI is no longer available; install `cyclops' instead. (#{err})"
|
244
8
|
end
|
data/lib/nuggets/{midos/reader.rb → hash/zip_mixin.rb}
@@ -1,5 +1,3 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
1
|
#--
|
4
2
|
###############################################################################
|
5
3
|
# #
|
@@ -27,89 +25,134 @@
|
|
27
25
|
###############################################################################
|
28
26
|
#++
|
29
27
|
|
28
|
+
require 'zlib'
|
29
|
+
|
30
30
|
module Nuggets
|
31
|
-
|
32
|
-
|
31
|
+
class Hash
|
32
|
+
module ZipMixin
|
33
|
+
|
34
|
+
def zip(*args, &block)
|
35
|
+
ZipHash.new(*args, &block)
|
36
|
+
end
|
33
37
|
|
34
|
-
|
38
|
+
def zipval(*args, &block)
|
39
|
+
ZipValHash.new(*args, &block)
|
40
|
+
end
|
35
41
|
|
36
|
-
|
42
|
+
def zipkey(*args, &block)
|
43
|
+
ZipKeyHash.new(*args, &block)
|
44
|
+
end
|
37
45
|
|
38
|
-
|
39
|
-
|
40
|
-
|
46
|
+
class ZipHash < ::Hash
|
47
|
+
|
48
|
+
def [](key)
|
49
|
+
unzipval(super(zipkey(key)))
|
41
50
|
end
|
42
51
|
|
43
|
-
def
|
44
|
-
|
52
|
+
def []=(key, value)
|
53
|
+
super(zipkey(key), zipval(value))
|
45
54
|
end
|
46
55
|
|
47
|
-
|
56
|
+
def fetch(key, *args)
|
57
|
+
unzipval(super(zipkey(key), *args))
|
58
|
+
end
|
48
59
|
|
49
|
-
|
60
|
+
def store(key, value)
|
61
|
+
super(zipkey(key), zipval(value))
|
62
|
+
end
|
50
63
|
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
64
|
+
private
|
65
|
+
|
66
|
+
def zipval(value)
|
67
|
+
value.is_a?(ZipVal) ? value : ZipVal.new(value)
|
68
|
+
end
|
69
|
+
|
70
|
+
def unzipval(value)
|
71
|
+
value.is_a?(ZipVal) ? value.to_s : value
|
72
|
+
end
|
73
|
+
|
74
|
+
def zipkey(key)
|
75
|
+
key.is_a?(ZipKey) ? key : ZipKey.new(key)
|
76
|
+
end
|
77
|
+
|
78
|
+
def unzipkey(key)
|
79
|
+
key.is_a?(ZipKey) ? key.to_s : key
|
80
|
+
end
|
55
81
|
|
56
|
-
def vs=(vs)
|
57
|
-
@vs = vs.is_a?(::Regexp) ? vs : %r{\s*#{::Regexp.escape(vs)}\s*}
|
58
82
|
end
|
59
83
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
84
|
+
class ZipValHash < ZipHash
|
85
|
+
|
86
|
+
private
|
87
|
+
|
88
|
+
def zipkey(key)
|
89
|
+
key
|
65
90
|
end
|
66
91
|
|
67
|
-
|
68
|
-
|
92
|
+
def unzipkey(key)
|
93
|
+
key
|
94
|
+
end
|
69
95
|
|
70
|
-
|
71
|
-
|
96
|
+
end
|
97
|
+
|
98
|
+
class ZipKeyHash < ZipHash
|
72
99
|
|
73
|
-
|
74
|
-
block[key ? id : auto_id.call, record]
|
75
|
-
id, record = nil, {}
|
76
|
-
else
|
77
|
-
k, v = line.split(fs, 2)
|
100
|
+
private
|
78
101
|
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
else
|
83
|
-
v.gsub!(nl, "\n")
|
84
|
-
v = v.split(vs) if v.index(vs)
|
85
|
-
end
|
102
|
+
def zipval(value)
|
103
|
+
value
|
104
|
+
end
|
86
105
|
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
}
|
106
|
+
def unzipval(value)
|
107
|
+
value
|
108
|
+
end
|
91
109
|
|
92
|
-
self
|
93
110
|
end
|
94
111
|
|
95
|
-
|
112
|
+
class ZipVal
|
96
113
|
|
97
|
-
|
98
|
-
return block unless $VERBOSE && k = @key
|
114
|
+
include Comparable
|
99
115
|
|
100
|
-
|
116
|
+
def initialize(value)
|
117
|
+
@value = zip(value)
|
118
|
+
end
|
101
119
|
|
102
|
-
|
103
|
-
|
104
|
-
|
120
|
+
def <=>(other)
|
121
|
+
to_s <=> other.to_s if self.class.equal?(other.class)
|
122
|
+
end
|
105
123
|
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
124
|
+
def to_s
|
125
|
+
unzip(@value)
|
126
|
+
end
|
127
|
+
|
128
|
+
def inspect
|
129
|
+
!((s = to_s).length > 64 || s.include?($/)) ? to_s :
|
130
|
+
'#<%s:0x%x length=%p>' % [self.class, object_id, @value.length]
|
131
|
+
end
|
132
|
+
|
133
|
+
def hash
|
134
|
+
to_s.hash
|
135
|
+
end
|
136
|
+
|
137
|
+
def eql?(other)
|
138
|
+
(self <=> other) == 0
|
139
|
+
end
|
140
|
+
|
141
|
+
alias_method :==, :eql?
|
142
|
+
|
143
|
+
private
|
144
|
+
|
145
|
+
def zip(string)
|
146
|
+
Zlib::Deflate.deflate(string)
|
147
|
+
end
|
148
|
+
|
149
|
+
def unzip(string)
|
150
|
+
Zlib::Inflate.inflate(string)
|
151
|
+
end
|
152
|
+
|
153
|
+
end
|
110
154
|
|
111
|
-
|
112
|
-
}
|
155
|
+
class ZipKey < ZipVal
|
113
156
|
end
|
114
157
|
|
115
158
|
end
|
data/lib/nuggets/lsi.rb
CHANGED
@@ -1,279 +1,8 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
5
|
-
# language. #
|
6
|
-
# #
|
7
|
-
# Copyright (C) 2007-2013 Jens Wille #
|
8
|
-
# #
|
9
|
-
# Authors: #
|
10
|
-
# Jens Wille <jens.wille@gmail.com> #
|
11
|
-
# #
|
12
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
13
|
-
# under the terms of the GNU Affero General Public License as published by #
|
14
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
15
|
-
# option) any later version. #
|
16
|
-
# #
|
17
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
18
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
19
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
20
|
-
# for more details. #
|
21
|
-
# #
|
22
|
-
# You should have received a copy of the GNU Affero General Public License #
|
23
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
24
|
-
# #
|
25
|
-
###############################################################################
|
26
|
-
#++
|
27
|
-
|
28
|
-
require 'forwardable'
|
29
|
-
require 'gsl'
|
30
|
-
|
31
|
-
module Nuggets
|
32
|
-
|
33
|
-
class LSI
|
34
|
-
|
35
|
-
include ::Enumerable
|
36
|
-
|
37
|
-
extend ::Forwardable
|
38
|
-
|
39
|
-
DEFAULT_EPSILON = ::Float::EPSILON * 10
|
40
|
-
|
41
|
-
DEFAULT_PRECISION = 2
|
42
|
-
|
43
|
-
DEFAULT_TRANSFORM = :tfidf
|
44
|
-
|
45
|
-
DEFAULT_CUTOFF = 0.75
|
46
|
-
|
47
|
-
class << self
|
48
|
-
|
49
|
-
def build(items, options = {})
|
50
|
-
lsi = new(items)
|
51
|
-
lsi if lsi.build(options)
|
52
|
-
end
|
53
|
-
|
54
|
-
def each_norm(items, options = {}, build_options = {}, &block)
|
55
|
-
lsi = new(items)
|
56
|
-
lsi.each_norm(nil, options, &block) if lsi.build(build_options)
|
57
|
-
end
|
58
|
-
|
59
|
-
end
|
60
|
-
|
61
|
-
def initialize(items = {})
|
62
|
-
reset
|
63
|
-
items.each { |k, v| self[k] = v || k }
|
64
|
-
end
|
65
|
-
|
66
|
-
def_delegators :@hash, :[], :each, :include?, :key, :keys, :size
|
67
|
-
|
68
|
-
def_delegator :@hash, :values, :docs
|
69
|
-
def_delegator :@hash, :values_at, :docs_at
|
70
|
-
|
71
|
-
def_delegator :@list, :keys, :terms
|
72
|
-
|
73
|
-
alias_method :doc, :[]
|
74
|
-
|
75
|
-
def []=(key, value)
|
76
|
-
@hash[key] = Doc.new(key, value, @list, @freq)
|
77
|
-
end
|
78
|
-
|
79
|
-
def add(key, value = key)
|
80
|
-
self[key] = value
|
81
|
-
self
|
82
|
-
end
|
83
|
-
|
84
|
-
def <<(value)
|
85
|
-
add(value.object_id, value)
|
86
|
-
end
|
87
|
-
|
88
|
-
# min:: minimum value to consider
|
89
|
-
# abs:: minimum absolute value to consider
|
90
|
-
# nul:: exclude null values (true or Float)
|
91
|
-
# new:: exclude original terms / only yield new ones
|
92
|
-
def each_term(key = nil, options = {})
|
93
|
-
return enum_for(:each_term, key, options) unless block_given?
|
94
|
-
|
95
|
-
min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
|
96
|
-
nul = DEFAULT_EPSILON if nul == true
|
97
|
-
|
98
|
-
list, norm = @invlist, options[:norm]
|
99
|
-
|
100
|
-
(key ? [self[key]] : docs).each { |doc|
|
101
|
-
if doc && vec = norm ? doc.norm : doc.vector
|
102
|
-
vec.enum_for(:each).with_index { |v, i|
|
103
|
-
yield doc, list[i], v unless v.nan? ||
|
104
|
-
(min && v < min) ||
|
105
|
-
(abs && v.abs < abs) ||
|
106
|
-
(nul && v.abs < nul) ||
|
107
|
-
(new && doc.include?(i))
|
108
|
-
}
|
109
|
-
end
|
110
|
-
}
|
111
|
-
end
|
112
|
-
|
113
|
-
def each_norm(key = nil, options = {}, &block)
|
114
|
-
each_term(key, options.merge(:norm => true), &block)
|
115
|
-
end
|
116
|
-
|
117
|
-
def related(key, num = 5)
|
118
|
-
if doc = self[key] and norm = doc.norm
|
119
|
-
temp = sort_by { |k, v| -norm * v.norm.col }
|
120
|
-
temp.map! { |k,| k }.delete(key)
|
121
|
-
temp[0, num]
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
def related_score(key, num = 5)
|
126
|
-
if doc = self[key] and norm = doc.norm
|
127
|
-
temp = map { |k, v| [k, norm * v.norm.col] }.sort_by { |_, i| -i }
|
128
|
-
temp.delete(temp.assoc(key))
|
129
|
-
temp[0, num]
|
130
|
-
end
|
131
|
-
end
|
132
|
-
|
133
|
-
def build(options = {})
|
134
|
-
build!(docs, @list, options.is_a?(::Hash) ?
|
135
|
-
options : { :cutoff => options }) if size > 1
|
136
|
-
end
|
137
|
-
|
138
|
-
def reset
|
139
|
-
@hash, @list, @freq, @invlist =
|
140
|
-
{}, ::Hash.new { |h, k| h[k] = h.size }, ::Hash.new(0), {}
|
141
|
-
end
|
142
|
-
|
143
|
-
def inspect
|
144
|
-
'%s@%d/%d' % [self.class, size, @list.size]
|
145
|
-
end
|
146
|
-
|
147
|
-
def to_a(norm = true)
|
148
|
-
(norm ? map { |_, doc| doc.norm.to_a } :
|
149
|
-
map { |_, doc| doc.vector.to_a }).transpose
|
150
|
-
end
|
151
|
-
|
152
|
-
private
|
153
|
-
|
154
|
-
def build!(docs, list, options)
|
155
|
-
Doc.transform = options.fetch(:transform, DEFAULT_TRANSFORM)
|
156
|
-
|
157
|
-
@invlist = list.invert
|
158
|
-
|
159
|
-
# TODO: GSL::ERROR::EUNIMPL: Ruby/GSL error code 24, svd of
|
160
|
-
# MxN matrix, M<N, is not implemented (file svd.c, line 61)
|
161
|
-
u, v, s = matrix(docs, list.size, size = docs.size).SV_decomp
|
162
|
-
|
163
|
-
(u * reduce(s, options.fetch(:cutoff, DEFAULT_CUTOFF)) * v.trans).
|
164
|
-
enum_for(:each_col).with_index { |c, i| docs[i].vector = c.row }
|
165
|
-
|
166
|
-
size
|
167
|
-
end
|
168
|
-
|
169
|
-
def matrix(d = docs, m = @list.size, n = d.size)
|
170
|
-
x = ::GSL::Matrix.alloc(m, n)
|
171
|
-
d.each_with_index { |i, j| x.set_col(j, i.transformed_vector(m, n)) }
|
172
|
-
x
|
173
|
-
end
|
174
|
-
|
175
|
-
# k == nil:: keep all
|
176
|
-
# k >= 1:: keep this many
|
177
|
-
# k < 1:: keep (at most) this proportion
|
178
|
-
def reduce(s, k, m = s.size)
|
179
|
-
if k && k < m
|
180
|
-
k > 0 ? s[k = (k < 1 ? m * k : k).floor, m - k] = 0 : s.set_zero
|
181
|
-
end
|
182
|
-
|
183
|
-
s.to_m_diagonal
|
184
|
-
end
|
185
|
-
|
186
|
-
class Doc
|
187
|
-
|
188
|
-
include ::Enumerable
|
189
|
-
|
190
|
-
extend ::Forwardable
|
191
|
-
|
192
|
-
TOKEN_RE = %r{\s+}
|
193
|
-
|
194
|
-
class << self
|
195
|
-
|
196
|
-
attr_reader :transform
|
197
|
-
|
198
|
-
def transform=(transform)
|
199
|
-
method = :transformed_vector
|
200
|
-
|
201
|
-
case transform
|
202
|
-
when ::Proc then define_method(method, &transform)
|
203
|
-
when ::UnboundMethod then define_method(method, transform)
|
204
|
-
else alias_method(method, "#{transform ||= :raw}_vector")
|
205
|
-
end
|
206
|
-
|
207
|
-
@transform = transform.to_sym
|
208
|
-
end
|
209
|
-
|
210
|
-
end
|
211
|
-
|
212
|
-
def initialize(key, value, list, freq)
|
213
|
-
@key, @list, @freq, @total = key, list, freq, 1
|
214
|
-
|
215
|
-
@map = !value.is_a?(::Hash) ? build_hash(value, list) :
|
216
|
-
value.inject({}) { |h, (k, v)| h[list[k]] = v; h }
|
217
|
-
|
218
|
-
@map.each_key { |k| freq[k] += 1 }
|
219
|
-
|
220
|
-
self.vector = raw_vector
|
221
|
-
end
|
222
|
-
|
223
|
-
attr_reader :key, :vector, :norm
|
224
|
-
|
225
|
-
def_delegators :@map, :each, :include?
|
226
|
-
|
227
|
-
def_delegator :raw_vector, :sum, :size
|
228
|
-
|
229
|
-
def raw_vector(size = @list.size, *)
|
230
|
-
vec = ::GSL::Vector.calloc(size)
|
231
|
-
each { |k, v| vec[k] = v }
|
232
|
-
vec
|
233
|
-
end
|
234
|
-
|
235
|
-
# TODO: "first-order association transform" ???
|
236
|
-
def foat_vector(*args)
|
237
|
-
vec, q = raw_vector(*args), 0
|
238
|
-
return vec unless (s = vec.sum) > 1
|
239
|
-
|
240
|
-
vec.each { |v| q -= (w = v / s) * ::Math.log(w) if v > 0 }
|
241
|
-
vec.map { |v| ::Math.log(v + 1) / q }
|
242
|
-
end
|
243
|
-
|
244
|
-
def tfidf_vector(*args)
|
245
|
-
vec, f = raw_vector(*args), @freq
|
246
|
-
s, d = vec.sum, @total = args.fetch(1, @total).to_f
|
247
|
-
|
248
|
-
vec.enum_for(:map).with_index { |v, i|
|
249
|
-
v > 0 ? ::Math.log(d / f[i]) * v / s : v }
|
250
|
-
end
|
251
|
-
|
252
|
-
self.transform = DEFAULT_TRANSFORM
|
253
|
-
|
254
|
-
def vector=(vec)
|
255
|
-
@vector, @norm = vec, vec.normalize
|
256
|
-
end
|
257
|
-
|
258
|
-
def inspect
|
259
|
-
'%s@%p/%d' % [self.class, key, size]
|
260
|
-
end
|
261
|
-
|
262
|
-
private
|
263
|
-
|
264
|
-
def build_hash(value, list, hash = ::Hash.new(0))
|
265
|
-
build_enum(value).each { |i| hash[list[i]] += 1 }
|
266
|
-
hash
|
267
|
-
end
|
268
|
-
|
269
|
-
def build_enum(value, re = TOKEN_RE)
|
270
|
-
value = value.read if value.respond_to?(:read)
|
271
|
-
value = value.split(re) if value.respond_to?(:split)
|
272
|
-
value
|
273
|
-
end
|
274
|
-
|
275
|
-
end
|
276
|
-
|
277
|
-
end
|
1
|
+
begin
|
2
|
+
require 'lsi4r'
|
3
|
+
module Nuggets; LSI = ::Lsi4R; end
|
278
4
|
|
5
|
+
warn "#{__FILE__}: Nuggets::LSI is deprecated, use Lsi4R instead."
|
6
|
+
rescue LoadError => err
|
7
|
+
warn "#{__FILE__}: Nuggets::LSI is no longer available; install `lsi4r' instead. (#{err})"
|
279
8
|
end
|
data/lib/nuggets/midos.rb
CHANGED
@@ -1,91 +1,8 @@
|
|
1
|
-
|
1
|
+
begin
|
2
|
+
require 'midos'
|
3
|
+
module Nuggets; Midos = ::Midos; end
|
2
4
|
|
3
|
-
|
4
|
-
|
5
|
-
#
|
6
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
7
|
-
# language. #
|
8
|
-
# #
|
9
|
-
# Copyright (C) 2007-2014 Jens Wille #
|
10
|
-
# #
|
11
|
-
# Authors: #
|
12
|
-
# Jens Wille <jens.wille@gmail.com> #
|
13
|
-
# #
|
14
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
15
|
-
# under the terms of the GNU Affero General Public License as published by #
|
16
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
17
|
-
# option) any later version. #
|
18
|
-
# #
|
19
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
20
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
21
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
22
|
-
# for more details. #
|
23
|
-
# #
|
24
|
-
# You should have received a copy of the GNU Affero General Public License #
|
25
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
26
|
-
# #
|
27
|
-
###############################################################################
|
28
|
-
#++
|
29
|
-
|
30
|
-
require 'nuggets/midos/base'
|
31
|
-
require 'nuggets/midos/reader'
|
32
|
-
require 'nuggets/midos/writer'
|
33
|
-
|
34
|
-
module Nuggets
|
35
|
-
module Midos
|
36
|
-
|
37
|
-
# Record separator
|
38
|
-
DEFAULT_RS = '&&&'
|
39
|
-
|
40
|
-
# Field separator
|
41
|
-
DEFAULT_FS = ':'
|
42
|
-
|
43
|
-
# Value separator
|
44
|
-
DEFAULT_VS = '|'
|
45
|
-
|
46
|
-
# Line break indicator
|
47
|
-
DEFAULT_NL = '^'
|
48
|
-
|
49
|
-
# Line ending
|
50
|
-
DEFAULT_LE = "\r\n"
|
51
|
-
|
52
|
-
# Default file encoding
|
53
|
-
DEFAULT_ENCODING = 'iso-8859-1'
|
54
|
-
|
55
|
-
class << self
|
56
|
-
|
57
|
-
def filter(source, target, source_options = {}, target_options = source_options)
|
58
|
-
writer, size = Writer.new(target_options.merge(:io => target)), 0
|
59
|
-
|
60
|
-
Reader.parse(source, source_options) { |*args|
|
61
|
-
writer << args and size += 1 if yield(*args)
|
62
|
-
}
|
63
|
-
|
64
|
-
size
|
65
|
-
end
|
66
|
-
|
67
|
-
def filter_file(source_file, target_file, source_options = {}, target_options = source_options, &block)
|
68
|
-
open_file(source_file, source_options) { |source|
|
69
|
-
open_file(target_file, target_options, 'w') { |target|
|
70
|
-
filter(source, target, source_options, target_options, &block)
|
71
|
-
}
|
72
|
-
}
|
73
|
-
end
|
74
|
-
|
75
|
-
def convert(*args)
|
76
|
-
filter(*args) { |*| true }
|
77
|
-
end
|
78
|
-
|
79
|
-
def convert_file(*args)
|
80
|
-
filter_file(*args) { |*| true }
|
81
|
-
end
|
82
|
-
|
83
|
-
def open_file(file, options = {}, mode = 'r', &block)
|
84
|
-
encoding = options[:encoding] ||= DEFAULT_ENCODING
|
85
|
-
::File.open(file, mode, :encoding => encoding, &block)
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
89
|
-
|
90
|
-
end
|
5
|
+
warn "#{__FILE__}: Nuggets::Midos is deprecated, use Midos instead."
|
6
|
+
rescue LoadError => err
|
7
|
+
warn "#{__FILE__}: Nuggets::Midos is no longer available; install `midos' instead. (#{err})"
|
91
8
|
end
|
data/lib/nuggets/version.rb
CHANGED
data/spec/nuggets/object/singleton_class_spec.rb
CHANGED
@@ -23,7 +23,7 @@ describe Object, 'when extended by', Nuggets::Object::SingletonClassMixin do
|
|
23
23
|
|
24
24
|
example do
|
25
25
|
nil.singleton_class.should == NilClass
|
26
|
-
NilClass.should be_a_singleton_class
|
26
|
+
#NilClass.should be_a_singleton_class
|
27
27
|
NilClass.singleton_object.should be_equal(nil)
|
28
28
|
end
|
29
29
|
|
metadata
CHANGED
@@ -1,15 +1,85 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruby-nuggets
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.9.7
|
4
|
+
version: 0.9.8.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jens Wille
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
12
|
-
dependencies:
|
11
|
+
date: 2014-04-14 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: mime-types
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: open4
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - ">="
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: hen
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - ">="
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '0'
|
55
|
+
- !ruby/object:Gem::Dependency
|
56
|
+
name: rake
|
57
|
+
requirement: !ruby/object:Gem::Requirement
|
58
|
+
requirements:
|
59
|
+
- - ">="
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
type: :development
|
63
|
+
prerelease: false
|
64
|
+
version_requirements: !ruby/object:Gem::Requirement
|
65
|
+
requirements:
|
66
|
+
- - ">="
|
67
|
+
- !ruby/object:Gem::Version
|
68
|
+
version: '0'
|
69
|
+
- !ruby/object:Gem::Dependency
|
70
|
+
name: rspec
|
71
|
+
requirement: !ruby/object:Gem::Requirement
|
72
|
+
requirements:
|
73
|
+
- - ">="
|
74
|
+
- !ruby/object:Gem::Version
|
75
|
+
version: '0'
|
76
|
+
type: :development
|
77
|
+
prerelease: false
|
78
|
+
version_requirements: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - ">="
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: '0'
|
13
83
|
description: Some extensions to the Ruby programming language.
|
14
84
|
email: jens.wille@gmail.com
|
15
85
|
executables: []
|
@@ -99,6 +169,8 @@ files:
|
|
99
169
|
- lib/nuggets/hash/seen_mixin.rb
|
100
170
|
- lib/nuggets/hash/unroll.rb
|
101
171
|
- lib/nuggets/hash/unroll_mixin.rb
|
172
|
+
- lib/nuggets/hash/zip.rb
|
173
|
+
- lib/nuggets/hash/zip_mixin.rb
|
102
174
|
- lib/nuggets/i18n.rb
|
103
175
|
- lib/nuggets/integer/factorial.rb
|
104
176
|
- lib/nuggets/integer/length.rb
|
@@ -120,9 +192,6 @@ files:
|
|
120
192
|
- lib/nuggets/log_parser/rails.rb
|
121
193
|
- lib/nuggets/lsi.rb
|
122
194
|
- lib/nuggets/midos.rb
|
123
|
-
- lib/nuggets/midos/base.rb
|
124
|
-
- lib/nuggets/midos/reader.rb
|
125
|
-
- lib/nuggets/midos/writer.rb
|
126
195
|
- lib/nuggets/mysql.rb
|
127
196
|
- lib/nuggets/net/success.rb
|
128
197
|
- lib/nuggets/numeric/between.rb
|
@@ -240,7 +309,7 @@ metadata: {}
|
|
240
309
|
post_install_message:
|
241
310
|
rdoc_options:
|
242
311
|
- "--title"
|
243
|
-
- ruby-nuggets Application documentation (v0.9.7)
|
312
|
+
- ruby-nuggets Application documentation (v0.9.8.1)
|
244
313
|
- "--charset"
|
245
314
|
- UTF-8
|
246
315
|
- "--line-numbers"
|
data/lib/nuggets/midos/base.rb
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#--
|
4
|
-
###############################################################################
|
5
|
-
# #
|
6
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
7
|
-
# language. #
|
8
|
-
# #
|
9
|
-
# Copyright (C) 2007-2014 Jens Wille #
|
10
|
-
# #
|
11
|
-
# Authors: #
|
12
|
-
# Jens Wille <jens.wille@gmail.com> #
|
13
|
-
# #
|
14
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
15
|
-
# under the terms of the GNU Affero General Public License as published by #
|
16
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
17
|
-
# option) any later version. #
|
18
|
-
# #
|
19
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
20
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
21
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
22
|
-
# for more details. #
|
23
|
-
# #
|
24
|
-
# You should have received a copy of the GNU Affero General Public License #
|
25
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
26
|
-
# #
|
27
|
-
###############################################################################
|
28
|
-
#++
|
29
|
-
|
30
|
-
module Nuggets
|
31
|
-
module Midos
|
32
|
-
class Base
|
33
|
-
|
34
|
-
class << self
|
35
|
-
|
36
|
-
private
|
37
|
-
|
38
|
-
def file_method(method, mode, file, options = {}, *args, &block)
|
39
|
-
Midos.open_file(file, options, mode) { |io|
|
40
|
-
args.unshift(options.merge(:io => io))
|
41
|
-
method ? send(method, *args, &block) : block[new(*args)]
|
42
|
-
}
|
43
|
-
end
|
44
|
-
|
45
|
-
def extract_options!(args)
|
46
|
-
args.last.is_a?(::Hash) ? args.pop : {}
|
47
|
-
end
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
def initialize(options = {}, &block)
|
52
|
-
self.key = options[:key]
|
53
|
-
|
54
|
-
self.rs = options[:rs] || DEFAULT_RS
|
55
|
-
self.fs = options[:fs] || DEFAULT_FS
|
56
|
-
self.vs = options[:vs] || DEFAULT_VS
|
57
|
-
self.nl = options[:nl] || DEFAULT_NL
|
58
|
-
self.le = options[:le] || DEFAULT_LE
|
59
|
-
self.io = options[:io] || self.class::DEFAULT_IO
|
60
|
-
|
61
|
-
@auto_id_block = options[:auto_id] || block
|
62
|
-
reset
|
63
|
-
end
|
64
|
-
|
65
|
-
attr_accessor :key, :rs, :fs, :nl, :le, :io, :auto_id
|
66
|
-
|
67
|
-
attr_reader :vs
|
68
|
-
|
69
|
-
def reset
|
70
|
-
@auto_id = @auto_id_block ? @auto_id_block.call : default_auto_id
|
71
|
-
end
|
72
|
-
|
73
|
-
private
|
74
|
-
|
75
|
-
def default_auto_id(n = 0)
|
76
|
-
lambda { n += 1 }
|
77
|
-
end
|
78
|
-
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
data/lib/nuggets/midos/writer.rb
DELETED
@@ -1,252 +0,0 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
|
3
|
-
#--
|
4
|
-
###############################################################################
|
5
|
-
# #
|
6
|
-
# A component of ruby-nuggets, some extensions to the Ruby programming #
|
7
|
-
# language. #
|
8
|
-
# #
|
9
|
-
# Copyright (C) 2007-2014 Jens Wille #
|
10
|
-
# #
|
11
|
-
# Authors: #
|
12
|
-
# Jens Wille <jens.wille@gmail.com> #
|
13
|
-
# #
|
14
|
-
# ruby-nuggets is free software; you can redistribute it and/or modify it #
|
15
|
-
# under the terms of the GNU Affero General Public License as published by #
|
16
|
-
# the Free Software Foundation; either version 3 of the License, or (at your #
|
17
|
-
# option) any later version. #
|
18
|
-
# #
|
19
|
-
# ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
|
20
|
-
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
|
21
|
-
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
|
22
|
-
# for more details. #
|
23
|
-
# #
|
24
|
-
# You should have received a copy of the GNU Affero General Public License #
|
25
|
-
# along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
|
26
|
-
# #
|
27
|
-
###############################################################################
|
28
|
-
#++
|
29
|
-
|
30
|
-
require 'nuggets/hash/idmap'
|
31
|
-
|
32
|
-
module Nuggets
|
33
|
-
module Midos
|
34
|
-
class Writer < Base
|
35
|
-
|
36
|
-
DEFAULT_IO = $stdout
|
37
|
-
|
38
|
-
class << self
|
39
|
-
|
40
|
-
def write(*args, &block)
|
41
|
-
new(extract_options!(args), &block).write(*args)
|
42
|
-
end
|
43
|
-
|
44
|
-
def write_file(*args, &block)
|
45
|
-
file_method(:write, 'w', *args, &block)
|
46
|
-
end
|
47
|
-
|
48
|
-
def open(*args, &block)
|
49
|
-
file_method(nil, 'w', *args, &block)
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
54
|
-
def vs=(vs)
|
55
|
-
vs.is_a?(::String) ? @vs = vs : raise(::TypeError,
|
56
|
-
"wrong argument type #{vs.class} (expected String)")
|
57
|
-
end
|
58
|
-
|
59
|
-
def write(records, *args)
|
60
|
-
if records.is_a?(::Hash)
|
61
|
-
records.each { |id, record| write_i(id, record, *args) }
|
62
|
-
else
|
63
|
-
records.each { |record| write_i(nil, record, *args) }
|
64
|
-
end
|
65
|
-
|
66
|
-
self
|
67
|
-
end
|
68
|
-
|
69
|
-
def put(record, *args)
|
70
|
-
if record.is_a?(::Hash)
|
71
|
-
write_i(nil, record, *args)
|
72
|
-
else
|
73
|
-
write_i(*args.unshift(*record))
|
74
|
-
end
|
75
|
-
|
76
|
-
self
|
77
|
-
end
|
78
|
-
|
79
|
-
alias_method :<<, :put
|
80
|
-
|
81
|
-
private
|
82
|
-
|
83
|
-
def write_i(id, record, io = io)
|
84
|
-
return if record.empty?
|
85
|
-
|
86
|
-
if @key && !record.key?(@key)
|
87
|
-
record[@key] = id || @auto_id.call
|
88
|
-
end
|
89
|
-
|
90
|
-
record.each { |k, v|
|
91
|
-
if v
|
92
|
-
if k
|
93
|
-
v = v.is_a?(::Array) ? v.join(@vs) : v.to_s
|
94
|
-
io << k << @fs << v.gsub("\n", @nl) << @le
|
95
|
-
else
|
96
|
-
Array(v).each { |w| io << w.to_s << @le }
|
97
|
-
end
|
98
|
-
end
|
99
|
-
}
|
100
|
-
|
101
|
-
io << @rs << @le << @le
|
102
|
-
end
|
103
|
-
|
104
|
-
class Thesaurus < self
|
105
|
-
|
106
|
-
PROLOGUE = {
|
107
|
-
:PAR => '1011111111110000000010001000000000000010',
|
108
|
-
:DAT => '00000000',
|
109
|
-
:DES => 'DE',
|
110
|
-
:TOP => 'TP~TP',
|
111
|
-
:KLA => 'CC~CC',
|
112
|
-
:OBR => 'BT~BT',
|
113
|
-
:UTR => 'NT~NT',
|
114
|
-
:SYN => 'UF~USE',
|
115
|
-
:FRU => 'PT~PT für',
|
116
|
-
:VER => 'RT~RT',
|
117
|
-
:SP1 => 'ENG~ENG für',
|
118
|
-
:SP2 => 'FRA~FRA für',
|
119
|
-
:SP3 => 'SPA~SPA für',
|
120
|
-
:SP4 => 'ITA~ITA für',
|
121
|
-
:SP5 => 'GRI~GRI für',
|
122
|
-
:SP6 => 'RUS~RUS für',
|
123
|
-
:SP7 => 'POL~POL für',
|
124
|
-
:SP8 => 'UNG~UNG für',
|
125
|
-
:SP9 => 'TSC~TSC für',
|
126
|
-
:SN1 => 'SN1',
|
127
|
-
:SN2 => 'SN2',
|
128
|
-
:SN3 => 'SN3',
|
129
|
-
:SN4 => 'SN4',
|
130
|
-
:SN5 => 'SN5',
|
131
|
-
:DA1 => 'DATE1',
|
132
|
-
:DA2 => 'DATE2',
|
133
|
-
:DA3 => 'DATE3',
|
134
|
-
:DA4 => 'DATE4',
|
135
|
-
:KLD => 'MIDOS Thesaurus',
|
136
|
-
:KOM => ' / ',
|
137
|
-
:KO1 => 'UF',
|
138
|
-
:KO2 => 'USE',
|
139
|
-
:TLE => ' 32000 Zeichen',
|
140
|
-
:PAW => '',
|
141
|
-
:ART => '00000',
|
142
|
-
:REL => ' 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18 19 20 21 22 23 24 25'
|
143
|
-
}
|
144
|
-
|
145
|
-
EPILOGUE = {
|
146
|
-
:DE => '*****NICHTDESKRIPTORRELATIONEN*****'
|
147
|
-
}
|
148
|
-
|
149
|
-
RESOLVE_FROM = [:OBR, :UTR, :VER]
|
150
|
-
|
151
|
-
RESOLVE_TO = :DES
|
152
|
-
|
153
|
-
NAME = :KLD
|
154
|
-
|
155
|
-
class << self
|
156
|
-
|
157
|
-
def write(*args, &block)
|
158
|
-
new(extract_options!(args), &block).instruct! { |mth| mth.write(*args) }
|
159
|
-
end
|
160
|
-
|
161
|
-
def open(*args, &block)
|
162
|
-
super { |mth| mth.instruct!(&block) }
|
163
|
-
end
|
164
|
-
|
165
|
-
end
|
166
|
-
|
167
|
-
def initialize(options = {}, prologue = {}, epilogue = {}, &block)
|
168
|
-
super(options, &block)
|
169
|
-
|
170
|
-
prologue[self.class::NAME] ||= options[:name]
|
171
|
-
|
172
|
-
@prologue = self.class::PROLOGUE.merge(prologue)
|
173
|
-
@epilogue = self.class::EPILOGUE.merge(epilogue)
|
174
|
-
end
|
175
|
-
|
176
|
-
attr_reader :prologue, :epilogue
|
177
|
-
|
178
|
-
def instruct!(*args)
|
179
|
-
put(prologue, *args)
|
180
|
-
yield self
|
181
|
-
put(epilogue, *args)
|
182
|
-
end
|
183
|
-
|
184
|
-
private
|
185
|
-
|
186
|
-
def merge_records(hash, records, *args)
|
187
|
-
args = [hash, records, *resolve_from_to(*args)]
|
188
|
-
|
189
|
-
records.each { |id, record|
|
190
|
-
new_record = hash[id] = {}
|
191
|
-
record.each { |key, value| new_record[key] = resolve(key, value, *args) }
|
192
|
-
}
|
193
|
-
end
|
194
|
-
|
195
|
-
def resolve_from_to(from = nil, to = prologue[RESOLVE_TO])
|
196
|
-
if from.nil? || from == true
|
197
|
-
from = prologue.values_at(*RESOLVE_FROM).map { |v| v.split('~').first }
|
198
|
-
end
|
199
|
-
|
200
|
-
[from, to]
|
201
|
-
end
|
202
|
-
|
203
|
-
def resolve(key, value, hash, records, from = nil, to = nil)
|
204
|
-
from && from.include?(key) ? value.map { |id| records[id][to] } : value
|
205
|
-
end
|
206
|
-
|
207
|
-
end
|
208
|
-
|
209
|
-
class ThesaurusX < Thesaurus
|
210
|
-
|
211
|
-
PROLOGUE = {
|
212
|
-
'MTX-PARAMETER' => '',
|
213
|
-
:BEZ => 'MIDOS Thesaurus',
|
214
|
-
:KOM => ' / ',
|
215
|
-
:TXL => 0,
|
216
|
-
:REL => '',
|
217
|
-
nil => %w[
|
218
|
-
TT1|Topterm|TT1||||||
|
219
|
-
BT1|Oberbegriff|BT1||||||
|
220
|
-
NT1|Unterbegriff|NT1||||||
|
221
|
-
RT1|Verwandter\ Begriff|RT1||||||
|
222
|
-
SY1|Synonym1|SY1|SY1FOR|||||
|
223
|
-
]
|
224
|
-
}
|
225
|
-
|
226
|
-
EPILOGUE = {}
|
227
|
-
|
228
|
-
NAME = :BEZ
|
229
|
-
|
230
|
-
private
|
231
|
-
|
232
|
-
def merge_records(hash, *)
|
233
|
-
idmap = hash[:__list__] = ::Hash.idmap
|
234
|
-
|
235
|
-
super
|
236
|
-
|
237
|
-
idmap.replace(nil => idmap.map { |key, id| "#{key}|DE|#{id}" })
|
238
|
-
end
|
239
|
-
|
240
|
-
def resolve_from_to(*)
|
241
|
-
# nothing to do
|
242
|
-
end
|
243
|
-
|
244
|
-
def resolve(key, value, hash, *)
|
245
|
-
value.map { |id| hash[:__list__][id] }
|
246
|
-
end
|
247
|
-
|
248
|
-
end
|
249
|
-
|
250
|
-
end
|
251
|
-
end
|
252
|
-
end
|