ruby-nuggets 0.9.7 → 0.9.8.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 0085d350ab71ac8d9eb89f589930da057d40769e
4
- data.tar.gz: 14ad33894ecf8bfde16551bd44ec760a5f37bf64
3
+ metadata.gz: 20e3a6fc4cdbc194e6fae2ce281cba22be27d19b
4
+ data.tar.gz: f96c5db576f411f940de1effc136ba022699a13f
5
5
  SHA512:
6
- metadata.gz: e1075bd40b0f1e876d6363833bddcb9dc928d7171544ea674d1c1dca21f73ec4160c7e923b5d5cfbe057d402a84c9b395e5755f9858cd4a58eda3b69c089fff0
7
- data.tar.gz: 472db2f820fa47e20ba1b9fcfc9518df4ea73f7d080182d0ec084205236b8353cd17fe82041320e4d9d04c5a726d959eef5d1471730fdbc8d43e5f24da819b2b
6
+ metadata.gz: d6b4aa7f85cb7cd2ca92ffe24cac338dcb45dd7264b63e5beb855a7d62b45da42c4779111164a08f6bea6a109c71495e4363c92b2f57cbb3f8b15ef7e8856f78
7
+ data.tar.gz: aff5162458b1fa163a0a8142ddf16720ec0a08c975535d3e2a1f0c312015e0979197a25451aba6807a08c44463bbfdf344f2b582498393a6d59e2e2bb6581546
data/README CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  == VERSION
4
4
 
5
- This documentation refers to ruby-nuggets version 0.9.7
5
+ This documentation refers to ruby-nuggets version 0.9.8
6
6
 
7
7
 
8
8
  == DESCRIPTION
@@ -33,10 +33,10 @@ cause other libraries to misbehave. Use at your own risk!
33
33
 
34
34
  == LINKS
35
35
 
36
- <b></b>
37
- Documentation:: http://blackwinter.github.com/ruby-nuggets
38
- Source code:: http://github.com/blackwinter/ruby-nuggets
39
- RubyGem:: http://rubygems.org/gems/ruby-nuggets
36
+ Documentation:: https://blackwinter.github.io/ruby-nuggets/
37
+ Source code:: https://github.com/blackwinter/ruby-nuggets
38
+ RubyGem:: https://rubygems.org/gems/ruby-nuggets
39
+ Travis CI:: https://travis-ci.org/blackwinter/ruby-nuggets
40
40
 
41
41
 
42
42
  == AUTHORS
data/Rakefile CHANGED
@@ -12,7 +12,17 @@ begin
12
12
  :email => %q{jens.wille@gmail.com},
13
13
  :license => %q{AGPL-3.0},
14
14
  :homepage => :blackwinter,
15
- :dependencies => %w[]
15
+ :dependencies => %w[],
16
+
17
+ :development_dependencies => [
18
+ #'amatch', # enumerable/agrep
19
+ 'mime-types', # content_type
20
+ 'open4', # ruby
21
+ #'rbzip2', # rdf/compression
22
+ #'rdf', # rdf/{prefix,uri,compression}
23
+ #'rdf-turtle', # rdf/turtle
24
+ #'ruby-filemagic' # content_type
25
+ ]
16
26
  }
17
27
  }}
18
28
  rescue LoadError => err
data/lib/nuggets/cli.rb CHANGED
@@ -1,244 +1,8 @@
1
- #--
2
- ###############################################################################
3
- # #
4
- # A component of ruby-nuggets, some extensions to the Ruby programming #
5
- # language. #
6
- # #
7
- # Copyright (C) 2007-2011 Jens Wille #
8
- # #
9
- # Authors: #
10
- # Jens Wille <jens.wille@gmail.com> #
11
- # #
12
- # ruby-nuggets is free software; you can redistribute it and/or modify it #
13
- # under the terms of the GNU Affero General Public License as published by #
14
- # the Free Software Foundation; either version 3 of the License, or (at your #
15
- # option) any later version. #
16
- # #
17
- # ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
18
- # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
19
- # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
20
- # for more details. #
21
- # #
22
- # You should have received a copy of the GNU Affero General Public License #
23
- # along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
24
- # #
25
- ###############################################################################
26
- #++
1
+ begin
2
+ require 'cyclops'
3
+ module Nuggets; CLI = ::Cyclops; end
27
4
 
28
- require 'optparse'
29
- require 'yaml'
30
- require 'zlib'
31
- require 'highline'
32
-
33
- module Nuggets
34
- class CLI
35
-
36
- class << self
37
-
38
- def usage(prog)
39
- "Usage: #{prog} [-h|--help] [options]"
40
- end
41
-
42
- def version
43
- parent_const_get(:VERSION)
44
- end
45
-
46
- def defaults
47
- {}
48
- end
49
-
50
- def execute(*args)
51
- new.execute(*args)
52
- end
53
-
54
- private
55
-
56
- def parent_const_get(const, range = 0...-1)
57
- name.split('::').inject([::Object]) { |memo, name|
58
- memo << memo.last.const_get(name)
59
- }.reverse[range].each { |mod|
60
- return mod.const_get(const) if mod.const_defined?(const)
61
- }
62
-
63
- raise ::NameError, "uninitialized constant #{self}::#{const}"
64
- end
65
-
66
- end
67
-
68
- attr_reader :options, :config, :defaults
69
- attr_reader :stdin, :stdout, :stderr
70
-
71
- attr_accessor :prog
72
-
73
- def initialize(defaults = nil, *args)
74
- @defaults, @prog = defaults || self.class.defaults, $0
75
-
76
- init(*args)
77
-
78
- # prevent backtrace on ^C
79
- trap(:INT) { exit 130 }
80
- end
81
-
82
- def progname
83
- ::File.basename(prog)
84
- end
85
-
86
- def usage
87
- self.class.usage(prog)
88
- end
89
-
90
- def version
91
- self.class.version
92
- end
93
-
94
- def execute(arguments = ::ARGV, *inouterr)
95
- reset(*inouterr)
96
- parse_options(arguments)
97
- run(arguments)
98
- rescue => err
99
- raise if $VERBOSE
100
- abort "#{err.backtrace.first}: #{err} (#{err.class})"
101
- ensure
102
- options.each_value { |value|
103
- value.close if value.is_a?(::Zlib::GzipWriter)
104
- }
105
- end
106
-
107
- def run(arguments)
108
- raise ::NotImplementedError, 'must be implemented by subclass'
109
- end
110
-
111
- def reset(stdin = ::STDIN, stdout = ::STDOUT, stderr = ::STDERR)
112
- @stdin, @stdout, @stderr = stdin, stdout, stderr
113
- @options, @config = {}, {}
114
- end
115
-
116
- private
117
-
118
- def init(*args)
119
- reset
120
- end
121
-
122
- def ask(question, &block)
123
- ::HighLine.new(stdin, stdout).ask(question, &block)
124
- end
125
-
126
- def puts(*msg)
127
- stdout.puts(*msg)
128
- end
129
-
130
- def warn(*msg)
131
- stderr.puts(*msg)
132
- end
133
-
134
- def quit(msg = nil, include_usage = msg != false)
135
- out = []
136
-
137
- out << "#{progname}: #{msg}" if msg
138
- out << usage if include_usage
139
-
140
- abort out.any? && out.join("\n\n")
141
- end
142
-
143
- def abort(msg = nil, status = 1)
144
- warn(msg) if msg
145
- exit(status)
146
- end
147
-
148
- def shut(msg = nil, status = 0)
149
- puts(msg) if msg
150
- exit(status)
151
- end
152
-
153
- def exit(status = 0)
154
- ::Kernel.exit(status)
155
- end
156
-
157
- def open_file_or_std(file, write = false)
158
- if file == '-'
159
- write ? stdout : stdin
160
- else
161
- gz = file =~ /\.gz\z/i
162
-
163
- if write
164
- gz ? ::Zlib::GzipWriter.open(file) : ::File.open(file, 'w')
165
- else
166
- quit "No such file: #{file}" unless ::File.readable?(file)
167
- (gz ? ::Zlib::GzipReader : ::File).open(file)
168
- end
169
- end
170
- end
171
-
172
- def load_config(file = options[:config] || default = defaults[:config])
173
- return unless file
174
-
175
- if ::File.readable?(file)
176
- @config = ::YAML.load_file(file)
177
- else
178
- quit "No such file: #{file}" unless default
179
- end
180
- end
181
-
182
- def merge_config(args = [config, defaults])
183
- args.each { |hash| hash && hash.each { |key, value|
184
- options[key] = value unless options.key?(key)
185
- } }
186
- end
187
-
188
- def parse_options(arguments)
189
- option_parser.parse!(arguments)
190
-
191
- load_config
192
- merge_config
193
- end
194
-
195
- def option_parser
196
- ::OptionParser.new { |opts|
197
- opts.banner = usage
198
-
199
- pre_opts(opts)
200
-
201
- opts.separator ''
202
- opts.separator 'Options:'
203
-
204
- opts(opts)
205
-
206
- opts.separator ''
207
- opts.separator 'Generic options:'
208
-
209
- generic_opts(opts)
210
- post_opts(opts)
211
- }.extend(Nuggets::CLI::OptionParserExtension)
212
- end
213
-
214
- def pre_opts(opts)
215
- end
216
-
217
- def opts(opts)
218
- end
219
-
220
- def generic_opts(opts)
221
- opts.on('-h', '--help', 'Print this help message and exit') {
222
- shut opts
223
- }
224
-
225
- opts.on('--version', 'Print program version and exit') {
226
- shut "#{progname} v#{version}"
227
- }
228
- end
229
-
230
- def post_opts(opts)
231
- end
232
-
233
- module OptionParserExtension
234
-
235
- KEY_POOL = ('A'..'Z').to_a + ('a'..'z').to_a + ('0'..'9').to_a
236
-
237
- def keys
238
- { :used => keys = top.short.keys, :free => KEY_POOL - keys }
239
- end
240
-
241
- end
242
-
243
- end
5
+ warn "#{__FILE__}: Nuggets::CLI is deprecated, use Cyclops instead."
6
+ rescue LoadError => err
7
+ warn "#{__FILE__}: Nuggets::CLI is no longer available; install `cyclops' instead. (#{err})"
244
8
  end
@@ -0,0 +1,5 @@
1
+ require 'nuggets/hash/zip_mixin'
2
+
3
+ class Hash
4
+ extend Nuggets::Hash::ZipMixin
5
+ end
@@ -1,5 +1,3 @@
1
- # encoding: utf-8
2
-
3
1
  #--
4
2
  ###############################################################################
5
3
  # #
@@ -27,89 +25,134 @@
27
25
  ###############################################################################
28
26
  #++
29
27
 
28
+ require 'zlib'
29
+
30
30
  module Nuggets
31
- module Midos
32
- class Reader < Base
31
+ class Hash
32
+ module ZipMixin
33
+
34
+ def zip(*args, &block)
35
+ ZipHash.new(*args, &block)
36
+ end
33
37
 
34
- DEFAULT_IO = $stdin
38
+ def zipval(*args, &block)
39
+ ZipValHash.new(*args, &block)
40
+ end
35
41
 
36
- class << self
42
+ def zipkey(*args, &block)
43
+ ZipKeyHash.new(*args, &block)
44
+ end
37
45
 
38
- def parse(*args, &block)
39
- reader = new(extract_options!(args)).parse(*args, &block)
40
- block ? reader : reader.records
46
+ class ZipHash < ::Hash
47
+
48
+ def [](key)
49
+ unzipval(super(zipkey(key)))
41
50
  end
42
51
 
43
- def parse_file(*args, &block)
44
- file_method(:parse, 'r', *args, &block)
52
+ def []=(key, value)
53
+ super(zipkey(key), zipval(value))
45
54
  end
46
55
 
47
- end
56
+ def fetch(key, *args)
57
+ unzipval(super(zipkey(key), *args))
58
+ end
48
59
 
49
- attr_reader :records
60
+ def store(key, value)
61
+ super(zipkey(key), zipval(value))
62
+ end
50
63
 
51
- def reset
52
- super
53
- @records = {}
54
- end
64
+ private
65
+
66
+ def zipval(value)
67
+ value.is_a?(ZipVal) ? value : ZipVal.new(value)
68
+ end
69
+
70
+ def unzipval(value)
71
+ value.is_a?(ZipVal) ? value.to_s : value
72
+ end
73
+
74
+ def zipkey(key)
75
+ key.is_a?(ZipKey) ? key : ZipKey.new(key)
76
+ end
77
+
78
+ def unzipkey(key)
79
+ key.is_a?(ZipKey) ? key.to_s : key
80
+ end
55
81
 
56
- def vs=(vs)
57
- @vs = vs.is_a?(::Regexp) ? vs : %r{\s*#{::Regexp.escape(vs)}\s*}
58
82
  end
59
83
 
60
- def parse(io = io, &block)
61
- unless block
62
- records, block = @records, amend_block { |id, record|
63
- records[id] = record
64
- }
84
+ class ZipValHash < ZipHash
85
+
86
+ private
87
+
88
+ def zipkey(key)
89
+ key
65
90
  end
66
91
 
67
- rs, fs, vs, nl, le, key, auto_id, id, record =
68
- @rs, @fs, @vs, @nl, @le, @key, @auto_id, nil, {}
92
+ def unzipkey(key)
93
+ key
94
+ end
69
95
 
70
- io.each { |line|
71
- line = line.chomp(le)
96
+ end
97
+
98
+ class ZipKeyHash < ZipHash
72
99
 
73
- if line == rs
74
- block[key ? id : auto_id.call, record]
75
- id, record = nil, {}
76
- else
77
- k, v = line.split(fs, 2)
100
+ private
78
101
 
79
- if k && v
80
- if k == key
81
- id = v
82
- else
83
- v.gsub!(nl, "\n")
84
- v = v.split(vs) if v.index(vs)
85
- end
102
+ def zipval(value)
103
+ value
104
+ end
86
105
 
87
- record[k] = v
88
- end
89
- end
90
- }
106
+ def unzipval(value)
107
+ value
108
+ end
91
109
 
92
- self
93
110
  end
94
111
 
95
- private
112
+ class ZipVal
96
113
 
97
- def amend_block(&block)
98
- return block unless $VERBOSE && k = @key
114
+ include Comparable
99
115
 
100
- r, i = block.binding.eval('_ = records, io')
116
+ def initialize(value)
117
+ @value = zip(value)
118
+ end
101
119
 
102
- l = i.respond_to?(:lineno)
103
- s = i.respond_to?(:path) ? i.path :
104
- ::Object.instance_method(:inspect).bind(i).call
120
+ def <=>(other)
121
+ to_s <=> other.to_s if self.class.equal?(other.class)
122
+ end
105
123
 
106
- lambda { |id, *args|
107
- if (r ||= block.binding.eval('records')).key?(id)
108
- warn "Duplicate record in #{s}#{":#{i.lineno}" if l}: »#{k}:#{id}«"
109
- end
124
+ def to_s
125
+ unzip(@value)
126
+ end
127
+
128
+ def inspect
129
+ !((s = to_s).length > 64 || s.include?($/)) ? to_s :
130
+ '#<%s:0x%x length=%p>' % [self.class, object_id, @value.length]
131
+ end
132
+
133
+ def hash
134
+ to_s.hash
135
+ end
136
+
137
+ def eql?(other)
138
+ (self <=> other) == 0
139
+ end
140
+
141
+ alias_method :==, :eql?
142
+
143
+ private
144
+
145
+ def zip(string)
146
+ Zlib::Deflate.deflate(string)
147
+ end
148
+
149
+ def unzip(string)
150
+ Zlib::Inflate.inflate(string)
151
+ end
152
+
153
+ end
110
154
 
111
- block[id, *args]
112
- }
155
+ class ZipKey < ZipVal
113
156
  end
114
157
 
115
158
  end
data/lib/nuggets/lsi.rb CHANGED
@@ -1,279 +1,8 @@
1
- #--
2
- ###############################################################################
3
- # #
4
- # A component of ruby-nuggets, some extensions to the Ruby programming #
5
- # language. #
6
- # #
7
- # Copyright (C) 2007-2013 Jens Wille #
8
- # #
9
- # Authors: #
10
- # Jens Wille <jens.wille@gmail.com> #
11
- # #
12
- # ruby-nuggets is free software; you can redistribute it and/or modify it #
13
- # under the terms of the GNU Affero General Public License as published by #
14
- # the Free Software Foundation; either version 3 of the License, or (at your #
15
- # option) any later version. #
16
- # #
17
- # ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
18
- # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
19
- # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
20
- # for more details. #
21
- # #
22
- # You should have received a copy of the GNU Affero General Public License #
23
- # along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
24
- # #
25
- ###############################################################################
26
- #++
27
-
28
- require 'forwardable'
29
- require 'gsl'
30
-
31
- module Nuggets
32
-
33
- class LSI
34
-
35
- include ::Enumerable
36
-
37
- extend ::Forwardable
38
-
39
- DEFAULT_EPSILON = ::Float::EPSILON * 10
40
-
41
- DEFAULT_PRECISION = 2
42
-
43
- DEFAULT_TRANSFORM = :tfidf
44
-
45
- DEFAULT_CUTOFF = 0.75
46
-
47
- class << self
48
-
49
- def build(items, options = {})
50
- lsi = new(items)
51
- lsi if lsi.build(options)
52
- end
53
-
54
- def each_norm(items, options = {}, build_options = {}, &block)
55
- lsi = new(items)
56
- lsi.each_norm(nil, options, &block) if lsi.build(build_options)
57
- end
58
-
59
- end
60
-
61
- def initialize(items = {})
62
- reset
63
- items.each { |k, v| self[k] = v || k }
64
- end
65
-
66
- def_delegators :@hash, :[], :each, :include?, :key, :keys, :size
67
-
68
- def_delegator :@hash, :values, :docs
69
- def_delegator :@hash, :values_at, :docs_at
70
-
71
- def_delegator :@list, :keys, :terms
72
-
73
- alias_method :doc, :[]
74
-
75
- def []=(key, value)
76
- @hash[key] = Doc.new(key, value, @list, @freq)
77
- end
78
-
79
- def add(key, value = key)
80
- self[key] = value
81
- self
82
- end
83
-
84
- def <<(value)
85
- add(value.object_id, value)
86
- end
87
-
88
- # min:: minimum value to consider
89
- # abs:: minimum absolute value to consider
90
- # nul:: exclude null values (true or Float)
91
- # new:: exclude original terms / only yield new ones
92
- def each_term(key = nil, options = {})
93
- return enum_for(:each_term, key, options) unless block_given?
94
-
95
- min, abs, nul, new = options.values_at(:min, :abs, :nul, :new)
96
- nul = DEFAULT_EPSILON if nul == true
97
-
98
- list, norm = @invlist, options[:norm]
99
-
100
- (key ? [self[key]] : docs).each { |doc|
101
- if doc && vec = norm ? doc.norm : doc.vector
102
- vec.enum_for(:each).with_index { |v, i|
103
- yield doc, list[i], v unless v.nan? ||
104
- (min && v < min) ||
105
- (abs && v.abs < abs) ||
106
- (nul && v.abs < nul) ||
107
- (new && doc.include?(i))
108
- }
109
- end
110
- }
111
- end
112
-
113
- def each_norm(key = nil, options = {}, &block)
114
- each_term(key, options.merge(:norm => true), &block)
115
- end
116
-
117
- def related(key, num = 5)
118
- if doc = self[key] and norm = doc.norm
119
- temp = sort_by { |k, v| -norm * v.norm.col }
120
- temp.map! { |k,| k }.delete(key)
121
- temp[0, num]
122
- end
123
- end
124
-
125
- def related_score(key, num = 5)
126
- if doc = self[key] and norm = doc.norm
127
- temp = map { |k, v| [k, norm * v.norm.col] }.sort_by { |_, i| -i }
128
- temp.delete(temp.assoc(key))
129
- temp[0, num]
130
- end
131
- end
132
-
133
- def build(options = {})
134
- build!(docs, @list, options.is_a?(::Hash) ?
135
- options : { :cutoff => options }) if size > 1
136
- end
137
-
138
- def reset
139
- @hash, @list, @freq, @invlist =
140
- {}, ::Hash.new { |h, k| h[k] = h.size }, ::Hash.new(0), {}
141
- end
142
-
143
- def inspect
144
- '%s@%d/%d' % [self.class, size, @list.size]
145
- end
146
-
147
- def to_a(norm = true)
148
- (norm ? map { |_, doc| doc.norm.to_a } :
149
- map { |_, doc| doc.vector.to_a }).transpose
150
- end
151
-
152
- private
153
-
154
- def build!(docs, list, options)
155
- Doc.transform = options.fetch(:transform, DEFAULT_TRANSFORM)
156
-
157
- @invlist = list.invert
158
-
159
- # TODO: GSL::ERROR::EUNIMPL: Ruby/GSL error code 24, svd of
160
- # MxN matrix, M<N, is not implemented (file svd.c, line 61)
161
- u, v, s = matrix(docs, list.size, size = docs.size).SV_decomp
162
-
163
- (u * reduce(s, options.fetch(:cutoff, DEFAULT_CUTOFF)) * v.trans).
164
- enum_for(:each_col).with_index { |c, i| docs[i].vector = c.row }
165
-
166
- size
167
- end
168
-
169
- def matrix(d = docs, m = @list.size, n = d.size)
170
- x = ::GSL::Matrix.alloc(m, n)
171
- d.each_with_index { |i, j| x.set_col(j, i.transformed_vector(m, n)) }
172
- x
173
- end
174
-
175
- # k == nil:: keep all
176
- # k >= 1:: keep this many
177
- # k < 1:: keep (at most) this proportion
178
- def reduce(s, k, m = s.size)
179
- if k && k < m
180
- k > 0 ? s[k = (k < 1 ? m * k : k).floor, m - k] = 0 : s.set_zero
181
- end
182
-
183
- s.to_m_diagonal
184
- end
185
-
186
- class Doc
187
-
188
- include ::Enumerable
189
-
190
- extend ::Forwardable
191
-
192
- TOKEN_RE = %r{\s+}
193
-
194
- class << self
195
-
196
- attr_reader :transform
197
-
198
- def transform=(transform)
199
- method = :transformed_vector
200
-
201
- case transform
202
- when ::Proc then define_method(method, &transform)
203
- when ::UnboundMethod then define_method(method, transform)
204
- else alias_method(method, "#{transform ||= :raw}_vector")
205
- end
206
-
207
- @transform = transform.to_sym
208
- end
209
-
210
- end
211
-
212
- def initialize(key, value, list, freq)
213
- @key, @list, @freq, @total = key, list, freq, 1
214
-
215
- @map = !value.is_a?(::Hash) ? build_hash(value, list) :
216
- value.inject({}) { |h, (k, v)| h[list[k]] = v; h }
217
-
218
- @map.each_key { |k| freq[k] += 1 }
219
-
220
- self.vector = raw_vector
221
- end
222
-
223
- attr_reader :key, :vector, :norm
224
-
225
- def_delegators :@map, :each, :include?
226
-
227
- def_delegator :raw_vector, :sum, :size
228
-
229
- def raw_vector(size = @list.size, *)
230
- vec = ::GSL::Vector.calloc(size)
231
- each { |k, v| vec[k] = v }
232
- vec
233
- end
234
-
235
- # TODO: "first-order association transform" ???
236
- def foat_vector(*args)
237
- vec, q = raw_vector(*args), 0
238
- return vec unless (s = vec.sum) > 1
239
-
240
- vec.each { |v| q -= (w = v / s) * ::Math.log(w) if v > 0 }
241
- vec.map { |v| ::Math.log(v + 1) / q }
242
- end
243
-
244
- def tfidf_vector(*args)
245
- vec, f = raw_vector(*args), @freq
246
- s, d = vec.sum, @total = args.fetch(1, @total).to_f
247
-
248
- vec.enum_for(:map).with_index { |v, i|
249
- v > 0 ? ::Math.log(d / f[i]) * v / s : v }
250
- end
251
-
252
- self.transform = DEFAULT_TRANSFORM
253
-
254
- def vector=(vec)
255
- @vector, @norm = vec, vec.normalize
256
- end
257
-
258
- def inspect
259
- '%s@%p/%d' % [self.class, key, size]
260
- end
261
-
262
- private
263
-
264
- def build_hash(value, list, hash = ::Hash.new(0))
265
- build_enum(value).each { |i| hash[list[i]] += 1 }
266
- hash
267
- end
268
-
269
- def build_enum(value, re = TOKEN_RE)
270
- value = value.read if value.respond_to?(:read)
271
- value = value.split(re) if value.respond_to?(:split)
272
- value
273
- end
274
-
275
- end
276
-
277
- end
1
+ begin
2
+ require 'lsi4r'
3
+ module Nuggets; LSI = ::Lsi4R; end
278
4
 
5
+ warn "#{__FILE__}: Nuggets::LSI is deprecated, use Lsi4R instead."
6
+ rescue LoadError => err
7
+ warn "#{__FILE__}: Nuggets::LSI is no longer available; install `lsi4r' instead. (#{err})"
279
8
  end
data/lib/nuggets/midos.rb CHANGED
@@ -1,91 +1,8 @@
1
- # encoding: utf-8
1
+ begin
2
+ require 'midos'
3
+ module Nuggets; Midos = ::Midos; end
2
4
 
3
- #--
4
- ###############################################################################
5
- # #
6
- # A component of ruby-nuggets, some extensions to the Ruby programming #
7
- # language. #
8
- # #
9
- # Copyright (C) 2007-2014 Jens Wille #
10
- # #
11
- # Authors: #
12
- # Jens Wille <jens.wille@gmail.com> #
13
- # #
14
- # ruby-nuggets is free software; you can redistribute it and/or modify it #
15
- # under the terms of the GNU Affero General Public License as published by #
16
- # the Free Software Foundation; either version 3 of the License, or (at your #
17
- # option) any later version. #
18
- # #
19
- # ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
20
- # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
21
- # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
22
- # for more details. #
23
- # #
24
- # You should have received a copy of the GNU Affero General Public License #
25
- # along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
26
- # #
27
- ###############################################################################
28
- #++
29
-
30
- require 'nuggets/midos/base'
31
- require 'nuggets/midos/reader'
32
- require 'nuggets/midos/writer'
33
-
34
- module Nuggets
35
- module Midos
36
-
37
- # Record separator
38
- DEFAULT_RS = '&&&'
39
-
40
- # Field separator
41
- DEFAULT_FS = ':'
42
-
43
- # Value separator
44
- DEFAULT_VS = '|'
45
-
46
- # Line break indicator
47
- DEFAULT_NL = '^'
48
-
49
- # Line ending
50
- DEFAULT_LE = "\r\n"
51
-
52
- # Default file encoding
53
- DEFAULT_ENCODING = 'iso-8859-1'
54
-
55
- class << self
56
-
57
- def filter(source, target, source_options = {}, target_options = source_options)
58
- writer, size = Writer.new(target_options.merge(:io => target)), 0
59
-
60
- Reader.parse(source, source_options) { |*args|
61
- writer << args and size += 1 if yield(*args)
62
- }
63
-
64
- size
65
- end
66
-
67
- def filter_file(source_file, target_file, source_options = {}, target_options = source_options, &block)
68
- open_file(source_file, source_options) { |source|
69
- open_file(target_file, target_options, 'w') { |target|
70
- filter(source, target, source_options, target_options, &block)
71
- }
72
- }
73
- end
74
-
75
- def convert(*args)
76
- filter(*args) { |*| true }
77
- end
78
-
79
- def convert_file(*args)
80
- filter_file(*args) { |*| true }
81
- end
82
-
83
- def open_file(file, options = {}, mode = 'r', &block)
84
- encoding = options[:encoding] ||= DEFAULT_ENCODING
85
- ::File.open(file, mode, :encoding => encoding, &block)
86
- end
87
-
88
- end
89
-
90
- end
5
+ warn "#{__FILE__}: Nuggets::Midos is deprecated, use Midos instead."
6
+ rescue LoadError => err
7
+ warn "#{__FILE__}: Nuggets::Midos is no longer available; install `midos' instead. (#{err})"
91
8
  end
@@ -4,13 +4,13 @@ module Nuggets
4
4
 
5
5
  MAJOR = 0
6
6
  MINOR = 9
7
- TINY = 7
7
+ TINY = 8
8
8
 
9
9
  class << self
10
10
 
11
11
  # Returns array representation.
12
12
  def to_a
13
- [MAJOR, MINOR, TINY]
13
+ [MAJOR, MINOR, TINY] << '1'
14
14
  end
15
15
 
16
16
  # Short-cut for version string.
@@ -23,7 +23,7 @@ describe Object, 'when extended by', Nuggets::Object::SingletonClassMixin do
23
23
 
24
24
  example do
25
25
  nil.singleton_class.should == NilClass
26
- NilClass.should be_a_singleton_class
26
+ #NilClass.should be_a_singleton_class
27
27
  NilClass.singleton_object.should be_equal(nil)
28
28
  end
29
29
 
metadata CHANGED
@@ -1,15 +1,85 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruby-nuggets
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.7
4
+ version: 0.9.8.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jens Wille
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-03-31 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2014-04-14 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: mime-types
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - ">="
18
+ - !ruby/object:Gem::Version
19
+ version: '0'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: open4
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: '0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: hen
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - ">="
46
+ - !ruby/object:Gem::Version
47
+ version: '0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: '0'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - ">="
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rspec
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - ">="
74
+ - !ruby/object:Gem::Version
75
+ version: '0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - ">="
81
+ - !ruby/object:Gem::Version
82
+ version: '0'
13
83
  description: Some extensions to the Ruby programming language.
14
84
  email: jens.wille@gmail.com
15
85
  executables: []
@@ -99,6 +169,8 @@ files:
99
169
  - lib/nuggets/hash/seen_mixin.rb
100
170
  - lib/nuggets/hash/unroll.rb
101
171
  - lib/nuggets/hash/unroll_mixin.rb
172
+ - lib/nuggets/hash/zip.rb
173
+ - lib/nuggets/hash/zip_mixin.rb
102
174
  - lib/nuggets/i18n.rb
103
175
  - lib/nuggets/integer/factorial.rb
104
176
  - lib/nuggets/integer/length.rb
@@ -120,9 +192,6 @@ files:
120
192
  - lib/nuggets/log_parser/rails.rb
121
193
  - lib/nuggets/lsi.rb
122
194
  - lib/nuggets/midos.rb
123
- - lib/nuggets/midos/base.rb
124
- - lib/nuggets/midos/reader.rb
125
- - lib/nuggets/midos/writer.rb
126
195
  - lib/nuggets/mysql.rb
127
196
  - lib/nuggets/net/success.rb
128
197
  - lib/nuggets/numeric/between.rb
@@ -240,7 +309,7 @@ metadata: {}
240
309
  post_install_message:
241
310
  rdoc_options:
242
311
  - "--title"
243
- - ruby-nuggets Application documentation (v0.9.7)
312
+ - ruby-nuggets Application documentation (v0.9.8.1)
244
313
  - "--charset"
245
314
  - UTF-8
246
315
  - "--line-numbers"
@@ -1,81 +0,0 @@
1
- # encoding: utf-8
2
-
3
- #--
4
- ###############################################################################
5
- # #
6
- # A component of ruby-nuggets, some extensions to the Ruby programming #
7
- # language. #
8
- # #
9
- # Copyright (C) 2007-2014 Jens Wille #
10
- # #
11
- # Authors: #
12
- # Jens Wille <jens.wille@gmail.com> #
13
- # #
14
- # ruby-nuggets is free software; you can redistribute it and/or modify it #
15
- # under the terms of the GNU Affero General Public License as published by #
16
- # the Free Software Foundation; either version 3 of the License, or (at your #
17
- # option) any later version. #
18
- # #
19
- # ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
20
- # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
21
- # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
22
- # for more details. #
23
- # #
24
- # You should have received a copy of the GNU Affero General Public License #
25
- # along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
26
- # #
27
- ###############################################################################
28
- #++
29
-
30
- module Nuggets
31
- module Midos
32
- class Base
33
-
34
- class << self
35
-
36
- private
37
-
38
- def file_method(method, mode, file, options = {}, *args, &block)
39
- Midos.open_file(file, options, mode) { |io|
40
- args.unshift(options.merge(:io => io))
41
- method ? send(method, *args, &block) : block[new(*args)]
42
- }
43
- end
44
-
45
- def extract_options!(args)
46
- args.last.is_a?(::Hash) ? args.pop : {}
47
- end
48
-
49
- end
50
-
51
- def initialize(options = {}, &block)
52
- self.key = options[:key]
53
-
54
- self.rs = options[:rs] || DEFAULT_RS
55
- self.fs = options[:fs] || DEFAULT_FS
56
- self.vs = options[:vs] || DEFAULT_VS
57
- self.nl = options[:nl] || DEFAULT_NL
58
- self.le = options[:le] || DEFAULT_LE
59
- self.io = options[:io] || self.class::DEFAULT_IO
60
-
61
- @auto_id_block = options[:auto_id] || block
62
- reset
63
- end
64
-
65
- attr_accessor :key, :rs, :fs, :nl, :le, :io, :auto_id
66
-
67
- attr_reader :vs
68
-
69
- def reset
70
- @auto_id = @auto_id_block ? @auto_id_block.call : default_auto_id
71
- end
72
-
73
- private
74
-
75
- def default_auto_id(n = 0)
76
- lambda { n += 1 }
77
- end
78
-
79
- end
80
- end
81
- end
@@ -1,252 +0,0 @@
1
- # encoding: utf-8
2
-
3
- #--
4
- ###############################################################################
5
- # #
6
- # A component of ruby-nuggets, some extensions to the Ruby programming #
7
- # language. #
8
- # #
9
- # Copyright (C) 2007-2014 Jens Wille #
10
- # #
11
- # Authors: #
12
- # Jens Wille <jens.wille@gmail.com> #
13
- # #
14
- # ruby-nuggets is free software; you can redistribute it and/or modify it #
15
- # under the terms of the GNU Affero General Public License as published by #
16
- # the Free Software Foundation; either version 3 of the License, or (at your #
17
- # option) any later version. #
18
- # #
19
- # ruby-nuggets is distributed in the hope that it will be useful, but WITHOUT #
20
- # ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or #
21
- # FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero General Public License #
22
- # for more details. #
23
- # #
24
- # You should have received a copy of the GNU Affero General Public License #
25
- # along with ruby-nuggets. If not, see <http://www.gnu.org/licenses/>. #
26
- # #
27
- ###############################################################################
28
- #++
29
-
30
- require 'nuggets/hash/idmap'
31
-
32
module Nuggets
  module Midos

    # Writes records to an IO-like sink (anything responding to <tt><<</tt>).
    #
    # As implemented by #write_i, a record is emitted as one line per field
    # (<tt>key + fs + value + le</tt>) and terminated by <tt>rs + le + le</tt>.
    # Array values are joined with +vs+ and newlines inside values are
    # replaced by +nl+.
    class Writer < Base

      DEFAULT_IO = $stdout

      class << self

        # Builds a writer from the trailing options Hash in +args+ (if any)
        # and writes the remaining arguments with Writer#write.
        def write(*args, &block)
          new(extract_options!(args), &block).write(*args)
        end

        # Same as ::write, but with the sink opened through +file_method+
        # in mode <tt>'w'</tt>.
        def write_file(*args, &block)
          file_method(:write, 'w', *args, &block)
        end

        # Opens a file in mode <tt>'w'</tt> through +file_method+ and hands
        # a writer bound to it to +block+.
        def open(*args, &block)
          file_method(nil, 'w', *args, &block)
        end

      end

      # Sets the value separator used to join Array values.
      #
      # Raises TypeError unless +vs+ is a String.
      def vs=(vs)
        unless vs.is_a?(::String)
          raise ::TypeError, "wrong argument type #{vs.class} (expected String)"
        end

        @vs = vs
      end

      # Writes every record of +records+. For a Hash the keys are used as
      # record ids; for any other enumerable no id is supplied. Extra +args+
      # are forwarded to #write_i (i.e. an alternative sink). Returns +self+.
      def write(records, *args)
        if records.is_a?(::Hash)
          records.each { |id, record| write_i(id, record, *args) }
        else
          records.each { |record| write_i(nil, record, *args) }
        end

        self
      end

      # Writes a single record. A Hash is written without an id; anything
      # else is splatted in front of +args+ and passed on to #write_i
      # (e.g. an <tt>[id, record]</tt> pair). Returns +self+.
      def put(record, *args)
        if record.is_a?(::Hash)
          write_i(nil, record, *args)
        else
          write_i(*args.unshift(*record))
        end

        self
      end

      alias_method :<<, :put

      private

      # Emits +record+ onto +io+; empty records are skipped.
      #
      # If a key field is configured and missing from +record+, it is added
      # to +record+ itself, using +id+ or else the next automatic id.
      # Fields whose value is +nil+/+false+ are omitted; a +nil+ field name
      # causes its value(s) to be written as bare lines.
      #
      # The default sink has to be spelled <tt>self.io</tt>: a default of
      # <tt>io = io</tt> is a circular argument reference, which on
      # Ruby >= 2.2 evaluates to +nil+ instead of calling the accessor.
      def write_i(id, record, io = self.io)
        return if record.empty?

        if @key && !record.key?(@key)
          record[@key] = id || @auto_id.call
        end

        record.each { |k, v|
          next unless v

          if k
            v = v.is_a?(::Array) ? v.join(@vs) : v.to_s
            io << k << @fs << v.gsub("\n", @nl) << @le
          else
            Array(v).each { |w| io << w.to_s << @le }
          end
        }

        io << @rs << @le << @le
      end

      # Writer that surrounds the records with a prologue and an epilogue
      # record (see #instruct!).
      class Thesaurus < self

        PROLOGUE = {
          :PAR => '1011111111110000000010001000000000000010',
          :DAT => '00000000',
          :DES => 'DE',
          :TOP => 'TP~TP',
          :KLA => 'CC~CC',
          :OBR => 'BT~BT',
          :UTR => 'NT~NT',
          :SYN => 'UF~USE',
          :FRU => 'PT~PT für',
          :VER => 'RT~RT',
          :SP1 => 'ENG~ENG für',
          :SP2 => 'FRA~FRA für',
          :SP3 => 'SPA~SPA für',
          :SP4 => 'ITA~ITA für',
          :SP5 => 'GRI~GRI für',
          :SP6 => 'RUS~RUS für',
          :SP7 => 'POL~POL für',
          :SP8 => 'UNG~UNG für',
          :SP9 => 'TSC~TSC für',
          :SN1 => 'SN1',
          :SN2 => 'SN2',
          :SN3 => 'SN3',
          :SN4 => 'SN4',
          :SN5 => 'SN5',
          :DA1 => 'DATE1',
          :DA2 => 'DATE2',
          :DA3 => 'DATE3',
          :DA4 => 'DATE4',
          :KLD => 'MIDOS Thesaurus',
          :KOM => ' / ',
          :KO1 => 'UF',
          :KO2 => 'USE',
          :TLE => ' 32000 Zeichen',
          :PAW => '',
          :ART => '00000',
          :REL => ' 17 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18 19 20 21 22 23 24 25'
        }

        EPILOGUE = {
          :DE => '*****NICHTDESKRIPTORRELATIONEN*****'
        }

        # Prologue entries whose first <tt>~</tt>-separated part names a
        # field to be resolved by #resolve.
        RESOLVE_FROM = [:OBR, :UTR, :VER]

        # Prologue entry naming the field that resolved ids are mapped to.
        RESOLVE_TO = :DES

        # Prologue entry that receives the <tt>:name</tt> option.
        NAME = :KLD

        class << self

          # Like Writer.write, but wrapped in prologue and epilogue.
          def write(*args, &block)
            new(extract_options!(args), &block).instruct! { |mth| mth.write(*args) }
          end

          # Like Writer.open, but +block+ runs between prologue and epilogue.
          def open(*args, &block)
            super { |mth| mth.instruct!(&block) }
          end

        end

        # +prologue+ and +epilogue+ are merged over the class defaults. The
        # name entry is taken from +prologue+ or else from
        # <tt>options[:name]</tt>; it is set on a copy so that the caller's
        # Hash is left unmodified.
        #
        # NOTE(review): when neither source provides a name the entry is
        # stored as +nil+, which masks the default in PROLOGUE and makes
        # #write_i omit the field -- confirm that this is intended.
        def initialize(options = {}, prologue = {}, epilogue = {}, &block)
          super(options, &block)

          name_key = self.class::NAME
          prologue = prologue.merge(name_key => prologue[name_key] || options[:name])

          @prologue = self.class::PROLOGUE.merge(prologue)
          @epilogue = self.class::EPILOGUE.merge(epilogue)
        end

        attr_reader :prologue, :epilogue

        # Writes the prologue, yields the writer, then writes the epilogue.
        # +args+ are forwarded to both #put calls. Returns +self+.
        def instruct!(*args)
          put(prologue, *args)
          yield self
          put(epilogue, *args)
        end

        private

        # Fills +hash+ with a copy of every record in +records+ (keyed by
        # id), passing each field value through #resolve. +args+ are handed
        # to #resolve_from_to.
        def merge_records(hash, records, *args)
          args = [hash, records, *resolve_from_to(*args)]

          records.each { |id, record|
            new_record = hash[id] = {}
            record.each { |key, value| new_record[key] = resolve(key, value, *args) }
          }
        end

        # Returns the <tt>[from, to]</tt> pair for #resolve. A +from+ of
        # +nil+ or +true+ is replaced by the field names derived from the
        # RESOLVE_FROM prologue entries.
        def resolve_from_to(from = nil, to = prologue[RESOLVE_TO])
          if from.nil? || from == true
            from = prologue.values_at(*RESOLVE_FROM).map { |v| v.split('~').first }
          end

          [from, to]
        end

        # Maps the ids in +value+ to the +to+ field of the referenced
        # records when +key+ is listed in +from+; returns +value+ unchanged
        # otherwise.
        def resolve(key, value, hash, records, from = nil, to = nil)
          from && from.include?(key) ? value.map { |id| records[id][to] } : value
        end

      end

      # Thesaurus variant with its own prologue that resolves every field
      # value through an id map instead of the referenced records.
      class ThesaurusX < Thesaurus

        PROLOGUE = {
          'MTX-PARAMETER' => '',
          :BEZ            => 'MIDOS Thesaurus',
          :KOM            => ' / ',
          :TXL            => 0,
          :REL            => '',
          nil             => %w[
            TT1|Topterm|TT1||||||
            BT1|Oberbegriff|BT1||||||
            NT1|Unterbegriff|NT1||||||
            RT1|Verwandter\ Begriff|RT1||||||
            SY1|Synonym1|SY1|SY1FOR|||||
          ]
        }

        EPILOGUE = {}

        NAME = :BEZ

        private

        # Registers an id map under <tt>:__list__</tt> in +hash+, merges the
        # records as in Thesaurus and finally replaces the map's contents
        # with a single +nil+ entry listing one <tt>"key|DE|id"</tt> line
        # per mapping.
        def merge_records(hash, *)
          idmap = hash[:__list__] = ::Hash.idmap

          super

          idmap.replace(nil => idmap.map { |key, id| "#{key}|DE|#{id}" })
        end

        # Returns +nil+, so that no from/to arguments reach #resolve.
        def resolve_from_to(*)
          # nothing to do
        end

        # Looks up every id of +value+ in the id map stored in +hash+.
        def resolve(key, value, hash, *)
          value.map { |id| hash[:__list__][id] }
        end

      end

    end

  end
end