marty 0.5.36 → 0.5.38
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/app/models/marty/data_grid.rb +780 -0
- data/app/models/marty/grid_index_boolean.rb +4 -0
- data/app/models/marty/grid_index_int4range.rb +3 -0
- data/app/models/marty/grid_index_integer.rb +3 -0
- data/app/models/marty/grid_index_numrange.rb +3 -0
- data/app/models/marty/grid_index_string.rb +3 -0
- data/app/models/marty/name_validator.rb +15 -0
- data/db/migrate/100_create_marty_data_grids.rb +16 -0
- data/db/migrate/101_create_marty_grid_index_numranges.rb +27 -0
- data/db/migrate/102_create_marty_grid_index_int4ranges.rb +27 -0
- data/db/migrate/103_create_marty_grid_index_integers.rb +27 -0
- data/db/migrate/104_create_marty_grid_index_strings.rb +27 -0
- data/db/migrate/105_create_marty_grid_index_booleans.rb +27 -0
- data/lib/marty/migrations.rb +3 -4
- data/lib/marty/version.rb +1 -1
- data/spec/dummy/app/models/gemini/state.rb +19 -0
- data/spec/dummy/config/application.rb +1 -1
- data/spec/dummy/db/migrate/20160100000038_create_gemini_states.rb +8 -0
- data/spec/dummy/db/seeds.rb +67 -0
- data/spec/dummy/lib/class_list.rb +1 -1
- data/spec/lib/data_exporter_spec.rb +0 -1
- data/spec/lib/data_importer_spec.rb +0 -1
- data/spec/lib/migrations/vw_marty_postings.sql.expected +2 -2
- data/spec/models/data_grid_spec.rb +614 -0
- data/spec/models/srp_data.csv +55 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/support/spec_setup.rb +23 -0
- metadata +20 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4d2c09a2a2c8901d33786fc13277047462dda37b
|
4
|
+
data.tar.gz: df69eb54ada158f959330b96a2be3e6231a66b30
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 824c8e5fe29067c3c9d5aa5093e01eb28c68acc22ac765d28283997db96fe08fb05f36dedf63974665b1be263c835e8afcbd284fb30090934d592ba5c3402cc4
|
7
|
+
data.tar.gz: 46ba58c67bb858427506a2de15d35a5dadab42cdc63595dfb53967b341049d9e421ea4c006c6ef949df111cef5d197802cbf0c4dd5374f075d381fa18d750ac9
|
@@ -0,0 +1,780 @@
|
|
1
|
+
class Marty::DataGrid < Marty::Base
|
2
|
+
|
3
|
+
# Data type assumed for a grid whose data_type is nil.
DEFAULT_DATA_TYPE = "float"

# Maps a metadata type name to the index model used for keys of that type.
# Note that "float" maps to `true` rather than to a model class.
INDEX_MAP = {
  "numrange"  => Marty::GridIndexNumrange,
  "int4range" => Marty::GridIndexInt4range,
  "integer"   => Marty::GridIndexInteger,
  "string"    => Marty::GridIndexString,
  "boolean"   => Marty::GridIndexBoolean,
  "float"     => true
}

# Separator between the elements of a multi-valued key in the textual
# (import/export) representation.
ARRSEP = '|'

# Types for which a NULL index key is tried as a wildcard when a lookup
# finds no matching key.
NULLABLE_TYPES = Set.new(["string", "integer"])
|
17
|
+
|
18
|
+
# Validates the structure of a data grid: its data type, the shape of
# data and metadata, every metadata element, and the uniqueness of the
# horizontal / vertical key combinations.  Problems are reported on
# errors[:base].
class DataGridValidator < ActiveModel::Validator
  # Delorean attribute syntax (a bit Draconian).  Anchored with \A / \z so
  # that a value with embedded newlines cannot pass.
  ATTR_PATTERN = /\A[a-z][A-Za-z0-9_]*\z/

  # Grid modifier expression: optional comparison operator + identifier.
  RS_KEEP_PATTERN = /\A *(<|<=|>|>=)? *([a-z_]+) *\z/

  # @param dg [Marty::DataGrid] the grid being validated
  def validate(dg)
    errors = dg.errors

    errors.add(:base, "'#{dg.data_type}' not a defined type or class") unless
      Marty::DataGrid.convert_data_type(dg.data_type)

    errors.add(:base, "data must be array of arrays") unless
      dg.data.is_a?(Array) && dg.data.all? { |a| a.is_a? Array }

    metadata = dg.metadata

    # Every check below iterates over metadata hashes.  Stop here when the
    # structure itself is wrong instead of raising NoMethodError.
    unless metadata.is_a?(Array) && metadata.all? { |a| a.is_a? Hash }
      errors.add(:base, "metadata must be an array of hashes")
      return
    end

    errors.add(:base, "metadata must contain only h/v dirs") unless
      metadata.all? { |h| ["h", "v"].member? h["dir"] }

    errors.add(:base, "metadata item attrs must be unique") unless
      metadata.map { |h| h["attr"] }.uniq.length == metadata.length

    metadata.each { |inf| validate_info(dg, inf) }

    # Key combinations can only be compared when every "keys" entry is an
    # array; a bad entry has already been reported by validate_info.
    return unless metadata.all? { |inf| inf["keys"].is_a?(Array) }

    # Check key uniqueness of vertical/horizontal key
    # combinations. FIXME: ideally, we should also check for
    # array/range key subsumption. Those will result in runtime
    # errors anyway when multiple hits are produced.
    v_zip_keys = zip_keys(dg.dir_infos("v"))
    h_zip_keys = zip_keys(dg.dir_infos("h"))

    errors.add(:base, "duplicate horiz. key combination") unless
      h_zip_keys.uniq.length == h_zip_keys.length

    errors.add(:base, "duplicate vertical key combination") unless
      v_zip_keys.uniq.length == v_zip_keys.length
  end

  private

  # Checks one metadata element (attr / type / keys / rs_keep).
  def validate_info(dg, inf)
    attr, type, keys, rs_keep =
      inf["attr"], inf["type"], inf["keys"], inf["rs_keep"]

    unless rs_keep.nil? || rs_keep.empty?
      unless RS_KEEP_PATTERN.match(rs_keep)
        dg.errors.add(:base, "invalid grid modifier expression: #{rs_keep}")
        # a bad modifier skips the remaining checks for this element
        return
      end
    end

    dg.errors.add(:base, "metadata elements must have attr/type/keys") unless
      attr && type && keys

    dg.errors.add(:base, "bad attribute '#{attr}'") unless
      attr =~ ATTR_PATTERN

    dg.errors.add(:base, "unknown metadata type #{type}") unless
      Marty::DataGrid.type_to_index(type)

    dg.errors.add(:base, "bad metadata keys") unless
      keys.is_a?(Array) && keys.length > 0
  end

  # Combines the per-attribute key lists of one direction into one tuple
  # per row/column position.
  def zip_keys(infos)
    key_lists = infos.map { |inf| inf["keys"] }
    key_lists.empty? ? [] : key_lists[0].zip(*key_lists[1..-1])
  end
end
|
81
|
+
|
82
|
+
# Class-level declarations.
# NOTE(review): has_mcfly, lazy_load, mcfly_validates_uniqueness_of,
# gen_mcfly_lookup and cached_mcfly_lookup are defined outside this file;
# their exact semantics should be confirmed against their definitions.
has_mcfly

lazy_load(:data)

validates_presence_of(:name, :data, :metadata)

mcfly_validates_uniqueness_of(:name)
validates_with(DataGridValidator)
validates_with(Marty::NameValidator, field: :name)

gen_mcfly_lookup(:lookup, { name: false })

gen_mcfly_lookup(:get_all, {}, mode: :all)

# Looks a grid up by group id; the pt argument is not used by the block.
cached_mcfly_lookup(:lookup_id, sig: 2) do |pt, group_id|
  find_by_group_id(group_id)
end
|
102
|
+
|
103
|
+
# A grid's string form is its name.
def to_s
  name
end
|
106
|
+
|
107
|
+
# Deliberately a no-op that returns the receiver unfrozen.
#
# FIXME: mcfly lookups freeze their results to protect the cache, which
# does not interact correctly with lazy_load: lazy_load modifies the
# attribute hash at runtime.
def freeze
  self
end
|
113
|
+
|
114
|
+
# FIXME: not sure what's the right way to perform the save in a
|
115
|
+
# transaction -- i.e. together with build_index. before_save would
|
116
|
+
# be OK, but then save inside it would cause an infinite loop.
|
117
|
+
# Saves the grid and rebuilds its index rows inside one transaction.
# When nothing has changed the method does nothing and returns nil;
# otherwise it returns whatever the inherited save! returned.
def save!
  return unless changed?

  transaction do
    super.tap do
      # reload before indexing, then index the freshly saved record
      reload
      build_index
    end
  end
end
|
127
|
+
|
128
|
+
# FIXME: hacky -- save is just save!
|
129
|
+
# Plain delegation to save!: whatever save! returns or raises is passed
# through unchanged.
def save
  save!
end
|
132
|
+
|
133
|
+
# Maps a metadata header type to its INDEX_MAP entry.  A type that is not
# in INDEX_MAP but constantizes to a Ruby constant uses the string index.
# Returns nil when the type is neither.
def self.type_to_index(type)
  mapped = INDEX_MAP[type]
  return mapped if mapped

  resolved =
    begin
      type.constantize
    rescue StandardError
      nil
    end

  INDEX_MAP["string"] if resolved
end
|
139
|
+
|
140
|
+
# Converts a grid data_type into either a known database type name (a
# String) or a constant obtained via constantize.
#
# Returns DEFAULT_DATA_TYPE for nil, and nil when data_type is neither a
# member of Marty::DataConversion::DATABASE_TYPES nor constantizable.
def self.convert_data_type(data_type)
  if data_type.nil?
    DEFAULT_DATA_TYPE
  elsif Marty::DataConversion::DATABASE_TYPES.member?(data_type.to_sym)
    data_type
  else
    begin
      data_type.constantize
    rescue StandardError
      nil
    end
  end
end
|
150
|
+
|
151
|
+
# Looks up the single cell addressed by the attribute values in h.
#
# For each metadata attribute present in h the matching key indices are
# fetched from the index table; indices of the same direction are
# intersected.  Raises when a direction has more than one match, when the
# attrs of a direction are missing from h, or when no cell is found --
# the last two unless return_grid_data (or, for the last, lenient) is set.
#
# The pt argument is not used by this method's body.
#
# Returns a hash with "result", "name", and -- only when return_grid_data
# is true -- "data" and "metadata" as produced by modify_grid.
def lookup_grid_distinct(pt, h, return_grid_data=false)
  # direction ("h"/"v") => Set of matching indices
  matches = {}

  (dir_infos("v") + dir_infos("h")).each do |inf|
    dir, type, attr = inf.values_at("dir", "type", "attr")

    next unless h.has_key?(attr)

    value = h[attr]
    index_model = INDEX_MAP[type] || INDEX_MAP["string"]

    # index rows of this grid version for the attribute
    scope = index_model.where(data_grid_id: group_id,
                              created_dt: created_dt,
                              attr: attr)

    unless value.nil?
      condition =
        case type
        when "boolean"               then "key = ?"
        when "numrange", "int4range" then "key @> ?"
        else                              "key @> ARRAY[?]" # "string", "integer", AR klass
        end

      # FIXME: very hacky -- need to cast numrange/intrange values or
      # we get errors from PG.
      bound =
        case type
        when "numrange"             then value.to_f
        when "int4range", "integer" then value.to_i
        when "boolean"              then value
        else                             value.to_s # "string" and AR class
        end

      found = scope.where(condition, bound).uniq.pluck(:index)
    end

    # NULLABLE_TYPES fields allow NULL key wildcards. If no match
    # for key was found, try NULL.
    if value.nil? || (found.empty? && NULLABLE_TYPES.member?(type))
      found = scope.where(key: nil).uniq.pluck(:index)
    end

    # FIXME: optimization: bail out if one of the sets is empty.
    # Or, even better, we should submit all the queries together.
    matches[dir] = matches[dir] ? matches[dir] & found : Set.new(found)
  end

  ["h", "v"].each do |dir|
    # a direction without any attrs always addresses index 0
    matches[dir] = Set[0] if !matches[dir] && dir_infos(dir).empty?

    if !matches[dir] && !return_grid_data
      attrs = dir_infos(dir).map { |inf| inf["attr"] }

      raise "#{dir} attrs not provided: %s" % attrs.join(',')
    end

    raise "Grid #{name}, (#{matches[dir].count}) #{dir} matches > 1." if
      matches[dir] && matches[dir].count > 1
  end

  vi, hi = matches["v"].first, matches["h"].first if matches["v"] && matches["h"]

  raise "DataGrid lookup failed #{name}" unless
    (vi && hi) || lenient || return_grid_data

  modified_data, modified_metadata = modify_grid(h) if return_grid_data

  {
    "result"   => (data[vi][hi] if vi && hi),
    "name"     => name,
    "data"     => (modified_data if return_grid_data),
    "metadata" => (modified_metadata if return_grid_data)
  }
end
|
241
|
+
|
242
|
+
# FIXME: using cached_delorean_fn just for the caching -- this is
|
243
|
+
# not expected to be called from Delorean.
|
244
|
+
# Finds the row of klass whose "name" is v as of pt, via
# Marty::DataConversion.find_row.
# FIXME: very hacky -- the "name" attribute is hard-coded.
cached_delorean_fn(:find_class_instance, sig: 3) do |pt, klass, v|
  Marty::DataConversion.find_row(klass, {"name" => v}, pt)
end
|
249
|
+
|
250
|
+
# Performs a grid lookup and, when the grid's data_type is a class,
# replaces the raw result with the instance found for it.  If that
# instance is itself a data grid and follow is true, the lookup continues
# on it, which allows grids to be nested as multi-grids.
#
# visited collects the group ids of the grids already traversed and is
# used to detect recursion loops (raises when one is found).
#
# Returns a hash:
#   "result"   => <result of running the grid>
#   "name"     => <grid name>
#   "data"     => <grid's data array>         (only with return_grid_data)
#   "metadata" => <grid's metadata hashes>    (only with return_grid_data)
def lookup_grid_distinct_entry(pt, h, visited=nil, follow=true,
                               return_grid_data=false)
  vhash = lookup_grid_distinct(pt, h, return_grid_data)
  raw_result = vhash["result"]

  return vhash if raw_result.nil? || !data_type

  c_data_type = Marty::DataGrid.convert_data_type(data_type)

  # a String here is a plain database type: nothing to resolve
  return vhash if String === c_data_type

  instance = Marty::DataGrid.find_class_instance(pt, c_data_type, raw_result)

  unless Marty::DataGrid === instance && follow
    return vhash.merge({"result" => instance})
  end

  visited ||= []
  visited << group_id

  raise "#{self.class} recursion loop detected -- #{visited}" if
    visited.member?(instance.group_id)

  instance.lookup_grid_distinct_entry(pt, h, visited, follow, return_grid_data)
end

delorean_instance_method :lookup_grid_distinct_entry,
                         [[Date, Time, ActiveSupport::TimeWithZone], Hash]
|
288
|
+
|
289
|
+
# Metadata elements whose "dir" equals the given direction ("h" or "v"),
# in metadata order.
def dir_infos(dir)
  metadata.select do |info|
    info["dir"] == dir
  end
end
|
292
|
+
|
293
|
+
# Converts the keys of one metadata element to their textual (export)
# form, one string (or nil) per key.
#
# Range keys go through Marty::Util.pg_range_to_human, booleans through
# to_s, and array keys are joined with ARRSEP.  For any other type the
# type name is constantized and every key element must resolve through
# find_class_instance, otherwise "instance ... not found" is raised.
#
# NOTE: should be unified with Marty::DataConversion.convert
def self.export_keys(inf)
  type = inf["type"]
  klass = INDEX_MAP[type] ? nil : type.constantize

  inf["keys"].map do |key|
    case type
    when "numrange", "int4range"
      Marty::Util.pg_range_to_human(key)
    when "boolean"
      key.to_s
    when "string", "integer"
      key.map(&:to_s).join(ARRSEP) if key
    else
      # assume it's an AR class; a missing key exports as nil
      next unless key

      key.each do |k|
        begin
          # check to see if class instance actually exists
          Marty::DataGrid.
            find_class_instance('infinity', klass, k) || raise(NoMethodError)
        rescue NoMethodError
          raise "instance #{k} of #{type} not found"
        end
      end

      key.join(ARRSEP)
    end
  end
end
|
324
|
+
|
325
|
+
# FIXME: this is only here to appease Netzke add_in_form
|
326
|
+
# Writer counterpart of #export; the assigned text is intentionally ignored.
def export=(text)
end
|
328
|
+
|
329
|
+
# Builds the three row groups of the grid's textual representation.
#
# Returns [meta_rows, h_key_rows, data_rows]:
# * meta_rows  -- an optional first row holding "lenient" and/or a
#                 non-default data type, then one
#                 [attr, type, dir, rs_keep] row per metadata element
# * h_key_rows -- one row per horizontal attr, left-padded with one nil
#                 per vertical attr
# * data_rows  -- the vertical keys of each row followed by its data;
#                 with no vertical attrs there is a single row built
#                 from data[0]
def export_array
  # data type metadata row, only emitted when it has content
  type_row = []
  type_row << "lenient" if lenient
  type_row << data_type unless [nil, DEFAULT_DATA_TYPE].member?(data_type)

  meta_rows = type_row.empty? ? [] : [type_row]
  metadata.each do |inf|
    meta_rows << [inf["attr"], inf["type"], inf["dir"], inf["rs_keep"] || ""]
  end

  v_infos = dir_infos("v")
  h_infos = dir_infos("h")

  padding = [nil] * v_infos.count
  h_key_rows = h_infos.map { |inf| padding + self.class.export_keys(inf) }

  v_key_rows =
    if v_infos.empty?
      [[]]
    else
      v_infos.map { |inf| self.class.export_keys(inf) }.transpose
    end

  data_rows = v_key_rows.each_with_index.map do |keys, i|
    keys + (data[i] || [])
  end

  [meta_rows, h_key_rows, data_rows]
end
|
362
|
+
|
363
|
+
# Textual, tab-separated representation of the grid: metadata rows, an
# empty row, horizontal key rows, then data rows.
#
# Returns "" when both metadata and data are nil (the case when called
# from Netzke on add_in_form).
def export
  return "" if metadata.nil? && data.nil?

  meta_rows, h_key_rows, data_rows = export_array
  rows = meta_rows + [[]] + h_key_rows + data_rows

  csv = Marty::DataExporter.to_csv(rows, "col_sep" => "\t")

  # remove "" to beautify output
  csv.gsub(/\"\"/, '')
end

delorean_instance_method :export, []
|
377
|
+
|
378
|
+
# Parses one textual key v according to type.
#
# * nil/false v                 -> nil
# * "numrange" / "int4range"    -> Marty::Util.human_to_pg_range(v)
# * "integer" / "float"         -> sorted, de-duplicated array of numbers
# * "string"                    -> sorted, de-duplicated array of strings
# * "boolean"                   -> true / false
# * anything else (AR class)    -> de-duplicated array of names, each of
#                                  which must resolve through
#                                  find_class_instance(pt, klass, name)
#
# Raises a RuntimeError describing the offending element on bad input.
def self.parse_fvalue(pt, v, type, klass)
  return unless v

  case type
  when "numrange", "int4range"
    Marty::Util.human_to_pg_range(v)
  when "integer"
    numbers = v.split(ARRSEP).map do |val|
      begin
        Integer(val)
      rescue StandardError
        raise "invalid integer: #{val}"
      end
    end
    numbers.uniq.sort
  when "float"
    numbers = v.split(ARRSEP).map do |val|
      begin
        Float(val)
      rescue StandardError
        raise "invalid float: #{val}"
      end
    end
    numbers.uniq.sort
  when "string"
    elements = v.split(ARRSEP).uniq.sort
    raise "leading/trailing spaces in elements not allowed" if
      elements.any? { |x| x != x.strip }
    raise "0-length string not allowed" if elements.any?(&:empty?)
    elements
  when "boolean"
    lowered = v.downcase
    if ["true", "t"].include?(lowered)
      true
    elsif ["false", "f"].include?(lowered)
      false
    else
      raise "bad boolean #{v}"
    end
  else
    # AR class
    # FIXME: won't work if the obj identifier (name) has ARRSEP
    names = v.split(ARRSEP).uniq
    names.each do |k|
      begin
        # check to see if class instance actually exists
        Marty::DataGrid.
          find_class_instance(pt, klass, k) || raise(NoMethodError)
      rescue NoMethodError
        raise "instance #{k} of #{type} not found"
      end
    end
    names
  end
end
|
424
|
+
|
425
|
+
# Returns the constant named by type, or nil when type is one of the
# INDEX_MAP types.  Raises "unknown header type/klass" when the name
# cannot be resolved (NameError).
def self.maybe_get_klass(type)
  return nil if INDEX_MAP[type]

  type.constantize
rescue NameError
  raise "unknown header type/klass: #{type}"
end
|
432
|
+
|
433
|
+
# Parses every textual key in keys with parse_fvalue, resolving the
# class for type once up front via maybe_get_klass.
def self.parse_keys(pt, keys, type)
  klass = maybe_get_klass(type)
  keys.map { |raw_key| parse_fvalue(pt, raw_key, type, klass) }
end
|
440
|
+
|
441
|
+
# parse grid external representation into metadata/data
|
442
|
+
# Parses the textual (CSV) representation of a grid.
#
# Layout of grid_text: an optional first row holding "lenient" and/or the
# data type, one "attr type dir [rs_keep] [key]" row per metadata
# element, a blank row, one key row per horizontal attr, then the data
# rows (vertical keys first, data cells after).
#
# pt:: point in time used for class-instance checks; defaults to 'infinity'
# grid_text:: the text to parse
# options:: CSV options; :headers defaults to false and :col_sep to a
#           tab.  The caller's hash is not modified.
#
# Returns [metadata, data, data_type, lenient].
# Raises a RuntimeError describing the first problem found.
def self.parse(pt, grid_text, options)
  # work on a copy so the defaults don't leak into the caller's hash
  options = options.dup
  options[:headers] ||= false
  options[:col_sep] ||= "\t"

  pt ||= 'infinity'

  # CSV.new takes its options as keyword arguments
  rows = CSV.new(grid_text, **options).to_a
  blank_index = rows.find_index { |x| x.all?(&:nil?) }

  raise "must have a blank row separating metadata" unless
    blank_index

  data_type = nil
  lenient = false

  # check if there's a data_type definition
  dt, *x = rows[0]
  if dt && x.all?(&:nil?)
    dts = dt.split
    raise "bad data type '#{dt}'" if dts.count > 2

    lenient = dts.delete "lenient"
    data_type = dts.first
  end

  first_meta_row = (data_type || lenient) ? 1 : 0

  metadata = rows[first_meta_row...blank_index].map do
    |attr, type, dir, rs_keep, key|

    raise "metadata elements must include attr/type/dir" unless
      attr && type && dir
    raise "bad dir #{dir}" unless ["h", "v"].member? dir

    res = {
      "attr" => attr,
      "type" => type,
      "dir"  => dir,
      "keys" => key && parse_keys(pt, [key], type),
    }
    res["rs_keep"] = rs_keep if rs_keep
    res
  end

  v_infos = metadata.select { |inf| inf["dir"] == "v" }
  h_infos = metadata.select { |inf| inf["dir"] == "h" }

  # keys+data start right after blank_index
  data_index = blank_index + 1

  # process horizontal key rows
  h_infos.each_with_index do |inf, i|
    row = rows[data_index + i]

    # fewer rows than horizontal attrs: report it instead of failing on nil
    raise "horiz. key row #{data_index+i} is missing" unless row

    raise "horiz. key row #{data_index+i} must include nil starting cells" if
      row[0, v_infos.count].any?

    inf["keys"] = parse_keys(pt, row[v_infos.count, row.count], inf["type"])
  end

  raise "horiz. info keys length mismatch!" unless
    h_infos.map { |inf| inf["keys"].length }.uniq.count <= 1

  data_rows = rows[data_index + h_infos.count, rows.count]

  # process vertical key columns
  v_key_cols = data_rows.map { |r| r[0, v_infos.count] }.transpose

  v_infos.each_with_index do |inf, i|
    # without data rows there are no key columns at all; use an empty key
    # list so the grid fails validation rather than raising here
    inf["keys"] = parse_keys(pt, v_key_cols[i] || [], inf["type"])
  end

  raise "vert. info keys length mismatch!" unless
    v_infos.map { |inf| inf["keys"].length }.uniq.count <= 1

  c_data_type = Marty::DataGrid.convert_data_type(data_type)

  raise "bad data type" unless c_data_type

  # based on data type, decide to check using convert or instance
  # lookup. FIXME: DRY.
  if String === c_data_type
    tsym = c_data_type.to_sym

    data = data_rows.map do |r|
      r[v_infos.count, r.count].map do |v|
        Marty::DataConversion.convert(v, tsym) if v
      end
    end
  else
    data = data_rows.map do |r|
      r[v_infos.count, r.count].map do |v|
        next v if !v || Marty::DataGrid.
          find_class_instance(pt, c_data_type, v)

        raise "can't find key '#{v}' for class #{data_type}"
      end
    end
  end

  [metadata, data, data_type, lenient]
end
|
548
|
+
|
549
|
+
# Parses import_text and saves a new grid called name.  created_dt, when
# given, is used both as the parse point in time and as the new record's
# created_dt.  Returns the saved grid.
def self.create_from_import(name, import_text, created_dt=nil)
  parsed_metadata, parsed_data, parsed_type, parsed_lenient =
    parse(created_dt, import_text, {})

  new.tap do |grid|
    grid.name       = name
    grid.data       = parsed_data
    grid.data_type  = parsed_type
    grid.lenient    = !!parsed_lenient
    grid.metadata   = parsed_metadata
    grid.created_dt = created_dt if created_dt
    grid.save!
  end
end
|
561
|
+
|
562
|
+
# Re-parses import_text and overwrites this grid's name, data, data type,
# lenient flag and metadata (and created_dt when given), then saves.
# Returns the result of save!.
def update_from_import(name, import_text, created_dt=nil)
  parsed_metadata, parsed_data, parsed_type, parsed_lenient =
    self.class.parse(created_dt, import_text, {})

  self.name       = name
  self.data       = parsed_data
  self.data_type  = parsed_type
  self.lenient    = !!parsed_lenient
  self.metadata   = parsed_metadata
  self.created_dt = created_dt if created_dt

  save!
end
|
574
|
+
|
575
|
+
# FIXME: should be private
|
576
|
+
# Creates one index row per key of every metadata element.  Each row
# records the attr, the key, the key's position within the element, and
# this grid's group_id and created_dt.
def build_index
  metadata.each do |inf|
    # index model for this element's type
    idx_class = Marty::DataGrid.type_to_index(inf["type"])

    inf["keys"].each_with_index do |key, position|
      idx_class.new.tap do |row|
        row.attr         = inf["attr"]
        row.key          = key
        row.created_dt   = created_dt
        row.data_grid_id = group_id
        row.index        = position
        row.save!
      end
    end
  end
end
|
599
|
+
|
600
|
+
# Returns [data, metadata] copies restricted according to each metadata
# element's "rs_keep" expression and the matching value in params.
#
# Range-typed elements parse rs_keep with parse_bounds and use
# compute_numeric_mods; all other elements use rs_keep itself as the
# params key and compute_set_mods.  Keys are rewritten in the copies;
# pruned indices are removed from the keys of every element of the same
# direction and from the corresponding data rows ("v") or columns ("h").
# The grid's own data and metadata are not modified.
def modify_grid(params)
  removes = { "h" => Set.new, "v" => Set.new }

  metadata_copy = metadata.deep_dup
  data_copy = data.deep_dup

  metadata_copy.each do |meta|
    dir, keys, type, rs_keep = meta.values_at("dir", "keys", "type", "rs_keep")
    next unless rs_keep

    mods =
      if type == "numrange" || type == "int4range"
        modop, modvalparm = parse_bounds(rs_keep)
        modval = params[modvalparm]
        compute_numeric_mods(keys, modop, modval) if modval
      else
        modval = params[rs_keep]
        compute_set_mods(keys, modval) if modval
      end

    # no value supplied for this modifier: leave the element alone
    next unless mods

    prune_a, rewrite_a = mods
    removes[dir].merge(prune_a)
    rewrite_a.each { |(ind, value)| keys[ind] = value }
  end

  removes.reject! { |_dir, set| set.empty? }

  removes.each do |dir, set|
    metadata_copy.each do |meta|
      next unless meta["dir"] == dir

      meta["keys"] = remove_indices(meta["keys"], set)
    end
  end

  data_copy = remove_indices(data_copy, removes["v"]) if removes["v"]

  if removes["h"]
    data_copy.map! { |row| remove_indices(row, removes["h"]) }
  end

  [data_copy, metadata_copy]
end
|
645
|
+
|
646
|
+
private
|
647
|
+
# Returns a new array with the elements of orig_array whose position is
# not contained in inds.
def remove_indices(orig_array, inds)
  orig_array.reject.with_index { |_item, position| inds.include?(position) }
end
|
652
|
+
|
653
|
+
# Toggles both direction and inclusivity of a comparison operator:
# :lt <-> :ge and :le <-> :gt.  Returns nil for anything else.
def opposite_sign(op)
  case op
  when :lt then :ge
  when :le then :gt
  when :gt then :le
  when :ge then :lt
  end
end
|
661
|
+
|
662
|
+
# Works out how range keys change when the part of the number line
# selected by op and val is pruned.
#
# keys:: range strings in the format accepted by parse_range
# op::   :ge / :gt / :le / :lt -- the comparison describing what to prune
#        (any other value leaves every key untouched)
# val::  the boundary value; for an Array only the first element is used
#
# Returns [prune_a, rewrite_a]: the indices of keys that disappear
# entirely, and [index, new_range_string] pairs for keys that shrink.
# Parsed ranges are memoized per key string in @keyhash.
def compute_numeric_mods(keys, op, val)
  @keyhash ||= {}
  to_prune = []
  to_rewrite = []

  # features allow multiple values, but for constraint on a grid range
  # only a scalar is meaningful. so if there are multiple values we
  # take the first value to use
  value = val.is_a?(Array) ? val[0] : val

  keys.each_with_index do |key, index|
    lhop, orig_lhv, orig_rhv, rhop = (@keyhash[key] ||= parse_range(key))

    # open-ended bounds compare as +/- infinity
    lhv = orig_lhv || -Float::INFINITY
    rhv = orig_rhv || Float::INFINITY

    case op
    when :ge, :gt
      if value > rhv
        # the whole range lies below the pruned part: keep as is
      elsif value == rhv
        if rhop == :le && op == :ge
          to_rewrite << [index, rewrite_range(lhop, orig_lhv, orig_rhv, :lt)]
        end
      elsif value > lhv
        to_rewrite <<
          [index, rewrite_range(lhop, orig_lhv, value, opposite_sign(op))]
      elsif value == lhv && lhop == :ge && op == :gt
        to_rewrite << [index, rewrite_range(:ge, value, value, :le)]
      elsif value <= lhv
        to_prune << index
      end
    when :le, :lt
      if value < lhv
        # the whole range lies above the pruned part: keep as is
      elsif value == lhv
        if lhop == :ge && op == :le
          to_rewrite << [index, rewrite_range(:gt, orig_lhv, orig_rhv, rhop)]
        end
      elsif value < rhv
        to_rewrite <<
          [index, rewrite_range(opposite_sign(op), value, orig_rhv, rhop)]
      elsif value == rhv && rhop == :le && op == :lt
        to_rewrite << [index, rewrite_range(:ge, value, value, :le)]
      elsif value >= rhv
        to_prune << index
      end
    end
  end

  [to_prune, to_rewrite]
end
|
713
|
+
|
714
|
+
# value is a list of what to keep
|
715
|
+
# Works out how array keys change when only the values in val are kept.
#
# Returns [prune_a, rewrite_a]:
# * a nil (wildcard) key is rewritten to the list of values to keep
# * a key sharing no element with val is pruned
# * a key sharing some elements is rewritten to just those elements
# * a key fully contained in val is left alone
def compute_set_mods(keys, val)
  keep = Array(val)
  to_prune = []
  to_rewrite = []

  keys.each_with_index do |key, index|
    if key.nil?
      to_rewrite << [index, keep]
    else
      dropped = key - keep

      if dropped == key
        to_prune << index
      elsif dropped != []
        to_rewrite << [index, key - dropped]
      end
    end
  end

  [to_prune, to_rewrite]
end
|
737
|
+
|
738
|
+
# Parses a range string such as "[1,2.5)" or "(,10]".
#
# Returns [left_op, left_value, right_value, right_op] where left_op is
# :gt for "(" and :ge for "[", right_op is :lt for ")" and :le for "]",
# and a missing bound is nil.  Bounds are converted with to_f.
# Raises "unrecognized pattern" when key does not match.
def parse_range(key)
  match = key.match(/\A(\[|\()([0-9\.-]*),([0-9\.-]*)(\]|\))\z/)
  raise "unrecognized pattern #{key}" unless match

  opener, low_text, high_text, closer = match.captures

  # the captures contain only digits, '.' and '-', so an empty check is
  # all that is needed to detect a missing bound
  low  = low_text.empty?  ? nil : low_text.to_f
  high = high_text.empty? ? nil : high_text.to_f

  left_op  = opener == '(' ? :gt : :ge
  right_op = closer == ')' ? :lt : :le

  [left_op, low, high, right_op]
end
|
749
|
+
|
750
|
+
# Builds a range string from its parts (the inverse of parse_range).
# lb == :gt yields "(", anything else "["; rb == :lt yields ")",
# anything else "]".  A nil bound is written as an empty string.
def rewrite_range(lb, lhv, rhv, rb)
  # even though numranges are float type, we don't want to output ".0"
  # for integer values, so whole numbers are converted to int before
  # being turned into a string
  compact = ->(num) { (num.to_i == num ? num.to_i : num).to_s }

  opener = lb == :gt ? '(' : '['
  low    = compact.call(lhv)
  high   = compact.call(rhv)
  closer = rb == :lt ? ')' : ']'

  "#{opener}#{low},#{high}#{closer}"
end
|
761
|
+
|
762
|
+
# Parses a grid modifier expression such as "<= ltv" or "fico".
#
# The expression says what to keep; the returned operator is its
# opposite, i.e. what to prune.  Returns [prune_op, identifier], where
# prune_op is nil when the expression carries no operator.
# Raises "unrecognized pattern" when key does not match.
def parse_bounds(key)
  match = key.match(/\A *(<|>|<=|>=)? *([a-z_]+) *\z/)
  raise "unrecognized pattern #{key}" unless match

  opstr, ident = match.captures

  keep_op =
    case opstr
    when '<'  then :lt
    when '>'  then :gt
    when '<=' then :le
    when '>=' then :ge
    when ''   then :inc
    end

  # data grid value is expressed as what to keep
  # we convert to the opposite (what to prune)
  [opposite_sign(keep_op), ident]
end
|
780
|
+
end
|