nodepile 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rspec +2 -0
- data/.rubocop.yml +1 -1
- data/BACKLOG.md +34 -0
- data/Rakefile +92 -2
- data/lib/nodepile/base_structs.rb +62 -0
- data/lib/nodepile/colspecs.rb +562 -0
- data/lib/nodepile/gross_actions.rb +38 -0
- data/lib/nodepile/gviz.rb +108 -0
- data/lib/nodepile/keyed_array.rb +386 -0
- data/lib/nodepile/pile_organizer.rb +258 -0
- data/lib/nodepile/pragmas.rb +97 -0
- data/lib/nodepile/rec_source.rb +329 -0
- data/lib/nodepile/rule_eval.rb +155 -0
- data/lib/nodepile/version.rb +1 -1
- data/nodepile.gemspec +53 -0
- data/tmp/.gitignore +1 -0
- metadata +136 -19
@@ -0,0 +1,562 @@
|
|
1
|
+
#
|
2
|
+
# colspecs.rb
|
3
|
+
#
|
4
|
+
# Specification of the column names that have predefined meaning
|
5
|
+
# when encountered in a input file.
|
6
|
+
#
|
7
|
+
|
8
|
+
require 'set'
|
9
|
+
require 'yaml'
|
10
|
+
require 'nodepile/keyed_array.rb'
|
11
|
+
require 'nodepile/base_structs.rb'
|
12
|
+
|
13
|
+
module Nodepile
|
14
|
+
|
15
|
+
# This class provides information about the valid columns for potential use
|
16
|
+
# in documentation and also provides facilities for doing per-line verification
|
17
|
+
# of column values within a single line.
|
18
|
+
# that can appear on a non-header line of an input file.
|
19
|
+
# Note that the best way to think of this class is as a scanner which is in some sense
|
20
|
+
# stateless
|
21
|
+
#
|
22
|
+
# Records generated by the #parse method and related methods will by default
|
23
|
+
# set metadata fields, particularly including:
|
24
|
+
# '@type' = :node, :edge, :rule, :pragma
|
25
|
+
# '@key' = String or [String,String] for node or edge respectively
|
26
|
+
class InputColumnSpecs
|
27
|
+
|
28
|
+
# Raised by parsing routines when a record cannot be interpreted.
# Rescuers may attach context (record number and/or source path) after
# the fact; when present, that context is woven into #message.
class InvalidRecordError < StandardError
  attr_accessor :rec_num, :file_path # use to add error detail

  def initialize(msg) = @msg = msg

  # Render the stored message. If either rec_num or file_path has been
  # supplied, prefix the message with that parsing context.
  def message
    return @msg if rec_num.nil? && file_path.nil?
    "Nodepile parsing error at record [#{rec_num||'?'}] from source [#{file_path||'?'}]: " + @msg
  end
end # parsing errors throw this
|
36
|
+
|
37
|
+
DEFAULT_ID_DELIMITER = ',' # may be used in _link_from and _link_to for multiple edges
|
38
|
+
DEFAULT_PRAGMA_MARKER = "#pragma "
|
39
|
+
|
40
|
+
|
41
|
+
public
|
42
|
+
# Provide a simple hash of field names and their meaning/use.
#
# The definitions live in the YAML document after __END__ at the bottom of
# this file. When DATA is defined it is read directly; otherwise this file
# is re-read and the text following __END__ is extracted with a regex.
# NOTE(review): DATA belongs to the *main* script's __END__ section, not
# necessarily this file's — if a requiring script has its own __END__,
# DATA.read would return the wrong text. TODO confirm intended behavior.
# The result is cached class-wide via @@class_mcache.
#
# @return [Hash] field name (e.g. '_id') => definition hash with keys such
#         as 'description' and 'preproc'
def self.coldefs
  @@class_mcache.cache(__method__){||
    h = YAML.load(defined?(DATA) ? DATA.read :
           /__END__\s+(.*)/m.match(File.read(__FILE__))[1]
        )['data']['fields']
    h # this value is cached
  }
end
|
51
|
+
|
52
|
+
# Report whether the given field value denotes a pattern. A value whose
# first character is a forward slash is treated as a pattern.
# @param s [String]
# @return [:pattern,nil] :pattern when s begins with '/', otherwise nil
def self.val_is_pattern?(s)
  return :pattern if s.start_with?('/')
  nil
end
|
55
|
+
|
56
|
+
# List the most crucial columns that indicate the existence of
# nodes, edges, and styling instructions.
# @return [Array<String>] a fresh array each call; order is significant
#         (do not reorder — #initialize relies on this ordering)
def self.id_cols = ['_id', '_links_from', '_links_to']

# Every column name declared in the YAML spec at the bottom of this file.
# @return [Array<String>]
def self.all_cols = coldefs().keys
|
60
|
+
|
61
|
+
# Defines the characters that will be interpreted as delimiting entity "id"
# values.
attr_accessor :id_delimiter

# Creates a customized InputColumnSpecs object based on the column names and order
# that are included in one specific file. That object can then be used
# ONLY to validate that specific file. See #coldefs for the known columns.
# @param col_names [Array<String>] Order of column data expected from calls to
#                 #parse
# @param id_delimiter [String] Indicates a character that will be considered
#                 a delimiter between ids so that multiple may occupy the
#                 field
# @param pragmas [String,nil] If nil, "pragmas" are not identified.
#                 If a string (default "#pragma "), any record whose _id
#                 column starts with that string is considered a pragma.
#                 Note that ONLY the _id column of a pragma record is
#                 meaningful.
# @param metadata_key_prefix [String,nil] During #parse and related methods
#                 records are yielded in the form of KeyedArrayAccessor objects
#                 that have both the loaded data and also metadata about the
#                 records such as the type of the entity and whether its
#                 existence was triggered explicitly or implicitly. This
#                 value is passed to the KeyedArrayAccessor.
# @raise [InvalidRecordError] when col_names lacks the mandatory '_id' column
def initialize(col_names,id_delimiter: DEFAULT_ID_DELIMITER,pragmas: DEFAULT_PRAGMA_MARKER,
               metadata_key_prefix: '@')
  @col_names = col_names.dup.freeze
  # Indices of _id/_links_from/_links_to within this file's columns, in the
  # order given by .id_cols; nil marks an absent column.
  @id_cols_indices = self.class.id_cols.map{|cnm| @col_names.find_index(cnm)}.freeze
  @id_delimiter = id_delimiter
  @pragma_marker = pragmas
  # Blank record template used by #parse when synthesizing implied edges.
  @empty_kv_array = KeyedArrayAccessor.new(@col_names,Array.new(@col_names.length).freeze)
  raise InvalidRecordError.new(<<~ERRMSG) if @id_cols_indices[0].nil?
    A valid record set must contain an '_id' column
  ERRMSG
  @metadata_key_prefix = metadata_key_prefix
  # Pre-built (possibly prefixed) metadata key names: type, key, is_implied.
  @md_pfxs = [(@metadata_key_prefix||'')+'type',
              (@metadata_key_prefix||'')+'key',
              (@metadata_key_prefix||'')+'is_implied',
             ]
  # Per-instance memoization used by #_preprocs and #_validators.
  @mc = CrudeCalculationCache.new
end
|
105
|
+
|
106
|
+
|
107
|
+
|
108
|
+
# Given a string representing the contents of the "_id", "_links_to", or
# "_links_from" field, this method will split it into zero or more tokens
# representing either ids or else patterns. Patterns start with the question
# mark character. Leading and trailing spaces are stripped before yielding.
#
# @param id_containing_field [String,nil] Any of the possible id containing fields
# @yieldparam [String] each non-blank token, stripped of surrounding whitespace
# @return [Array<String>,Enumerator] when a block is given, the raw split array
#              (whose elements have been stripped in place and may include
#              blanks that were not yielded); otherwise an Enumerator over
#              the tokens
# @raise [RuntimeError] if a rule pattern ('?'-prefixed) follows a delimiter,
#              since rule formulas may not share a field with other ids
def split_ids(id_containing_field, &block)
  # very simple implementation (make smarter later???)
  return [] if id_containing_field.nil?
  return enum_for(:split_ids,id_containing_field) unless block_given?
  # Fix: this guard previously hard-coded a comma even though the delimiter
  # is configurable via @id_delimiter; build the check from the configured
  # delimiter instead.
  if /#{Regexp.escape(@id_delimiter)}\s*\?/ =~ id_containing_field
    raise "A field containing a rule calculation may not contain other ids"
  end
  id_containing_field.split(@id_delimiter).tap{|tokens|
    tokens.each{|tok|
      tok.strip!
      yield tok unless tok == ''
    }
  }
end
|
126
|
+
|
127
|
+
# Given a single "record" (which may define zero or more entities or contain errors)
|
128
|
+
# this method will yield once for each "entity" or "rule" that may be inferred
|
129
|
+
# by that record. The "entities" defined by a given record are determined by
|
130
|
+
# three fields: _id, _links_from, and _links_to.
|
131
|
+
#
|
132
|
+
# The entries in these fields can indicate several things:
|
133
|
+
# 1) The explicit existence and attribute values for a node
|
134
|
+
# 2) Override values for a node or pattern of nodes
|
135
|
+
# 3) The implicit existence of a node (because an explicit node links explicitly to/from it)
|
136
|
+
# 4) The explicit existence of an edge (because an edge is explicitly in the to/from fields)
|
137
|
+
# and attribute values for the edge
|
138
|
+
# 5) The implicit existence of an edge (because an edge is implied by a rule in the to/from)
|
139
|
+
# and attribute values for the edge
|
140
|
+
#
|
141
|
+
# Note that when metadata is attached to the KeyedArrayAccessors, it the metada will
|
142
|
+
# be updated to include the following key-values.
|
143
|
+
# * 'type' = :node, :edge, :rule, :pragma
|
144
|
+
# * 'key' = either a single String of nodes/node-rules or an array of two strings for edges
|
145
|
+
# and edge-rules
|
146
|
+
# * 'is_implied' = true,nil to indicate whether the entity is implied
|
147
|
+
#
|
148
|
+
# @param col_value_array [Array] Column values in exact order of column names
|
149
|
+
# provided when this object was constructed.
|
150
|
+
# @return [Integer] Number of entities encountered. Note that zero is valid.
|
151
|
+
# @param metadata [Hash,nil] If provided, the given metadata will be attached to each of the
|
152
|
+
# KeyedArrayAccessors that are yielded along with metadata about
|
153
|
+
# this particular entity. Note that the hash passed in will be altered
|
154
|
+
# in two ways. Firstly, if a @metadata_key_prefix is specified, all keys
|
155
|
+
# will be changed to include this prefix (if they aren't already).
|
156
|
+
# Secondly, the three additional metadata key-values will be added
|
157
|
+
# (type, key, is_implied).
|
158
|
+
# @param metadata_key_prefix [String,nil] See KeyedArrayAccessor#initialize for detail.
|
159
|
+
# If provided, this string will be foreced to appear at the beginning
|
160
|
+
# of every metadata key.
|
161
|
+
# @param source [String,nil,Object] see KeyedArrayAccessor#initialize for detail
|
162
|
+
# @param ref_num [Integer,nil] see KeyedArrayAccessor#iniialize for detail
|
163
|
+
# @raise [InvalidRecordError] If errors or omissions in data make it uninterpretable
|
164
|
+
# @yieldparam [Nodepile::KeyedArrayAccessor] A single node, edge, or rule taken extracted
|
165
|
+
# from the record. Note that the id, links_to, and links_from
|
166
|
+
# fields may be altered in the return value.
|
167
|
+
def parse(col_value_array,source: nil, ref_num: nil,metadata: nil,&entity_receiver)
|
168
|
+
#see below in this file for the various preprocessing defined
|
169
|
+
_preprocs.each{|(ix,preproc_block)|
|
170
|
+
col_value_array[ix] = preproc_block.call(col_value_array[ix])
|
171
|
+
}
|
172
|
+
_validators.each{|(vl_col_nums,val_block)|
|
173
|
+
errmsg = val_block.call(*vl_col_nums.map{|i| i && col_value_array[i]}) # test the specified column values
|
174
|
+
raise InvalidRecordError.new(errmsg) if errmsg
|
175
|
+
}
|
176
|
+
if metadata && (@metadata_key_prefix||'') != ''
|
177
|
+
# if necessary, facilitate quick attachment of metadata to KeyedArrayAccessor
|
178
|
+
metadata.transform_keys{|k| k.start_with?(@metadata_key_prefix) ? k : @metadata_key_prefix + k}
|
179
|
+
end
|
180
|
+
metadata ||= Hash.new
|
181
|
+
# following proc is used to package up the return value at multiplel places below
|
182
|
+
yieldval_bldr = Proc.new{|kaa,*three_md_fields|
|
183
|
+
(0..(@md_pfxs.length-1)).each{|i| metadata[@md_pfxs[i]] = three_md_fields[i]}
|
184
|
+
kaa.reset_metadata(metadata,metadata_key_prefix: @metadata_key_prefix)
|
185
|
+
kaa
|
186
|
+
}
|
187
|
+
ids, links_from, links_to = @id_cols_indices.map{|i| i && col_value_array[i]}
|
188
|
+
return 0 if ids&.start_with?('#') # ignore these records
|
189
|
+
base_kva = KeyedArrayAccessor.new(@col_names, col_value_array, source: source, ref_num: ref_num)
|
190
|
+
if @pragma_marker && ids&.start_with?(@pragma_marker)
|
191
|
+
# pragmas get shortcut treatment, not keyed, ignore all other columns
|
192
|
+
yield yieldval_bldr(base_kva,:pragma,nil,false) if block_given?
|
193
|
+
return 1 # pragmas do not have links, or multiple ids
|
194
|
+
end
|
195
|
+
entity_count = 0
|
196
|
+
lf_list = split_ids(links_from).to_a
|
197
|
+
lt_list = split_ids(links_to).to_a
|
198
|
+
if !ids.nil?
|
199
|
+
edge_list = Array.new
|
200
|
+
else
|
201
|
+
# for pure edges, add them to list for later yielding
|
202
|
+
edge_list = lf_list.to_a.product(lt_list.to_a)
|
203
|
+
.map{|(lf,lt)|
|
204
|
+
kva = base_kva.dup
|
205
|
+
kva['_links_from'] = lf
|
206
|
+
kva['_links_to'] = lt
|
207
|
+
[lf,lt,kva ]
|
208
|
+
}
|
209
|
+
end #detecting pure edges
|
210
|
+
|
211
|
+
split_ids(ids).each{|id|
|
212
|
+
kva = base_kva.dup.tap{|kva|
|
213
|
+
kva['_id'] = id
|
214
|
+
kva['_links_from'] = nil
|
215
|
+
kva['_links_to'] = nil
|
216
|
+
}
|
217
|
+
entity_count += 1
|
218
|
+
yield yieldval_bldr.call(kva,id[0] == '?' ? :rule : :node,id.freeze,false) if block_given?
|
219
|
+
# emit any implicitly existing nodes
|
220
|
+
(lf_list + lt_list).each{|link|
|
221
|
+
if !link.start_with?('?')
|
222
|
+
entity_count += 1
|
223
|
+
# implied nodes have cleared value except their key
|
224
|
+
kva = base_kva.dup.tap{|x|
|
225
|
+
x['_id'] = link
|
226
|
+
x['_links_from'] = nil
|
227
|
+
x['_links_to'] = nil
|
228
|
+
}
|
229
|
+
yield yieldval_bldr.call(kva,:node,link.freeze,true) if block_given?
|
230
|
+
end
|
231
|
+
}
|
232
|
+
# Flag edges the go from/to _id. Note, you can't define rules this way.
|
233
|
+
(lf_list.product([id]) + [id].product(lt_list)).each{|a|
|
234
|
+
next if a.any?{|v| v.start_with?('?')} # rules can't imply an edge
|
235
|
+
kva = @empty_kv_array.dup
|
236
|
+
kva['_links_from'] = a[0]
|
237
|
+
kva['_links_to'] = a[1]
|
238
|
+
kva.source = base_kva.source
|
239
|
+
kva.ref_num = base_kva.ref_num
|
240
|
+
edge_list << [a[0],a[1],kva]
|
241
|
+
}
|
242
|
+
}
|
243
|
+
edge_list.each{|(n1,n2,kva)|
|
244
|
+
entity_count += 1
|
245
|
+
et = (n1.start_with?('?') || n2.start_with?('?')) ? :rule : :edge
|
246
|
+
yield yieldval_bldr.call(kva,et,[n1,n2].freeze,false) if block_given?
|
247
|
+
}
|
248
|
+
return entity_count
|
249
|
+
end
|
250
|
+
|
251
|
+
|
252
|
+
# Bulk parse is a convenience method for parsing a source of records. It is essentially
# the same as instantiating an object using the first record and then calling parse
# multiple times.
#
# For information on most of the parameters, see the #parse method
#
# @param rec_source [Enumerator<Array<String>>] first record is presumed to be
#                  the header and all other records are fed to #parse. Must
#                  respond to #next (an Enumerator; a plain Enumerable is not
#                  sufficient).
# @return [Integer, Enumerator] If a block is passed in, returns the total of all
#                  entities that were yielded from the source. Otherwise
#                  returns an enumerator.
def self.bulk_parse(rec_source,source: nil,metadata: nil, metadata_key_prefix: nil, &entity_receiver)
  return enum_for(:bulk_parse,rec_source, source:, metadata:, metadata_key_prefix:) unless block_given?
  hdr_vals = rec_source.next
  # Fix: metadata_key_prefix was accepted but never forwarded to the specs
  # object. Forward it only when the caller supplied one so the class
  # default ('@') still applies otherwise.
  specs = metadata_key_prefix.nil? ? InputColumnSpecs.new(hdr_vals) :
            InputColumnSpecs.new(hdr_vals, metadata_key_prefix: metadata_key_prefix)
  entity_count = 0 # running total of yielded entities (the return value)
  rec_num = 1      # the header occupies record number 1 of the source
  begin
    loop do
      next_rec = rec_source.next
      rec_num += 1
      # Fix: ref_num previously received the running *entity* total + 2,
      # which drifts from the true record number whenever a record yields
      # more or fewer than one entity. Track the record number separately.
      entity_count += specs.parse(next_rec,source:, ref_num: rec_num,metadata:,&entity_receiver)
    end
  rescue StopIteration
    # source exhausted (Kernel#loop also swallows StopIteration; the
    # explicit rescue is kept for clarity)
  end
  return entity_count
end
|
279
|
+
|
280
|
+
# Utility class returned by the #make_pattern_match_verifier() method
#
# It holds tests that can be used to confirm whether a pattern matches
# aspects of a given node.
#
# Example Pattern Strings:
#  1) "?/^alpha/" matches type == :node where key starts with "alpha"
#  2) "beta" matches type == :node where key is exactly "beta"
#
class PatternMatchVerifier
  ALWAYS_TRUE_PROC = Proc.new{true}

  # @param pattern_string [String] either "? /regex/" (regex tested against
  #        node ids) or a plain string (exact match after trimming spaces)
  def initialize(pattern_string)
    @pattern_string = pattern_string
    @non_id_test = ALWAYS_TRUE_PROC # no non-id criteria are implemented yet
    match_data = /^\s*\?\s*\/(.*)\/\s*$/.match(pattern_string)
    @id_test =
      if match_data
        rx = Regexp.new(match_data[1])
        ->(id) { rx.match?(id) }
      else
        exact_id = pattern_string.strip # match with the exact (trimmed) string
        ->(id) { id == exact_id }
      end
  end #initialize

  def inspect = "#<#{self.class} 0x#{object_id} pattern_string=#{@pattern_string.inspect}> "

  # Exclusively test whether the given node id would be acceptable for this
  # verifier.
  #
  # @param test_id_string [String]
  def id_match?(test_id_string) = @id_test.call(test_id_string)

  # Exclusively test whether any of the non-id aspects of the node would be
  # acceptable for this verifier (currently always true).
  # @param node_entity_packet [Nodepile::EntityPacket]
  def non_id_match?(node_entity_packet) = @non_id_test.call(node_entity_packet)

  # Perform both id_match?() and non_id_match?() and return their logical AND
  def match?(nep) = id_match?(nep.key) && non_id_match?(nep)
end #class PatternMatchVerifier
|
323
|
+
|
324
|
+
# "Rule" type entities are characterized by having one or more "patterns"
|
325
|
+
# that are used to determine which of the nodes a given rule should apply to.
|
326
|
+
# Most often, the patterns specify sets of node IDs would satisfy them
|
327
|
+
# such as through regular expression matching. However, future instances
|
328
|
+
# may use field values to determine matching.
|
329
|
+
#
|
330
|
+
# For explanation of pattern logic see the PatternMatchVerifier class
|
331
|
+
#
|
332
|
+
|
333
|
+
def self.make_pattern_match_verifier(pattern_string)
|
334
|
+
return PatternMatchVerifier.new(pattern_string)
|
335
|
+
end
|
336
|
+
|
337
|
+
|
338
|
+
|
339
|
+
|
340
|
+
private
|
341
|
+
|
342
|
+
|
343
|
+
# Build (and memoize per-instance) the preprocessing steps applicable to
# this object's column set: one [column_index, proc] pair for every column
# in @col_names that has a registered preproc.
#
# NOTE(review): ._all_preprocs stores [col_name, proc] pairs (built by
# ._make_field_preproc), so the destructured first element below — named
# skip_nil — actually receives the column name; it is unused here either
# way, but the name is misleading.
#
# @return [Array<Array(Integer,Proc)>] frozen, cached list
def _preprocs
  @mc.cache(__method__){||
    # collect preproc relevant for the columns present
    my_preprocs = Array.new
    @col_names.each_with_index{|nm,ix|
      # _all_preprocs[nm] is nil when the column declares no preproc steps;
      # &.tap skips those entries
      self.class._all_preprocs[nm]&.tap{|(skip_nil,block)|
        my_preprocs << [ix,block].freeze
      }
    }
    my_preprocs.freeze # will get cached
  } # end cache calculator
end # _preprocs()
|
356
|
+
|
357
|
+
# Build (and memoize per-instance) the validators applicable to this
# object's column set. A validator flagged "always" is kept even when some
# of its columns are absent (absent columns are represented by nil indices,
# which #parse turns into nil values for the validator block).
#
# @return [Array<Array(Array<Integer,nil>,Proc)>] frozen list of
#         [column_index_list, validator_proc] pairs
def _validators
  @mc.cache(__method__){||
    my_validators = Array.new
    # collect validators relevant for the columns present
    self.class._all_validators.each{|(always,vl_col_names,block)|
      vl_col_nums = vl_col_names.map{|nm| @col_names.find_index(nm) }
      if always || vl_col_nums.none?{|v| v.nil?}
        my_validators << [vl_col_nums.freeze,block].freeze
      end
    }
    my_validators.freeze # this should get cached
  } # end cache calculator
end
|
370
|
+
|
371
|
+
|
372
|
+
|
373
|
+
# A validator is a block used to verify the values in a specific
# set of fields. The records built here are compiled by #_validators into
# the per-instance validation run in #parse. A validator block should
# evaluate to nil if everything is okay. If it evaluates to a string, that
# string may be communicated to users as a validation failure.
#
# @param always [true,false,nil] Indicates that the validator should
#               be run regardless of whether all fields are present.
#               Nils will be passed to the validator for missing fields.
# @param col_name_array [Array<String>] These fields must be passed
#               in this order to the block to perform the validation.
# @return [Array(Object,Array<String>,Proc)] frozen [always, cols, block] record
def self._make_validator(col_name_array,always: nil, &validator_block)
  frozen_cols = col_name_array.dup.freeze
  [always, frozen_cols, validator_block].freeze
end
|
389
|
+
|
390
|
+
# Package up field preprocessing into a [col_name, proc] record for later use
# @param col_name [String] name of the column the preproc applies to
# @param skip_nil [Boolean] if true, the returned proc passes nil through
#                 without invoking the preprocessing block
# @return [Array(String,Proc)]
def self._make_field_preproc(col_name, skip_nil: true, &preproc_block)
  return [col_name,skip_nil ? Proc.new{|s| s && preproc_block.(s)} : preproc_block]
end

# Package up preprocs for a field using some standard rules. Multiple
# rules may apply to the same field.
#
# * :strip will cause leading and trailing spaces to be removed and
#   blank fields will be set to nil
# * :downcase will cause field contents to be downcased
#
# @param col_name [String] name of the column the preproc applies to
# @param std_syms [Array<Symbol>,nil] one or more symbols representing
#        the preprocs that should be combined. They are applied
#        in the specified order although if :strip is present it
#        must appear first. Method is a no-op (returns nil) if std_syms is
#        nil.
# @return [Array(String,Proc),nil]
# @raise [RuntimeError] (raised by the returned proc when invoked) if an
#        unrecognized preproc symbol is encountered
def self._make_standard_preproc(col_name,std_syms)
  return nil if std_syms.nil?
  nproc = Proc.new{|s|
    std_syms.each{|instr|
      case instr
      when :downcase then s&.downcase! # in-place; nil return of downcase! is irrelevant
      when :strip then s = nil if (s.strip!||s) == '' # blank becomes nil
      else
        # Fix: this previously interpolated an undefined local (proc_sym),
        # which would have raised NameError instead of the intended message.
        raise "Unrecognized preproc found [#{instr.inspect}]"
      end
    } #each instruction
    next s # "return value"
  } #nproc
  _make_field_preproc(col_name,skip_nil: true,&nproc)
end
|
424
|
+
|
425
|
+
@@class_mcache = CrudeCalculationCache.new
|
426
|
+
|
427
|
+
# Build (and cache class-wide) a Hash mapping every column name known to
# .coldefs onto its preprocessing record (a [col_name, proc] pair from
# ._make_standard_preproc), or nil for columns whose YAML spec declares
# no 'preproc' steps.
#
# @return [Hash{String=>Array(String,Proc),nil}] frozen, cached
def self._all_preprocs
  @@class_mcache.cache(__method__){||
    h = Hash.new # append to this hash
    # generate preprocs using the flags in the YAML at bottom of this file
    coldefs.each_pair{|fieldname,fielddata|
      # fielddata['preproc'] is nil when absent, yielding a nil record
      h[fieldname] = _make_standard_preproc(fieldname,fielddata['preproc']&.map(&:to_sym))
    }
    h.freeze # this Hash will get cached
  } # end cache calculator
end # _all_preprocs()
|
437
|
+
|
438
|
+
# Build (and cache class-wide) the complete list of validator records
# (see ._make_validator for the record format). Each validator receives the
# listed column values (nil for absent columns) and returns nil when valid
# or an error-message String otherwise; #_validators filters this list for
# the columns actually present.
#
# @return [Array] frozen, cached list of validator records
def self._all_validators
  @@class_mcache.cache(__method__){||
    a = Array.new
    a << _make_validator(['_id','_links_from','_links_to'],always: true){|id,lf,lt|
      if id.nil? && (lf.nil? ^ lt.nil?)
        # Fix: message previously misspelled the second column as "links_to"
        # and read ambiguously; both columns must agree when _id is blank.
        next 'If the _id field is blank, the _links_from and _links_to fields must both be blank or both populated'
      end
      if id&.start_with?('?') && (lf || lt)
        next 'If the _id field indicates a :rule, _links_from and _links_to must be blank'
      end
      if id && (lf&.start_with?('?') || lt&.start_with?('?'))
        next "If the _id field is populated, you may not put a rule formula in _links_from or _links_to"
      end
      next nil
    }
    a.freeze # this Array will get cached
  } # end cache calculator
end # _all_validators()
|
457
|
+
|
458
|
+
|
459
|
+
|
460
|
+
|
461
|
+
end # class InputColumnSpecs
|
462
|
+
|
463
|
+
end # module Nodepile
|
464
|
+
|
465
|
+
# Below are the column spec to be used for documentation and to some degree
|
466
|
+
__END__
|
467
|
+
---
|
468
|
+
data:
|
469
|
+
fields:
|
470
|
+
_id:
|
471
|
+
description: >
|
472
|
+
Required column in any input file. Can be one of three value types.
|
473
|
+
If it starts with a literal asterisk character or with a literal
|
474
|
+
forward slash character, it indicates the line is a
|
475
|
+
style instruction. If it is blank or whitespace, it indicates the
|
476
|
+
line defines an edge or edge style instruction. Any other value
|
477
|
+
indicates that this line defines a node and the value in this column
|
478
|
+
is interpreted as a unique identifier (node_id) that can be used on
|
479
|
+
other lines to reference this node. Unless otherwise overridden, the
|
480
|
+
_id is used to label the node. Note that your life may be happier
|
481
|
+
if you forbid using commas as part of _id values although it is not
|
482
|
+
forbidden.
|
483
|
+
preproc:
|
484
|
+
- strip
|
485
|
+
|
486
|
+
_links_from:
|
487
|
+
description: >
|
488
|
+
Required if the _id field has been left blank. Specifies one or
|
489
|
+
more node_id values separated by valid delimiter characters. If the
|
490
|
+
first character is an asterisk or forward slash, it indicates that this
|
491
|
+
is a edge styling instruction. Otherwise, this is used to indicate
|
492
|
+
the existence of one or more edges originating from the specified node.
|
493
|
+
preproc:
|
494
|
+
- strip
|
495
|
+
|
496
|
+
_links_to:
|
497
|
+
description: Follows same protocol as _links_from.
|
498
|
+
preproc:
|
499
|
+
- strip
|
500
|
+
|
501
|
+
_label:
|
502
|
+
description: >
|
503
|
+
Indicates a (typically short) label that should appear rather than
|
504
|
+
value of the _id for nodes and edges. For nodes, see also _labelNN
|
505
|
+
which allows specifying node labels in a line-by-line format.
|
506
|
+
#preproc: # no preproc for this one... deliberate blanks may be meaningful
|
507
|
+
|
508
|
+
_labelNN:
|
509
|
+
description: >
|
510
|
+
If present and non-blank, this value supercedes any text in _label
|
511
|
+
column.
|
512
|
+
When a column with this pattern is specified it should replace
|
513
|
+
_labelNN with a integer such as _label3 or _label22 to indicate
|
514
|
+
that the provided text appears on line 3 or line 22 respectively.
|
515
|
+
|
516
|
+
_color:
|
517
|
+
description: >
|
518
|
+
For nodes, color is the border color of the shape. For edges, this is
|
519
|
+
the actual edge color. There are a very wide variety of ways that
|
520
|
+
color can be specified. Any format supported by the DOT language is
|
521
|
+
permitted. The rock bottom simplest is to use the supported set of
|
522
|
+
simple color words like red, blue, etc.
|
523
|
+
dot_ref: https://graphviz.org/docs/attrs/color/
|
524
|
+
preproc:
|
525
|
+
- strip
|
526
|
+
- downcase
|
527
|
+
|
528
|
+
_fillcolor:
|
529
|
+
description: >
|
530
|
+
For nodes, fillcolor is the background color of the shape.
|
531
|
+
dot_ref: https://graphviz.org/docs/attrs/fillcolor/
|
532
|
+
preproc:
|
533
|
+
- strip
|
534
|
+
- downcase
|
535
|
+
|
536
|
+
_fontcolor:
|
537
|
+
description: >
|
538
|
+
For many entities, defines the text color.
|
539
|
+
dot_ref: https://graphviz.org/docs/attrs/fontcolor/
|
540
|
+
preproc:
|
541
|
+
- strip
|
542
|
+
- downcase
|
543
|
+
|
544
|
+
|
545
|
+
_shape:
|
546
|
+
description: >
|
547
|
+
For nodes, determines the shape of the node. Shape names tend to be
|
548
|
+
either simple things like (box, plain, plaintext, circle, ellipse,
|
549
|
+
etc.) or else it is a record type that is meant to render data
|
550
|
+
in a structured layout.
|
551
|
+
dot_ref: https://graphviz.org/docs/attr-types/shape/
|
552
|
+
preproc:
|
553
|
+
- strip
|
554
|
+
- downcase
|
555
|
+
|
556
|
+
|
557
|
+
|
558
|
+
|
559
|
+
|
560
|
+
|
561
|
+
|
562
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'nodepile/gviz.rb'
|
2
|
+
require 'nodepile/pile_organizer.rb'
|
3
|
+
|
4
|
+
module Nodepile

  # A set of large scale, batch-like operations
  module GrossActions

    # Given an output filepath and one or more tabular datafile inputs, render
    # the visualization of the input datafiles.
    # @param output_filepath [String] location where the output file should be
    #                  written. Note, if the file already exists it
    #                  will be overwritten. Note that the file
    #                  suffix indicates the output format that will
    #                  be used. Valid extensions are: jpg, dot, gif,
    #                  svg, png, json, pdf
    #
    # @param input_filepaths [Array<String>] One or more input files. Note that the
    #                  program will take a best guess at the text file
    #                  format with file suffix (csv, tsv) being a
    #                  strong signal. The order of the filenames
    #                  is used as their load order.
    # @raise [RuntimeError] when any input file does not exist
    # @return [nil] for now, the return is meaningless
    def self.render_to_file(output_filepath, *input_filepaths)
      pile = Nodepile::PileOrganizer.new
      gviz = Nodepile::GraphVisualizer.new
      input_filepaths.each{|fpath|
        raise "File not found: #{fpath}" unless File.exist?(fpath)
        pile.load_from_file(fpath)
        # NOTE(review): both load and emit_file run once per input file, so
        # the output is re-rendered after every file and pile contents are
        # re-fed to gviz each pass. If GraphVisualizer#load accumulates
        # records, consider moving these two calls after the loop — confirm
        # GraphVisualizer semantics before changing.
        gviz.load(pile.node_records,pile.edge_records,configs: pile.pragmas)
        gviz.emit_file(output_filepath,configs: pile.pragmas)
      }
      return nil # for now, return is meaningless
    end #render_to_file()

  end #module GrossActions

end #module Nodepile