rufus-decision 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,5 @@
1
1
  #--
2
- # Copyright (c) 2008-2009, John Mettraux, jmettraux@gmail.com
2
+ # Copyright (c) 2008-2010, John Mettraux, jmettraux@gmail.com
3
3
  #
4
4
  # Permission is hereby granted, free of charge, to any person obtaining a copy
5
5
  # of this software and associated documentation files (the "Software"), to deal
@@ -0,0 +1,679 @@
1
+ #--
2
+ # Copyright (c) 2007-2010, John Mettraux, jmettraux@gmail.com
3
+ #
4
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
5
+ # of this software and associated documentation files (the "Software"), to deal
6
+ # in the Software without restriction, including without limitation the rights
7
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ # copies of the Software, and to permit persons to whom the Software is
9
+ # furnished to do so, subject to the following conditions:
10
+ #
11
+ # The above copyright notice and this permission notice shall be included in
12
+ # all copies or substantial portions of the Software.
13
+ #
14
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
20
+ # THE SOFTWARE.
21
+ #
22
+ # Made in Japan.
23
+ #++
24
+
25
+
26
+ require 'csv'
27
+ require 'open-uri'
28
+
29
+ require 'rufus/dollar'
30
+ require 'rufus/decision/hashes'
31
+
32
+
33
+ module Rufus
34
+ module Decision
35
+
36
+ #
37
+ # A decision table is a description of a set of rules as a CSV (comma
38
+ # separated values) file. Such a file can be edited / generated by
39
+ # a spreadsheet (Excel, Google spreadsheets, Gnumeric, ...)
40
+ #
41
+ # == Disclaimer
42
+ #
43
+ # The decision / CSV table system is no replacement for
44
+ # full rule engines with forward and backward chaining, RETE implementation
45
+ # and the like...
46
+ #
47
+ #
48
+ # == Usage
49
+ #
50
+ # The following CSV file
51
+ #
52
+ # in:topic,in:region,out:team_member
53
+ # sports,europe,Alice
54
+ # sports,,Bob
55
+ # finance,america,Charly
56
+ # finance,europe,Donald
57
+ # finance,,Ernest
58
+ # politics,asia,Fujio
59
+ # politics,america,Gilbert
60
+ # politics,,Henry
61
+ # ,,Zach
62
+ #
63
+ # embodies a rule for distributing items (piece of news) labelled with a
64
+ # topic and a region to various members of a team.
65
+ # For example, all news about finance from Europe are to be routed to
66
+ # Donald.
67
+ #
68
+ # Evaluation occurs row by row. The "in out" row states which field
69
+ # is considered at input and which are to be modified if the "ins" do
70
+ # match.
71
+ #
72
+ # The default behaviour is to change the value of the "outs" if all the
73
+ # "ins" match and then terminate.
74
+ # An empty "in" cell means "matches any".
75
+ #
76
+ # Enough words, some code :
77
+ #
78
+ # require 'rufus/decision'
79
+ #
80
+ # table = Rufus::Decision::Table.new(%{
81
+ # in:topic,in:region,out:team_member
82
+ # sports,europe,Alice
83
+ # sports,,Bob
84
+ # finance,america,Charly
85
+ # finance,europe,Donald
86
+ # finance,,Ernest
87
+ # politics,asia,Fujio
88
+ # politics,america,Gilbert
89
+ # politics,,Henry
90
+ # ,,Zach
91
+ # })
92
+ #
93
+ # h = {}
94
+ # h["topic"] = "politics"
95
+ #
96
+ # table.transform!(h)
97
+ #
98
+ # puts h["team_member"]
99
+ # # will yield "Henry" who takes care of all the politics stuff,
100
+ # # except for Asia and America
101
+ #
102
+ # '>', '>=', '<' and '<=' can be put in front of individual cell values :
103
+ #
104
+ # table = Rufus::Decision::Table.new(%{
105
+ # ,
106
+ # in:fx, out:fy
107
+ # ,
108
+ # >100,a
109
+ # >=10,b
110
+ # ,c
111
+ # })
112
+ #
113
+ # h = { 'fx' => '10' }
114
+ # h = table.transform(h)
115
+ #
116
+ # p h # => { 'fx' => '10', 'fy' => 'b' }
117
+ #
118
+ # Such comparisons are done after the elements are transformed to float
119
+ # numbers. By default, non-numeric arguments will get compared as Strings.
120
+ #
121
+ #
122
+ # == transform and transform!
123
+ #
124
+ # The method transform! acts directly on its parameter hash, the method
125
+ # transform will act on a copy of it. Both methods return their transformed
126
+ # hash.
127
+ #
128
+ #
129
+ # == [ruby] ranges
130
+ #
131
+ # Ruby-like ranges are also accepted in cells.
132
+ #
133
+ # in:f0,out:result
134
+ # ,
135
+ # 0..32,low
136
+ # 33..66,medium
137
+ # 67..100,high
138
+ #
139
+ # will set the field 'result' to 'low' for f0 => 24
140
+ #
141
+ #
142
+ # == Options
143
+ #
144
+ # You can put options on their own in a cell BEFORE the line containing
145
+ # "in:xxx" and "out:yyy" (ins and outs).
146
+ #
147
+ # Four options are supported: "ignorecase", "through", "accumulate" and "unbounded".
148
+ #
149
+ # * "ignorecase", if found by the decision table will make any match (in the
150
+ # "in" columns) case insensitive.
151
+ #
152
+ # * "through", will make sure that EVERY row is evaluated and potentially
153
+ # applied. The default behaviour (without "through"), is to stop the
154
+ # evaluation after applying the results of the first matching row.
155
+ #
156
+ # * "accumulate", behaves as with "through" set but instead of overriding
157
+ # values each time a match is found, will gather them in an array.
158
+ #
159
+ # an example of 'accumulate'
160
+ #
161
+ # accumulate
162
+ # in:f0,out:result
163
+ # ,
164
+ # ,normal
165
+ # >10,large
166
+ # >100,xl
167
+ #
168
+ # will yield { result => [ 'normal', 'large' ]} for f0 => 56
169
+ #
170
+ # * "unbounded", by default, string matching is 'bounded', "apple" will match
171
+ # 'apple', but not 'greenapple'. When "unbounded" is set, 'greenapple' will
172
+ # match. ('bounded', in reality, means the target value is surrounded
173
+ # by ^ and $)
174
+ #
175
+ # === Setting options at table initialization
176
+ #
177
+ # It's OK to set the options at initialization time :
178
+ #
179
+ # table = Rufus::Decision::Table.new(
180
+ # csv, :ruby_eval => true, :accumulate => true)
181
+ #
182
+ #
183
+ # == Cross references
184
+ #
185
+ # By using the 'dollar notation', it's possible to reference a value
186
+ # already in the hash (that is, the hash undergoing 'transformation').
187
+ #
188
+ # in:value,in:roundup,out:newvalue
189
+ # 0..32,true,32
190
+ # 33..65,true,65
191
+ # 66..99,true,99
192
+ # ,,${value}
193
+ #
194
+ # Here, if 'roundup' is set to true, newvalue will hold 32, 65 or 99
195
+ # as value, else it will simply hold the 'value'.
196
+ #
197
+ # The value is the value as currently found in the transformed hash, not
198
+ # as found in the original (non-transformed) hash.
199
+ #
200
+ #
201
+ # == Ruby code evaluation
202
+ #
203
+ # The dollar notation can be used for yet another trick, evaluation of
204
+ # ruby code at transform time.
205
+ #
206
+ # Note though that this feature is disabled by default; it is enabled by
207
+ # setting ruby_eval to true on the table before calling transform!().
208
+ #
209
+ # decisionTable.ruby_eval = true; decisionTable.transform!(h)
210
+ #
211
+ # That decision table may look like :
212
+ #
213
+ # in:value,out:result
214
+ # 0..32,${r:Time.now.to_f}
215
+ # 33..65,${r:call_that_other_function()}
216
+ # 66..99,${r:${value} * 3}
217
+ #
218
+ # (It's a very simplistic example, but I hope it demonstrates the
219
+ # capabilities of this technique)
220
+ #
221
+ # It's OK to set the :ruby_eval parameter when initializing the decision
222
+ # table :
223
+ #
224
+ # table = Rufus::Decision::Table.new(csv, :ruby_eval => true)
225
+ #
226
+ # so that there is no need to specify it at transform() call time.
227
+ #
228
+ #
229
+ # == See also
230
+ #
231
+ # * http://jmettraux.wordpress.com/2007/02/11/ruby-decision-tables/
232
+ #
233
+ class Table
234
+
235
# Matches a header cell declaring an input column ("in:xxx").
IN = /^in:/

# Matches a header cell declaring an output column ("out:yyy").
OUT = /^out:/

# Matches any header cell, input or output.
IN_OR_OUT = /^(in|out):/

# Matches a cell starting with '>', '>=', '<' or '<='. Group 1 is the
# operator, group 2 is whatever follows it.
NUMERIC_COMPARISON = /^([><]=?)(.*)$/

# when set to true, the transformation process stops after the
# first match got applied.
#
attr_accessor :first_match

# when set to true, matches evaluation ignores case.
#
attr_accessor :ignore_case

# when set to true, multiple matches result get accumulated in
# an array.
#
attr_accessor :accumulate

# when set to true, evaluation of ruby code for output is allowed. False
# by default.
#
attr_accessor :ruby_eval

# false (bounded) by default : exact matches for string matching. When
# 'unbounded', target 'apple' will match for values like 'greenapples' or
# 'apple seed'.
#
# The accessor is named after the @unbounded instance variable, which is
# the one the option parsing and the string matching code read and write.
#
attr_accessor :unbounded

# Former name of the accessor above. It used to read and write @unbound,
# a variable the rest of the class never looks at, so setting it had no
# effect. Kept as an alias so that existing callers keep working.
#
alias_method :unbound, :unbounded
alias_method :unbound=, :unbounded=
264
+
265
# Builds a decision table out of +csv+.
#
# +csv+ is handed to Rufus::Decision.csv_to_a to obtain the rows. Option
# rows found before the header row are consumed first, then the header row
# itself is parsed.
#
# == options
#
# * :through : when set, all the rows of the decision table are considered
# * :ignore_case (or :ignorecase) : case is ignored (not ignored by default)
# * :accumulate : gather instead of overriding (implies :through)
# * :ruby_eval : ruby code evaluation is OK
# * :unbounded : string matching is not anchored
#
# Any non-nil option passed here is applied after the option rows of the
# table, hence it wins over them.
#
def initialize(csv, options={})
  @rows = Rufus::Decision.csv_to_a(csv)

  extract_options
  parse_header_row

  # :through => true forces "evaluate every row"; otherwise stop at the
  # first match unless an option row already decided.
  if options[:through] == true
    @first_match = false
  elsif @first_match.nil?
    @first_match = true
  end

  # first name of each group is the instance variable, others are aliases
  [
    [ :ignore_case, :ignorecase ],
    [ :accumulate ],
    [ :ruby_eval ],
    [ :unbounded ]
  ].each { |names| set_opt(options, *names) }

  # accumulating only makes sense when every row gets evaluated
  @first_match = false if @accumulate
end
301
+
302
# Non-destructive flavour of transform! : a (shallow) copy of +hash+ is
# passed through the table and returned, +hash+ itself is left alone.
#
def transform(hash)
  copy = hash.dup
  transform!(copy)
end

# Passes +hash+ through the rows of the table, applying the "out" cells of
# each matching row, and returns it. Evaluation stops after the first
# matching row when @first_match is set.
#
# When @ruby_eval is set, the hash is wrapped in an EvalHashFilter for the
# duration of the run; the value returned is then the filter's parent_hash.
#
def transform!(hash)
  target = @ruby_eval ? Rufus::Decision::EvalHashFilter.new(hash) : hash

  @rows.each do |row|
    if matches?(row, target)
      apply(row, target)
      break if @first_match
    end
  end

  return target.parent_hash if target.is_a?(Rufus::Decision::HashFilter)

  target
end

# +run+ is another name for +transform+ (the variant working on a copy).
#
alias :run :transform
327
+
328
# Renders the table as a CSV String : the header line (as produced by
# @header.to_csv) followed by one comma-joined line per remaining row.
# Cells are joined as they are, no quoting is performed.
#
def to_csv
  lines = [ @header.to_csv ]
  @rows.each { |row| lines << row.join(',') }
  lines.join("\n")
end
336
+
337
+ protected
338
+
339
# Looks in +options+ for the first of +optnames+ holding a non-nil value
# and stores that value in the instance variable named after the FIRST
# optname (the following names act as aliases).
# Nothing is touched when none of the names is set.
#
def set_opt(options, *optnames)
  found = optnames.find { |name| !options[name].nil? }

  return optnames unless found

  instance_variable_set("@#{optnames.first}", options[found])
  nil
end
349
+
350
+
351
# Tells whether +hash+ satisfies every "in" cell of +row+.
#
# For each input column, the hash value is fetched through the dollar
# notation, then checked against the cell : as a comparison when the cell
# starts with >, >=, < or <=, as a range when the cell reads like one, as
# a string otherwise. Empty (or nil) cells match anything.
#
def matches?(row, hash)
  @header.ins.all? do |x, in_header|
    # looked up before the emptiness check, like the cell substitution
    # below, it goes through Rufus::dsub
    value = Rufus::dsub("${#{in_header}}", hash)
    cell = row[x]

    if cell == nil || cell == ''
      true
    else
      cell = Rufus::dsub(cell, hash)
      comparison = NUMERIC_COMPARISON.match(cell)

      if comparison
        numeric_compare(comparison, value, cell)
      elsif (range = to_ruby_range(cell))
        range.include?(value)
      else
        string_compare(value, cell)
      end
    end
  end
end
381
+
382
# Matches +value+ against +cell+, the cell being used as a regular
# expression. Unless @unbounded is set, the cell is wrapped in ^ and $.
# @ignore_case turns the match case insensitive.
#
# Returns the MatchData (or nil when there is no match).
#
def string_compare(value, cell)
  flags = @ignore_case ? Regexp::IGNORECASE : 0
  pattern = @unbounded ? cell : "^#{cell}$"

  Regexp.new(pattern, flags).match(value)
end
392
+
393
# Evaluates a comparison cell (">100", "<=b", ...) against +value+.
#
# +match+ is the MatchData obtained from NUMERIC_COMPARISON : group 1 is
# the operator, group 2 the operand. +cell+ (the whole cell) is accepted
# for interface compatibility, only the operand is used.
#
# Both sides are compared as Floats when both can be turned into Floats,
# as Strings otherwise. Returns true or false; anything going wrong during
# the comparison yields false.
#
# The comparison is performed directly on the values. Building a piece of
# Ruby source out of them and evaluating it (as was done before) breaks
# as soon as a value contains a double quote and lets "#{...}" sequences
# in the data get interpolated.
#
def numeric_compare(match, value, cell)
  comparator = match[1]
  operand = match[2]

  nvalue = Float(value) rescue value
  noperand = Float(operand) rescue operand

  left, right =
    if nvalue.is_a?(String) || noperand.is_a?(String)
      [ value.to_s, operand.to_s ]
    else
      [ nvalue, noperand ]
    end

  case comparator
  when '>' then left > right
  when '>=' then left >= right
  when '<' then left < right
  when '<=' then left <= right
  else false
  end

rescue StandardError
  # e.g. a nil value : no match rather than a crash
  false
end
411
+
412
# Writes the "out" cells of +row+ into +hash+. Empty (or nil) cells are
# skipped, the others go through the dollar substitution first.
#
# Without @accumulate the new value replaces whatever the hash held. With
# @accumulate, the new value is appended to the previous one(s), building
# an Array as soon as there is more than one value.
#
def apply(row, hash)
  @header.outs.each do |x, out_header|
    raw = row[x]

    unless raw == nil || raw == ''
      value = Rufus::dsub(raw, hash)
      merged = value

      if @accumulate
        previous = hash[out_header]

        if previous.is_a?(Array)
          merged = previous + Array(value)
        elsif previous
          merged = [ previous, value ]
        end
      end

      hash[out_header] = merged
    end
  end
end
443
+
444
# Consumes the option rows sitting before the header row.
#
# Rows are removed from the top of @rows until a row holding an "in:" or
# "out:" cell (the header row) is met, or until no row is left. Each cell
# of a removed row is checked (case insensitively) against the known
# option names :
#
# * ignorecase / ignore_case : sets @ignore_case
# * through : unsets @first_match
# * accumulate : sets @accumulate and unsets @first_match
# * unbounded : sets @unbounded
#
# Other cells are ignored. Cells are turned into Strings before being
# examined, so that nil cells (possible when the table is given as an
# Array of Arrays) do not raise.
#
def extract_options
  while (row = @rows.first)

    break if row.any? { |cell| cell.to_s.match(IN_OR_OUT) }
      # just hit the header row

    row.each do |cell|
      case cell.to_s.downcase
      when 'ignorecase', 'ignore_case'
        @ignore_case = true
      when 'through'
        @first_match = false
      when 'accumulate'
        @first_match = false
        @accumulate = true
      when 'unbounded'
        @unbounded = true
      end
    end

    @rows.shift
  end

  nil
end
474
+
475
# Returns false when +first_row+ holds both an "in:" cell and an "out:"
# cell (a regular, horizontal, header row), true in every other case.
#
# Cells are turned into Strings before being examined, so that nil cells
# (possible when the table is given as an Array of Arrays) do not raise.
#
def is_vertical_table?(first_row)
  cells = first_row.map { |cell| cell.to_s }

  has_in = cells.any? { |cell| cell.match(IN) }
  has_out = cells.any? { |cell| cell.match(OUT) }

  !(has_in && has_out)
end
491
+
492
# Removes the header row from @rows and records its "in:" and "out:"
# cells (with their column index) in @header.
#
# When the first row does not look like a horizontal header, the table is
# considered vertical and gets transposed first.
#
# @header is always set, even when the table is empty or has no in/out
# cell, so that the table can still be run or dumped without tripping on
# a nil header. Cells are turned into Strings before being examined, nil
# cells are thus simply skipped.
#
def parse_header_row
  @header ||= Header.new

  row = @rows.first

  return unless row

  if is_vertical_table?(row)
    @rows = @rows.transpose
    row = @rows.first
    return unless row # nothing left once transposed
  end

  @rows.shift

  row.each_with_index do |cell, x|
    cell = cell.to_s
    next unless cell.match(IN_OR_OUT)
    @header.add(cell, x)
  end
end
510
+
511
# A regexp for checking if a string is a numeric Ruby range.
# Anchored on the whole string (\A and \z), so that a multi-line string
# whose first line looks like a range is not taken for one.
#
RUBY_NUMERIC_RANGE_REGEXP = Regexp.compile(
  "\\A\\d+(\\.\\d+)?\\.{2,3}\\d+(\\.\\d+)?\\z")

# A regexp for checking if a string is an alpha Ruby range
# (groups : first letter, dots, last letter).
#
RUBY_ALPHA_RANGE_REGEXP = Regexp.compile(
  "\\A([A-Za-z])(\\.{2,3})([A-Za-z])\\z")

# A Range whose include? accepts Strings when the range is numeric :
# the element is turned into a Float before being tested (an element that
# is not a number is never included).
#
# This is a subclass, not a singleton method added to a Range instance,
# because Range instances are frozen since Ruby 3.0 and can't receive
# singleton methods.
#
class CoercingRange < Range

  def include?(elt)

    elt = (Float(elt) rescue '') if first.is_a?(Numeric)
    super(elt)
  end
end

# If the string contains a Ruby range definition
# (ie something like "93.0..94.5", "56..72" or "a..f"), it will return
# the corresponding Range instance ("..." excludes the upper bound).
# Will return nil else.
#
# The Ruby range returned (if any) will accept String or Numeric,
# ie to_ruby_range("4..6").include?("5") will yield true.
#
# The string is parsed, not evaluated : bounds are read as base 10 numbers
# (Integer when there is no fractional part, Float otherwise).
#
def to_ruby_range(s)

  return nil unless s.is_a?(String)

  to_number = lambda { |t| t.include?('.') ? t.to_f : t.to_i }

  if RUBY_NUMERIC_RANGE_REGEXP.match(s)

    # the regexp above guarantees the <number><dots><number> layout
    first_s = s[/\A\d+(?:\.\d+)?/]
    rest = s[first_s.length..-1]
    dots = rest[/\A\.+/]
    last_s = rest[dots.length..-1]

    CoercingRange.new(
      to_number.call(first_s), to_number.call(last_s), dots.length == 3)

  elsif (m = RUBY_ALPHA_RANGE_REGEXP.match(s))

    CoercingRange.new(m[1], m[3], m[2].length == 3)
  end
end
556
+
557
# Keeps track of the input and output columns of a table : two hashes
# mapping a column index to the column name (the header cell stripped of
# its "in:" or "out:" prefix).
#
class Header

  attr_accessor :ins, :outs

  def initialize

    @ins, @outs = {}, {}
  end

  # Registers +cell+ (found at column +x+) as an input or as an output
  # column. Cells that are neither are ignored.
  #
  def add(cell, x)

    return @ins[x] = cell[3..-1] if cell.match(IN)
    return @outs[x] = cell[4..-1] if cell.match(OUT)

    nil # neither "in:" nor "out:", don't add
  end

  # Outputs the header as a CSV line : inputs first, then outputs, each
  # group sorted by column index.
  #
  def to_csv

    labels = @ins.keys.sort.map { |k| "in:#{@ins[k]}" }
    @outs.keys.sort.each { |k| labels << "out:#{@outs[k]}" }

    labels.join(',')
  end
end
587
+ end
588
+
589
# Given CSV data, turns it into an array of arrays (rows of cells).
#
# +csv+ may be
#
# * an Array : returned as is
# * a URI instance or a String holding a URI / path : the target is
#   opened, read and closed, its content is then parsed
# * a String holding CSV data, or an IO-like object (File, StringIO...) :
#   handed directly to the CSV parser
#
# Cells are stripped, nil cells become '', and rows containing only empty
# cells are dropped.
#
def self.csv_to_a(csv)

  return csv if csv.is_a?(Array)

  csv = csv.to_s if csv.is_a?(URI)

  # only Strings can designate a URI / path, IO objects go straight to
  # the parser
  if csv.is_a?(String) && is_uri?(csv)

    # URI.open is the open-uri entry point on recent Rubies, where
    # Kernel#open does not open URLs anymore.
    # NOTE(review): Kernel#open also runs commands for strings starting
    # with a pipe, keep an eye on what is_uri? lets through.
    opener = URI.respond_to?(:open) ? URI : Kernel

    # block form, so that the handle gets closed
    csv = opener.open(csv) { |io| io.read }
  end

  csv_lib = defined?(CSV::Reader) ? CSV::Reader : CSV
    # no CSV::Reader for Ruby 1.9.1

  csv_lib.parse(csv).inject([]) { |rows, row|
    cells = row.map { |cell| cell ? cell.strip : '' }
    rows << cells if cells.any? { |cell| cell != '' }
    rows
  }
end
608
+
609
# Returns true if +string+ is a String that URI::parse accepts, false if
# it's something else (CSV data ?).
#
# Anything that is not a String (nil, a File, a StringIO...) is not a URI,
# the same goes for Strings containing a newline.
#
def self.is_uri?(string)

  return false unless string.is_a?(String)
  return false if string.index("\n") # quick one

  URI::parse(string)
  true

rescue StandardError
  false
end
623
+
624
# Goes back and forth between "array of arrays" and "array of hashes".
#
# When the first element is not a Hash, it is considered the "row of
# keys" and each following row is turned into a hash :
#
#   [
#     [ 'age', 'name' ],
#     [ 33, 'Jeff' ],
#     [ 35, 'John' ]
#   ]
#
#   =>
#
#   [
#     { 'age' => 33, 'name' => 'Jeff' },
#     { 'age' => 35, 'name' => 'John' }
#   ]
#
# When the first element is a Hash, the opposite is done : the sorted keys
# of that first hash become the first row, followed by one row of values
# per hash.
#
# A String is first passed through csv_to_a. An empty array is returned
# as is.
#
def self.transpose(a)

  a = csv_to_a(a) if a.is_a?(String)

  return a if a.empty?

  head = a.first

  if head.is_a?(Hash)

    columns = head.keys.sort
    body = a.map { |record| columns.map { |c| record[c] } }

    body.unshift(columns)
  else

    a[1..-1].map do |values|
      record = {}
      head.each_with_index { |key, i| record[key] = values[i] }
      record
    end
  end
end
665
+
666
+ end
667
+ end
668
+
669
module Rufus

  #
  # Former name of the decision table class, kept for backward
  # compatibility : a plain (empty) subclass of Rufus::Decision::Table.
  #
  class DecisionTable < Decision::Table; end
end
679
+