ezr 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ezr-0.1.0/LICENSE ADDED
@@ -0,0 +1,24 @@
1
+ BSD 2-Clause License
2
+
3
+ Copyright (c) 2024, Tim Menzies
4
+
5
+ Redistribution and use in source and binary forms, with or without
6
+ modification, are permitted provided that the following conditions are met:
7
+
8
+ 1. Redistributions of source code must retain the above copyright notice, this
9
+ list of conditions and the following disclaimer.
10
+
11
+ 2. Redistributions in binary form must reproduce the above copyright notice,
12
+ this list of conditions and the following disclaimer in the documentation
13
+ and/or other materials provided with the distribution.
14
+
15
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
16
+ AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17
+ IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18
+ DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
19
+ FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20
+ DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
21
+ SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
22
+ CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
23
+ OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24
+ OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ezr-0.1.0/PKG-INFO ADDED
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.1
2
+ Name: ezr
3
+ Version: 0.1.0
4
+ Summary: Semi-supervised explanations for incremental multi-objective optimization
5
+ Home-page: https://github.com/timm/ezr
6
+ Author: Tim Menzies
7
+ Author-email: timm@ieee.org
8
+ License: BSD2
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: BSD License
11
+ Classifier: Development Status :: 2 - Pre-Alpha
12
+ Classifier: Operating System :: OS Independent
13
+ License-File: LICENSE
ezr-0.1.0/README.md ADDED
@@ -0,0 +1,23 @@
1
+ # ezr.py
2
+
3
+ Explanation system for semi-supervised multi-objective optimization.
4
+
5
+
6
+ ## Install
7
+
8
+ Download ezr.py.
9
+
10
+ Test:
11
+
12
+ ./ezr.py -h
13
+
14
+ ## Run
15
+
16
+ Find some csv data where the first row names the columns
17
+
18
+ - Uppercase names denote numerics (all others are symbolic)
19
+ - Names ending in "+" are goals to be maximized; names ending in "-" are goals to be minimized.
20
+ - Names ending in "!" show the klass column (there can only be one).
21
+
22
+ For examples, see the [/data](https://github.com/timm/ezr/tree/main/data)
23
+ directory.
@@ -0,0 +1,13 @@
1
+ Metadata-Version: 2.1
2
+ Name: ezr
3
+ Version: 0.1.0
4
+ Summary: Semi-supervised explanations for incremental multi-objective optimization
5
+ Home-page: https://github.com/timm/ezr
6
+ Author: Tim Menzies
7
+ Author-email: timm@ieee.org
8
+ License: BSD2
9
+ Classifier: Programming Language :: Python :: 3
10
+ Classifier: License :: OSI Approved :: BSD License
11
+ Classifier: Development Status :: 2 - Pre-Alpha
12
+ Classifier: Operating System :: OS Independent
13
+ License-File: LICENSE
@@ -0,0 +1,9 @@
1
+ LICENSE
2
+ README.md
3
+ ezr.py
4
+ setup.py
5
+ ezr.egg-info/PKG-INFO
6
+ ezr.egg-info/SOURCES.txt
7
+ ezr.egg-info/dependency_links.txt
8
+ ezr.egg-info/entry_points.txt
9
+ ezr.egg-info/top_level.txt
@@ -0,0 +1,2 @@
1
+ [console_scripts]
2
+ ezr = ezr:MAIN.main
@@ -0,0 +1 @@
1
+ ezr
ezr-0.1.0/ezr.py ADDED
@@ -0,0 +1,509 @@
1
+ #!/usr/bin/env python3
2
+ # MARK: help
3
+ """
4
+ ez.py: Active learning, find best/rest seen so far in a Bayes classifier
5
+ (c) 2024 Tim Menzies <timm@ieee.org>, BSD-2 license
6
+
7
+ OPTIONS:
8
+ -s --seed random number seed = 1234567891
9
+ -g --go start up action = help
10
+ -f --file data file = ../data/auto93.csv
11
+
12
+ Discretize:
13
+ -B --Bins max number of bins = 16
14
+
15
+ Classify:
16
+ -k --k low frequency kludge = 1
17
+ -m --m low frequency kludge = 2
18
+
19
+ Optimize:
20
+ -n --budget0 init evals = 4
21
+ -N --Budget max evals = 16
22
+ -b --best ratio of top = .5
23
+ -T --Top keep top todos = .8
24
+
25
+ Explain:
26
+ -l --leaf leaf size = 2 """
27
+
28
+ from __future__ import annotations # <1> ## types
29
+ import sys
30
+ sys.dont_write_bytecode = True
31
+ from collections import Counter
32
+ import re,ast,copy,json,math,random
33
+ from typing import Any,Iterable,Callable
34
+ from fileinput import FileInput as file_or_stdin
35
+
36
+ # ----------------------------------------------------------------------------------------
37
+ # MARK: inits
38
+
39
+ # Some globals
40
big = 1E32    # very large number: initial NUM lo/hi and the open ends of the first/last BIN
tiny = 1/big  # very small number: added to denominators so they are never zero
42
+
43
+ # Special type annotations
44
class Row    : has:list[Any]        # one record: a list of cell values
class Rows   : has:list[Row]        # a list of rows
class Classes: has:dict[str, Rows]  # a dictionary, one key for each class
47
+
48
+ # Simple base object: defines simple initialization and pretty print.
49
+
50
class OBJ:
    """Base object: keyword arguments become attributes; instances pretty-print."""

    def __init__(i, **d):
        # Each keyword argument is stored as an attribute of the same name.
        vars(i).update(d)

    def __repr__(i) -> str:
        # Class name, then `show()`'s rendering of the attribute dictionary.
        return type(i).__name__ + show(vars(i))
53
+
54
def settings(s:str) -> dict:
    """Parse option defaults from help text.

    Every `--name ... = value` found in `s` becomes `name -> coerce(value)`.
    If a name occurs more than once, the last occurrence wins.
    """
    options = {}
    for match in re.finditer(r"--(\w+)[^=]*=\s*(\S+)", s):
        options[match.group(1)] = coerce(match.group(2))
    return options
56
+
57
+ # ----------------------------------------------------------------------------------------
58
+ # ## Classes
59
+
60
+ # MARK: BIN
61
+ # Stores in `ys` the klass symbols seen between `lo` and `hi`.
62
+ #
63
+ # [1] `merge()` combines two BINs, if they are too small or they have similar distributions.
64
+ # [2] `selects()` returns true when a BIN matches a row.
65
+ # [3] `BIN.score()` reports how often we see `goals` symbols more than other symbols.
66
+ #
67
+ # To build decision trees, split Rows on the best scoring bin, then recurse on each half.
68
+
69
+ #ZZZ add in __repr__
70
class BIN(OBJ):
    """A range `lo`..`hi` of column `at` (named `txt`), with class counts in `ys`."""

    id = 0  # class-wide counter; every new BIN stores the next value as its own `id`

    def __init__(i, at:int, txt:str, lo:float, hi:float=None, ys:Counter=None):
        """Create a bin; `hi` defaults to `lo` and `ys` to an empty Counter."""
        # Test against None explicitly: `hi or lo` would throw away a legitimate
        # upper bound of 0, and `ys or Counter()` would throw away an empty Counter.
        i.at, i.txt, i.lo = at, txt, lo
        i.hi = lo if hi is None else hi
        i.ys = Counter() if ys is None else ys
        i.id = BIN.id = BIN.id + 1

    def add(i, x:float, y:Any):
        """Stretch the range to cover `x` and count one more sighting of class `y`."""
        i.lo = min(x, i.lo)
        i.hi = max(x, i.hi)
        i.ys[y] += 1

    def __repr__(i):
        """Show the bin as a test on the column: equality, open-ended, or a range."""
        if i.lo == i.hi: return f"{i.txt}={i.hi}"
        if i.lo == -big: return f"{i.txt} < {i.hi}"
        if i.hi == big: return f"{i.txt} >= {i.lo}"
        return f"{i.lo} <= {i.txt} < {i.hi}"

    def merge(i, j:BIN, small:float) -> BIN: # or None if nothing merged ------------[1]
        """Return the union of `i` and `j` if they should be merged, else None.

        Bins from different columns are never merged.
        """
        if i.at == j.at:
            k = BIN(i.at, i.txt, min(i.lo,j.lo), hi=max(i.hi,j.hi), ys=i.ys+j.ys)
            ei,ni = entropy(i.ys)
            ej,nj = entropy(j.ys)
            ek,nk = entropy(k.ys)
            if ni < small or nj < small: return k   # merge if bins too small
            if ek <= (ni*ei + nj*ej)/nk: return k   # merge if parts are more complex

    def selects(i, row: Row) -> bool: #-----------------------------------------------[2]
        """True if the row's cell is missing ("?"), equals a point bin, or lies in lo <= x < hi."""
        x = row[i.at]
        return x=="?" or i.lo == x == i.hi or i.lo <= x < i.hi

    def selectsRejects(i, classes: Classes) -> tuple[Classes,Classes]:
        """Split every class's rows into those this bin selects and those it rejects."""
        yes = {k:[] for k in classes}
        no = {k:[] for k in classes}
        for k,rows in classes.items():
            for row in rows:
                (yes if i.selects(row) else no)[k].append(row)
        return yes,no
106
+
107
+ # MARK: COL
108
+ # is an abstract class above NUM and SYM.
109
+ #
110
+ # - `bins()` reports how col values are spread over a list of BINs.
111
+
112
class COL(OBJ):
    """Shared parent of NUM and SYM: column position `at`, name `txt`, item count `n`."""

    def __init__(i, at:int=0, txt:str=" "):
        i.n = 0
        i.at = at
        i.txt = txt

    def bins(i, classes: Classes, small=None) -> list[BIN]:
        """Report how this column's values, per class, spread over BINs.

        Missing cells ("?") are skipped. Each value is routed to the BIN keyed by
        `i.bin(value)`; the BINs, sorted by `lo`, are then finished off by
        `i.binsComplete()`. When `small` is not given it is set to the total
        number of rows divided by `the.Bins`.
        """
        found = {}
        for klass, rows in classes.items():
            for row in rows:
                value = row[i.at]
                if value == "?":
                    continue
                key = i.bin(value)
                if key not in found:
                    found[key] = BIN(i.at, i.txt, value)
                found[key].add(value, klass)
        ordered = sorted(found.values(), key=lambda one: one.lo)
        if not small:
            small = sum(len(rows) for rows in classes.values()) / the.Bins
        return i.binsComplete(ordered, small=small)
124
+
125
+ # MARK: SYM
126
+ # summarizes a stream of symbols.
127
+ #
128
+ # - the `div()`ersity of a SYM summary is the `entropy`;
129
+ # - the `mid()`dle of a SYM summary is the mode value;
130
+ # - `like()` returns the likelihood of a value belongs in a SYM distribution;
131
+ # - `bin()` and `binsComplete()` are used for generating BINs (for SYMs there is not much to do with BINs)
132
+
133
class SYM(COL):
    """Summary of a stream of symbols: `has` maps each symbol to its count."""

    def __init__(i,**kw):
        super().__init__(**kw)
        i.has = {}

    def add(i, x:Any):
        """Count one more `x`; missing values ("?") are ignored."""
        if x != "?":
            i.n += 1
            i.has[x] = i.has.get(x,0) + 1

    def bin(i,x:Any) -> Any:
        """A symbol is its own bin key."""
        return x

    def binsComplete(i,bins:list[BIN],**_) -> list[BIN]:
        """Symbolic bins need no post-processing; return them untouched."""
        return bins

    def div(i) -> float:
        """Diversity: the entropy of the symbol counts."""
        # entropy() returns the pair (entropy, N); only the entropy is the diversity.
        return entropy(i.has)[0]

    def mid(i) -> Any:
        """Middle: the most frequent symbol (the mode)."""
        return max(i.has, key=i.has.get)

    def like(i, x:Any, prior:float) -> float:
        """Likelihood of `x`: its count, smoothed by `the.m` pseudo-counts weighted by `prior`."""
        return (i.has.get(x, 0) + the.m*prior) / (i.n + the.m)
148
+
149
+ # MARK: NUM
150
+ # summarizes a stream of numbers.
151
+ #
152
+ # - the `div()`ersity of a NUM summary is the standard deviation;
153
+ # - the `mid()`dle of a NUM summary is the mean value;
154
+ # - `like()` returns the likelihood of a value belongs in a NUM distribution;
155
+ # - `bin(n)` places `n` in one equal width bin (spread from `lo` to `hi`)
156
+ # - `binsComplete(bins)` tries to merge adjacent numeric bins
157
+ # - `d2h(n)` reports how far `n` is from `heaven` (which is 0 when minimizing, 1 otherwise)
158
+ # - `norm(n)` maps `n` into 0..1 (min..max)
159
+
160
class NUM(COL):
    """Incremental summary of a stream of numbers: count, mean, spread, min and max."""

    def __init__(i,**kw):
        super().__init__(**kw)
        i.mu,i.m2,i.lo,i.hi = 0,0,big, -big
        # Goal value after normalization: 0 if the name ends in "-", otherwise 1.
        i.heaven = 0 if i.txt[-1]=="-" else 1

    def add(i, x:Any):
        """Update n, mean (`mu`), sum of squared differences (`m2`), `lo` and `hi`; skip "?"."""
        if x != "?":
            i.n += 1
            d = x - i.mu
            i.mu += d/i.n
            i.m2 += d * (x - i.mu)
            i.lo = min(x, i.lo)
            i.hi = max(x, i.hi)

    def bin(i, x:float) -> int:
        """Index (0 .. the.Bins-1) of the equal-width bin that holds `x`."""
        return min(the.Bins - 1, int(the.Bins * i.norm(x)))

    def binsComplete(i, bins: list[BIN], small=2) -> list[BIN]:
        """Merge adjacent bins where `BIN.merge` allows, then make the bins cover -big..big.

        After merging, each bin starts where its predecessor ends.
        """
        bins = merges(bins,merge=lambda x,y:x.merge(y,small))
        if not bins:  # no bins at all (e.g. every cell was "?"): nothing to stretch
            return bins
        bins[0].lo = -big
        bins[-1].hi = big
        for before,after in zip(bins, bins[1:]):
            after.lo = before.hi
        return bins

    def d2h(i, x:float) -> float:
        """Distance of the normalized `x` from `heaven`."""
        return abs(i.norm(x) - i.heaven)

    def norm(i,x:float) -> float:
        """Map `x` using (x - lo) / (hi - lo); "?" is returned unchanged."""
        return x if x=="?" else (x - i.lo) / (i.hi - i.lo + tiny)

    def div(i) -> float:
        """Diversity: sample standard deviation (0 when fewer than two values were seen)."""
        return 0 if i.n < 2 else (i.m2 / (i.n - 1))**.5

    def mid(i) -> float:
        """Middle: the mean."""
        return i.mu

    def like(i, x:float, _) -> float:
        """Likelihood of `x` under a normal curve with this mean and spread, capped at 1."""
        v = i.div()**2 + tiny
        nom = math.e**(-1*(x - i.mu)**2/(2*v)) + tiny
        denom = (2*math.pi*v)**.5
        return min(1, nom/(denom + tiny))
196
+
197
+ # MARK: COLS
198
+ # is a factory for building and storing COLs from column names. All columns are in `all`.
199
+ # References to the independent and dependent variables are in `x` and `y` (respectively).
200
+ # If there is a klass, that is referenced in `klass`. And all the names are stored in `names`.
201
+
202
class COLS(OBJ):
    """Factory that turns a list of column names into NUM and SYM columns."""

    def __init__(i, names: list[str]):
        """Build one column per name.

        Names starting with an uppercase letter become NUMs, all others SYMs.
        Every column is kept in `all`. Unless its name ends in "X", a column is
        also filed under `y` (name ends in "!", "+" or "-") or `x` (anything
        else). A column whose name ends in "!" is remembered as `klass`.
        """
        i.x, i.y, i.all, i.names, i.klass = [], [], [], names, None
        for at, txt in enumerate(names):
            maker = NUM if txt[0].isupper() else SYM
            col = maker(at=at, txt=txt)
            i.all.append(col)
            last = txt[-1]
            if last == "X":
                continue
            if last in "!+-":
                i.y.append(col)
            else:
                i.x.append(col)
            if last == "!":
                i.klass = col

    def add(i,row: Row) -> Row:
        """Update every column from its cell in `row` (skipping "?"), then return `row`."""
        for col in i.all:
            cell = row[col.at]
            if cell != "?":
                col.add(cell)
        return row
216
+
217
+ # MARK: DATA
218
+ # stores `rows`, summarized into `cols`. Optionally, `rows` can be sorted by distance to
219
+ # heaven (`d2h()`). A `clone()` is a new `DATA` of the same structure. Can compute
220
+ # `loglike()`lihood of a `Row` belonging to this `DATA`.
221
+
222
class DATA(OBJ):
    """Stores `rows` and summarizes them in `cols`."""

    def __init__(i, src:Iterable[Row]=(), order=False, fun=None):
        """Load rows from `src`, whose first item is the list of column names.

        `fun`, if given, is called as `fun(data, row)` before each data row is
        stored. When `order` is true the rows are sorted by distance to heaven.
        """
        # `src` used to be written `src=Iterable[Row]`, which made the typing
        # object the default *value*; the default is now an empty source.
        i.rows, i.cols = [], None
        for lst in src:
            i.add(lst,fun)
        if order: i.order()

    def add(i, row:Row, fun:Callable=None):
        """The first row seen defines the columns; later rows update them and are kept."""
        if i.cols:
            if fun: fun(i,row)
            i.rows += [i.cols.add(row)]
        else:
            i.cols = COLS(row)

    def clone(i,lst:Iterable[Row]=None,order=False) -> DATA:
        """Return a new DATA with the same column names, filled from `lst` (if any)."""
        # list(...) lets any iterable be used; None replaces the old shared `[]` default.
        rows = [] if lst is None else list(lst)
        return DATA([i.cols.names]+rows,order=order)

    def stats(i, cols=None, what:str=None):
        """Map column name to `show(col.<what>())`; defaults: the `y` columns and "mid"."""
        return {col.txt:show(getattr(col,what or "mid")())
                for col in cols or i.cols.y}

    def order(i) -> Rows:
        """Sort the rows in ascending distance to heaven and return them."""
        i.rows = sorted(i.rows, key=i.d2h, reverse=False)
        return i.rows

    def d2h(i, row:Row) -> float:
        """Root mean squared distance to heaven over the `y` columns."""
        d = sum(col.d2h( row[col.at] )**2 for col in i.cols.y)
        return (d/len(i.cols.y))**.5

    def loglike(i, row:Row, nall:int, nh:int) -> float:
        """Log-likelihood that `row` belongs to this DATA.

        `nall` is the number of rows over all classes and `nh` the number of
        classes. Missing cells ("?") are skipped, and only values above zero
        contribute their log.
        """
        prior = (len(i.rows) + the.k) / (nall + the.k*nh)
        likes = [c.like(row[c.at],prior) for c in i.cols.x if row[c.at] != "?"]
        return sum(math.log(x) for x in likes + [prior] if x>0)
254
+
255
+ # MARK: smo
256
def smo(data0:DATA, score=lambda B,R: B-R) -> Row:
    """Label a few rows at a time, guided by a best/rest classifier; return the best row found.

    The rows of `data0` are shuffled; the first `the.budget0` of them are
    labelled. Then, up to `the.Budget` times: the labelled rows (sorted by
    distance to heaven) are split into `best` and `rest`, the unlabelled rows
    are sorted by `score(like(best), like(rest))` (largest first), only the top
    `the.Top` fraction is kept, and the first of those is labelled next.
    """
    def acquire(best, rest, rows):
        # Sort `rows` (in place) so the highest score comes first, then keep the top.
        nall = len(best.rows) + len(rest.rows)
        def worth(row):
            b = best.loglike(row, nall, 2)
            r = rest.loglike(row, nall, 2)
            return -score(b, r)
        rows.sort(key=worth)
        return rows[:int(len(rows) * the.Top)]

    random.shuffle(data0.rows)
    start = the.budget0
    done, todo = data0.rows[:start], data0.rows[start:]
    data1 = data0.clone(done, order=True)
    for _ in range(the.Budget):
        if len(todo) < 3:
            break
        n = int(len(done)**the.best + .5)
        best = data0.clone(data1.rows[:n])
        rest = data0.clone(data1.rows[n:])
        top, *todo = acquire(best, rest, todo)
        done.append(top)
        data1 = data0.clone(done, order=True)
    return data1.rows[0]
277
+
278
+ # MARK: CONTRAST
279
class CONTRAST(OBJ):
    """One node of a contrast tree: a leaf holding rows, or a split on a bin."""

    def show(i):
        """Print the tree, one node per line, prefixed by "|.. " once per level."""
        for lvl, node in i.nodes():
            what = counts(node.yes) if node.isLeaf else node.bin
            print("|.. " * lvl, what)

    def nodes(i,lvl=0):
        """Yield `(level, node)` for this node, then its `yes` branch, then its `no` branch."""
        yield lvl, i
        if i.isLeaf:
            return
        for branch in (i.yes, i.no):
            yield from branch.nodes(lvl+1)
289
+
290
class CONTRASTS(OBJ):
    """Grows a tree of CONTRAST nodes that separates the `best` rows from the `rest`."""

    def __init__(i, data:DATA, classes:Classes,
                 best:str="best", rest:str="rest", score=lambda B,R: B-R):
        """Collect the bins of every `x` column, print the class sizes, grow the tree into `root`."""
        found = []
        for col in data.cols.x:
            found.extend(col.bins(classes))
        i.bins = found
        n_best, n_rest = len(classes[best]), len(classes[rest])
        i.best, i.score = best, score
        i.bests, i.rests = n_best, n_rest
        print(counts(classes))
        i.root = i.grow(classes, 0 ,1E30)

    def grow(i, classes:Classes, lvl:int, above:int) -> OBJ:
        """Recursively split `classes` on the top scoring bin.

        Growth stops with a leaf when `the.leaf` or fewer best rows remain, or
        when the number of best rows is the same as in the parent (`above`).
        """
        myBest = len(classes[i.best])
        if myBest <= the.leaf or myBest == above:
            return CONTRAST(isLeaf=True, yes=classes, no={}, lvl=lvl)
        split = max(i.bins, key=lambda one: i.sorter(one, classes))
        yes, no = split.selectsRejects(classes)
        print(counts(yes), counts(no))
        left = i.grow(yes, lvl+1, myBest)
        right = i.grow(no, lvl+1, myBest)
        return CONTRAST(isLeaf=False, lvl=lvl, bin=split, yes=left, no=right)

    def sorter(i, bin:BIN, classes:Classes) -> float:
        """Score `bin` from the fractions of best and of rest rows that it selects."""
        b = r = 0  # counts of best,rest
        for k, rows in classes.items():
            hits = sum(1 for row in rows if bin.selects(row))
            if k == i.best:
                b += hits
            else:
                r += hits
        return i.score( b/(i.bests+tiny), r/(i.rests+tiny) )
318
+
319
+ # MARK: NB
320
+ # Visitor object carried along by a DATA. Internally maintains its own `DATA` for rows
321
+ # from different class.
322
+
323
class NB(OBJ):
    """Incremental classifier: keeps one DATA per class and tracks its own accuracy."""

    def __init__(i):
        i.nall = 0    # rows seen so far
        i.datas = {}  # class name -> DATA holding the rows of that class
        i.acc = 0     # number of correct guesses

    def classify(i,data,row):
        """Return the class whose DATA gives `row` the highest log-likelihood."""
        nh = len(i.datas)
        def liking(k):
            return i.datas[k].loglike(row, i.nall, nh)
        return max(i.datas, key=liking)

    def run(i, data:DATA, row:Row):
        """Test on `row` (once more than 10 rows have been seen), then train on it."""
        want = row[data.cols.klass.at]
        i.nall += 1
        if i.nall > 10:
            if want == i.classify(data,row):
                i.acc += 1
        if want not in i.datas:
            i.datas[want] = data.clone()
        i.datas[want].add(row)
338
+
339
+ #----------------------------------------------------------------------------------------
340
+ # MARK: misc functions
341
+
342
def shuffle(lst):
    """Shuffle `lst` in place and return that same list."""
    random.shuffle(lst)
    return lst
343
def counts(d):
    """Map each key of `d` to the length of its value."""
    sizes = {}
    for key, value in d.items():
        sizes[key] = len(value)
    return sizes
344
# Return the item at index 0 of `lst`.
def first(lst): return lst[0]
345
+
346
+ # ### Data mining tricks
347
def entropy(d: dict) -> tuple[float, int]:
    """Return `(e, N)` for a dictionary of counts.

    `N` is the sum of the counts above zero and `e` is the entropy (in bits)
    of those counts. Counts of zero or less are ignored. An empty dictionary
    yields `(0, 0)`.
    """
    positives = [n for n in d.values() if n > 0]  # filter once, use twice
    N = sum(positives)
    return -sum(n/N*math.log(n/N,2) for n in positives), N
350
+
351
def merges(b4: list[BIN], merge:Callable) -> list[BIN]:
    """Repeatedly combine neighbouring items until a whole pass merges nothing.

    `merge(x, y)` returns the combined item, or something falsy when `x` and
    `y` should stay apart. The list from the last (merge-free) pass is returned.
    """
    while True:
        now, j = [], 0
        last = len(b4) - 1
        while j <= last:
            x = b4[j]
            if j < last:
                if xy := merge(x, b4[j+1]):
                    x = xy
                    j += 1  # merged: jump over the item that was absorbed
            now.append(x)
            j += 1
        if len(now) == len(b4):
            return b4
        b4 = now
363
+
364
+ # ### Strings to things
365
def coerce(s:str) -> Any:
    """Turn a string into the Python literal it spells; return it unchanged if it spells none."""
    try:
        thing = ast.literal_eval(s)
    except Exception:
        thing = s
    return thing
368
+
369
def csv(file=None) -> Iterable[Row]:
    """Yield one list of coerced cells per non-empty line of `file`.

    Whitespace, double quotes, the ’ character and `#` comments are deleted
    from each line before it is split on commas.
    """
    with file_or_stdin(file) as src:
        for raw in src:
            cleaned = re.sub(r'([\n\t\r"\’ ]|#.*)', '', raw)
            if not cleaned:
                continue
            yield [coerce(cell.strip()) for cell in cleaned.split(",")]
374
+
375
def cli(d:dict) -> None:
    """Update the options in `d` from the command-line flags in `sys.argv`.

    An option `k` is matched by `--k` or by `-` plus the first letter of `k`.
    For a match, a current value of "true" or "false" is flipped; any other
    value is replaced by the command-line argument that follows the flag.
    If `d` holds a true "help" option, the help text is shown and the program exits.
    """
    # NOTE(review): short flags are derived from the first letter of each key, so
    # keys sharing a first letter (e.g. "budget0" and "best") answer to the same
    # short flag, and the help text's `-n`, `-N`, `-T` are not recognized here.
    for k,v in d.items():
        v = str(v)
        for c,arg in enumerate(sys.argv):
            after = "" if c >= len(sys.argv) - 1 else sys.argv[c+1]
            if arg in ["-"+k[0], "--"+k]:
                v = "false" if v=="true" else ("true" if v=="false" else after)
                d[k] = coerce(v)
    # `sys.text` does not exist; exit once the help text has been printed.
    if d.get("help", False): sys.exit( MAIN.help() )
384
+
385
+ # ### Printing
386
def show(x:Any, n=3) -> Any:
    """Prepare `x` for printing.

    Numbers with a fractional part are rounded to `n` places. Lists and tuples
    become a list of at most 10 shown items. Dictionaries become a
    "{:key value, ...}" string, sorted by key, without keys that start with "_".
    Anything else is returned as is.
    """
    if isinstance(x, (int, float)):
        return x if int(x) == x else round(x, n)
    if isinstance(x, (list, tuple)):
        shown = [show(item, n) for item in x]
        return shown[:10]
    if isinstance(x, dict):
        pairs = []
        for key, value in sorted(x.items()):
            if key[0] != "_":
                pairs.append(f":{key} {show(value,n)}")
        return "{" + ', '.join(pairs) + "}"
    return x
392
+
393
def prints(matrix: list[list],sep=' | ') -> None:
    """Print `matrix` as a table: cells right-aligned to their column's width, joined by `sep`."""
    table = [[str(cell) for cell in row] for row in matrix]
    widths = [max(len(cell) for cell in column) for column in zip(*table)]
    template = sep.join("{:>" + str(width) + "}" for width in widths)
    for row in table:
        print(template.format(*row))
398
+
399
def asRed(pat,s):
    """Wrap group 1 of every match of `pat` in `s` between the escape codes `[91m` and `[00m`."""
    template = r"\033[91m\1\033[00m"
    return re.sub(pat, template, s)
400
def asYellow(pat,s):
    """Wrap group 1 of every match of `pat` in `s` between the escape codes `[93m` and `[00m`."""
    template = r"\033[93m\1\033[00m"
    return re.sub(pat, template, s)
401
+
402
+ #----------------------------------------------------------------------------------------
403
+ # MARK: main
404
+ # `./ezr.py -g all` : run all start-up actions, then return the count of failures to the operating system.
405
+ # `MAIN.one()` : seed the random number generator, run one start-up action, then restore the options to their prior values.
406
+
407
class MAIN:
    """Namespace of start-up actions; each is called as `MAIN.<name>()` with no arguments."""

    def main():
        """Entry point: update the options from the command line, then run `the.go`."""
        # Always parse the flags. Guarding this with `__name__=="__main__"` meant that
        # the installed console script (which imports this module, so `__name__` is
        # not "__main__") ignored every command-line option.
        cli(the.__dict__)
        MAIN.one(the.go)

    def one(s:str) -> Any:
        """Run the action named `s` with a freshly seeded random number generator.

        The options are copied first and put back afterwards (even if the action
        raises), so changes an action makes to `the` do not leak into later
        actions. Unknown names print a complaint.
        """
        global the
        cache = copy.deepcopy(the)
        random.seed(the.seed)
        try:
            out = getattr(MAIN, s, lambda :print(f"E> '{s}' unknown."))()
        finally:
            the = cache
        return out

    def all() -> None:
        """Run every action (except all, one, main); exit with the number that returned False."""
        sys.exit(sum(MAIN.one(s) == False for s in sorted(dir(MAIN))
                     if s[0] != "_" and s not in ["all", "one", "main"]))

    def help():
        """Print the module docstring with its short and long flags colored."""
        print(asRed(r"(\n[\s]+-\S)", asYellow(r"( --[\S]+)", __doc__)))

    def opt():
        """Print the current options."""
        print(the)

    def header():
        """Print the columns built from a sample list of column names."""
        top=["Clndrs","Volume","HpX","Model","origin","Lbs-","Acc+","Mpg+"]
        for col in COLS(top).all:
            print(col)

    def data():
        """Load `the.file`; print the middle of the goal columns and the diversity of all columns."""
        d=DATA(csv(the.file))
        print("mid", d.stats())
        print("div", d.stats(cols=d.cols.all,what="div"))

    def rows():
        """Print every 50th sorted log-likelihood of the rows, for unsorted and sorted data."""
        d1=DATA(csv(the.file))
        d2=d1.clone(d1.rows, order=True)
        for d in [d1,d2]:
            print(sorted(show(d.loglike(r,len(d.rows),1)) for r in d.rows)[::50])

    def nbayes():
        """Run the incremental classifier over the soybean data and print its accuracy."""
        the.file="../data/soybean.csv"
        the.m,the.k = 1,0
        nb = NB()
        d=DATA(csv(the.file),order=False,
               fun=nb.run)
        print(show(nb.acc/len(d.rows)))

    def bore():
        """Print every 50th row of the data, sorted by distance to heaven."""
        d=DATA(csv(the.file),order=True); print("")
        prints([d.cols.names] + [r for r in d.rows[::50]])

    def bore2():
        """Print the bins that separate the first from the last sqrt(N) sorted rows."""
        d = DATA(csv(the.file),order=True)
        n = int(len(d.rows)**.5)
        for col in d.cols.x:
            print("")
            for bin in col.bins(dict(best=d.rows[:n] ,rest=d.rows[-n:])):
                print(bin, sep="\t")

    def contrasts():
        """Grow and print a contrast tree: first sqrt(N) sorted rows versus a sample of the others."""
        d = DATA(csv(the.file),order=True)
        n = int(len(d.rows)**.5)
        best = d.rows[:n]
        rest = shuffle(d.rows[n:])[-n:]

        tree = CONTRASTS(d,dict(best=best,rest=rest)).root
        tree.show()
        #print(json.dumps(tree, indent=2))

    def guess():
        """Baseline: 20 times, pick `budget` rows at random and keep the one closest to heaven."""
        budget = 20
        d = DATA(csv(the.file),order=True)
        asIs, toBe = NUM(), NUM()
        for row in d.rows:
            asIs.add(d.d2h(row))
        for _ in range(20):
            tmp = [random.choice(d.rows) for _ in range(budget)]
            toBe.add( d.d2h( sorted(tmp, key=lambda r: d.d2h(r))[0]))
        print(show(dict(budget= budget,
                        mu= dict( asIs=asIs.mid(), guess= toBe.mid()),
                        sd= dict(asIs=asIs.div(), guess= toBe.div()))))

    def smo():
        """Run the module-level `smo()` once; print the distance to heaven of the row it returns."""
        d = DATA(csv(the.file))
        print(d.d2h( smo( d )))

    def smo20():
        """Run the module-level `smo()` 20 times under the profiler; compare with the raw data."""
        import cProfile
        agains = 20
        d = DATA(csv(the.file),order=True)
        asIs, toBe = NUM(), NUM()
        for row in d.rows:
            asIs.add(d.d2h(row))
        pr = cProfile.Profile()
        pr.enable()
        for _ in range(agains):
            toBe.add(d.d2h(smo(d)))
        pr.disable()
        pr.print_stats(sort='time')
        print(show(dict(agains=agains,
                        mu= dict(asIs=asIs.mid(), toBe= toBe.mid()),
                        sd= dict(asIs=asIs.div(), toBe= toBe.div()))))
505
+
506
+ # --------------------------------------------
507
+ # MARK: Start-up
508
the = OBJ(**settings(__doc__))        # option defaults, parsed from the module docstring
if __name__=="__main__": MAIN.main()  # run the start-up action only when executed as a script
ezr-0.1.0/setup.cfg ADDED
@@ -0,0 +1,4 @@
1
+ [egg_info]
2
+ tag_build =
3
+ tag_date = 0
4
+
ezr-0.1.0/setup.py ADDED
@@ -0,0 +1,24 @@
1
# Packaging script for the single-module distribution `ezr`.
from setuptools import setup,find_packages

setup(
  name='ezr',
  version='0.1.0',
  license="BSD2",
  py_modules=['ezr'],
  url='https://github.com/timm/ezr',
  author='Tim Menzies',
  author_email='timm@ieee.org',
  description='Semi-supervised explanations for incremental multi-objective optimization',
  # ezr.py uses the `:=` operator, which is a syntax error before Python 3.8.
  python_requires='>=3.8',
  install_requires=[],
  packages=find_packages(),
  classifiers=[
    'Programming Language :: Python :: 3',
    'License :: OSI Approved :: BSD License',
    'Development Status :: 2 - Pre-Alpha',
    'Operating System :: OS Independent',
  ],
  # Installs an `ezr` command that calls `MAIN.main` in module `ezr`.
  entry_points='''
    [console_scripts]
    ezr=ezr:MAIN.main
  ''',
)