see5 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +22 -0
- data/lib/see5.rb +33 -1
- data/lib/see5/gritbot_output_parser.rb +92 -0
- data/lib/see5/input_file_writer.rb +9 -1
- data/lib/see5/{rules_file_parser.rb → rules_output_parser.rb} +1 -1
- data/lib/see5/schema.rb +20 -0
- data/lib/see5/version.rb +1 -1
- data/see5.gemspec +1 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2f7b552c03f5df7d375e4fdf2af7a30bbd25cd4c55634a5be14b5abd0f511f2
|
4
|
+
data.tar.gz: 03056357c74c6cd58145c5a60f9a759b2f8c04fb7809357163900415da1a1791
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd9b563564b17b5b9f2f770448b59ed75947b0ad61fc26377529fed84cca56f5ac31f2b8950635a1a53bd8667d289d19a2cd5cedc2a2783c04b1f485d27cfcfc
|
7
|
+
data.tar.gz: f5b0be6ae50abef89eab752f4008b454a49b00f68bc59bff54a99adcc6968950fc898f43785eca0ee3c4915f75616edbae5c77fd953953a27156976cb51f7206
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
see5 (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
minitest (5.14.1)
|
10
|
+
rake (13.0.1)
|
11
|
+
|
12
|
+
PLATFORMS
|
13
|
+
ruby
|
14
|
+
|
15
|
+
DEPENDENCIES
|
16
|
+
bundler (~> 1.17)
|
17
|
+
minitest (~> 5.0)
|
18
|
+
rake (~> 13.0)
|
19
|
+
see5!
|
20
|
+
|
21
|
+
BUNDLED WITH
|
22
|
+
1.17.2
|
data/lib/see5.rb
CHANGED
@@ -2,6 +2,38 @@
|
|
2
2
|
|
3
3
|
require "see5/input_file_writer"
|
4
4
|
require "see5/model"
|
5
|
-
require "see5/
|
5
|
+
require "see5/rules_output_parser"
|
6
|
+
require "see5/gritbot_output_parser"
|
6
7
|
require "see5/schema"
|
7
8
|
require "see5/version"
|
9
|
+
|
10
|
+
# Entry points for training a See5/C5.0 rule model (`train`) and for auditing
# a dataset with Gritbot (`audit`).
#
# Both entry points build a schema from the dataset, hand dataset and schema to
# See5::InputFileWriter, run the external binary through the shell with its
# stdout redirected to a file under /tmp, and then parse that file.
module See5
  # Filestem given to both external tools via `-f`.
  # NOTE(review): presumably See5::InputFileWriter writes its .names/.data
  # files under this same stem -- confirm against that class.
  FILE_STEM = "/tmp/ruby-see5".freeze
  # Where the captured stdout of `c5.0` is stored and later parsed.
  RULES_OUTPUT_PATH = "#{FILE_STEM}.rules_output".freeze
  # Where the captured stdout of `gritbot` is stored and later parsed.
  GRITBOT_OUTPUT_PATH = "#{FILE_STEM}.gritbot_output".freeze

  # Trains a model on +data+.
  #
  # @param data the dataset handed to Schema.from_dataset and InputFileWriter
  # @param class_attribute name of the attribute holding each record's class
  # @return [See5::Model] built from the parsed rules output
  def self.train(data, class_attribute)
    prepare_tmp_files(data, class_attribute)
    run_see5

    output = See5::RulesOutputParser.parse_file(RULES_OUTPUT_PATH)

    See5::Model.new(**output)
  end

  # Runs Gritbot over +data+.
  #
  # @param data the dataset handed to Schema.from_dataset and InputFileWriter
  # @param class_attribute name of the attribute holding each record's class
  # @return whatever See5::GritbotOutputParser.parse_file returns for the report
  def self.audit(data, class_attribute)
    prepare_tmp_files(data, class_attribute)
    run_gritbot

    See5::GritbotOutputParser.parse_file(GRITBOT_OUTPUT_PATH)
  end

  # Infers a schema from +data+ and passes both to See5::InputFileWriter.
  #
  # Schema.from_dataset declares +class_attribute+ as a keyword argument, so it
  # must be passed by keyword; a positional argument raises ArgumentError.
  def self.prepare_tmp_files(data, class_attribute)
    schema = See5::Schema.from_dataset(data, class_attribute: class_attribute)
    See5::InputFileWriter.write_files(data: data, schema: schema)
  end

  # Runs the `c5.0` binary, redirecting its stdout to RULES_OUTPUT_PATH.
  # Returns the result of Kernel#system; callers in this module ignore it.
  def self.run_see5
    system("c5.0 -f #{FILE_STEM} -r > #{RULES_OUTPUT_PATH}")
  end

  # Runs the `gritbot` binary, redirecting its stdout to GRITBOT_OUTPUT_PATH.
  # Returns the result of Kernel#system; callers in this module ignore it.
  def self.run_gritbot
    system("gritbot -s -f #{FILE_STEM} -r > #{GRITBOT_OUTPUT_PATH}")
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module See5
  # Reads Gritbot output and returns an array of hashes representing the anomalies.
  #
  # Each anomaly hash has the keys :case_index, :case_label, :signifigance,
  # :attribute, :value, :reason and :conditions.
  class GritbotOutputParser
    # Raised when a line in the anomalies section does not have the expected shape.
    class ParseError < StandardError; end

    # Line that ends the header, e.g. "8 possible anomalies identified".
    ANOMALIES_HEADER = /\d+ possible anomal/.freeze
    # Line reporting cases excluded while checking an attribute (currently ignored).
    EXCLUSION_LINE = /\s*while checking/.freeze
    # First line of an anomaly, optionally prefixed with "test " or "data ".
    ANOMALY_LINE = /(?:test |data )?case /.freeze
    # Captures: case index, label text, bracketed number.
    ANOMALY_INFO = /.*case (.*): (.*)\[([\.\d]+)\]/.freeze
    # Captures: attribute name, value, parenthesised explanation.
    ANOMALY_VALUE = /(.*) = (\S*)\s*\((.*)\)/.freeze
    # Prefix of the line after which nothing more is parsed.
    FOOTER_PREFIX = "Time:".freeze

    attr_reader :anomalies

    # Parses the file at +fname+ and returns its anomalies.
    #
    # @param fname [String] path to a file holding Gritbot's output
    # @return [Array<Hash>] one hash per anomaly, in file order
    # @raise [ParseError] if an anomaly's info or value line cannot be parsed
    def self.parse_file(fname)
      new(fname).anomalies
    end

    # @param fname [String] path to a file holding Gritbot's output
    def initialize(fname)
      # Read every line up front so no file handle stays open after parsing.
      @lines = File.readlines(fname).each
      @anomalies = []

      parse_file
    end

    # Skips the header, then collects anomalies until the "Time:" line or the
    # end of the input, whichever comes first.
    def parse_file
      discard_header

      while (line = next_line)
        if line.start_with?(EXCLUSION_LINE)
          # TODO: record excluded cases
        elsif line.start_with?(ANOMALY_LINE)
          @anomalies << parse_anomaly(line)
        elsif line.start_with?(FOOTER_PREFIX)
          break
        end
      end
    end

    private

    # The single enumerator shared by all parsing steps.
    def lines
      @lines
    end

    # Returns the next line, or nil once the input is exhausted
    # (Enumerator#next signals the end by raising StopIteration).
    def next_line
      lines.next
    rescue StopIteration
      nil
    end

    # Discard the file header and advance to the anomalies section
    def discard_header
      while (line = next_line)
        break if line.start_with?(ANOMALIES_HEADER)
      end
      # discard the blank line that follows the header
      next_line
    end

    # Parses one anomaly: the info line already read (+line+), the value line
    # that follows it, and condition lines up to the next blank line or the
    # end of the input.
    def parse_anomaly(line)
      info = parse_anomaly_info_line(line)
      value = parse_anomaly_value_line(next_line.to_s)

      conditions = []
      while (condition = next_line)
        condition = condition.strip
        break if condition.empty?

        conditions << parse_condition_line(condition)
      end

      # TODO: new class for these
      {
        **info,
        **value,
        conditions: conditions
      }
    end

    # Extracts the case index, label and bracketed number from an info line.
    def parse_anomaly_info_line(line)
      matches = line.match(ANOMALY_INFO)
      raise ParseError, "unrecognised anomaly line: #{line.inspect}" unless matches

      {
        case_index: matches[1],
        case_label: matches[2].strip.delete_prefix("(label ").delete_suffix(")"),
        # NOTE: the key is spelled "signifigance" (sic); it is kept unchanged
        # because it is part of the returned hash.
        signifigance: matches[3].to_f
      }
    end

    # Extracts the attribute, its value and the parenthesised explanation.
    def parse_anomaly_value_line(line)
      matches = line.match(ANOMALY_VALUE)
      raise ParseError, "unrecognised anomaly value line: #{line.inspect}" unless matches

      {
        attribute: matches[1].strip,
        value: matches[2],
        reason: matches[3]
      }
    end

    # Conditions are kept as their stripped text.
    def parse_condition_line(line)
      line.strip
    end
  end
end
|
@@ -20,6 +20,9 @@ module See5
|
|
20
20
|
# Writes the names file and the data file, then closes both IO objects.
#
# The closes run in an `ensure` so the handles are released even when one of
# the writes raises; the nested `ensure` makes sure data_io is still closed
# if closing names_io itself fails.
# NOTE(review): names_io and data_io are defined outside the visible hunk --
# this assumes they return the IO objects the two write methods use.
def write_files
  write_names_file
  write_data_file
ensure
  begin
    names_io.close
  ensure
    data_io.close
  end
end
|
24
27
|
|
25
28
|
def write_names_file
|
@@ -38,7 +41,12 @@ module See5
|
|
38
41
|
|
39
42
|
# Builds one comma-separated line for +record+, with one value per schema
# attribute, in the order @schema.attributes yields them.
#
# @param record [Hash, Object] a Hash keyed by attribute name, or an object
#   exposing each attribute through a public reader method
# @return [String] the attribute values joined with ","
def row(record)
  @schema.attributes.map do |attr, _vals|
    if record.is_a?(Hash)
      record[attr]
    else
      # assume some kind of OpenStruct- or ActiveModel-like object.
      # public_send rather than send: attribute names come from the data, and
      # send would also reach private methods (including Kernel's) by name.
      record.public_send(attr)
    end
  end.join(",")
end
|
44
52
|
|
@@ -6,7 +6,7 @@ module See5
|
|
6
6
|
# Read See5 rules output and return an array of hashes representing the rules
|
7
7
|
# Note that this is the output normally sent to stdout, NOT the .rules file!
|
8
8
|
# The .rules file lacks some important information like confidence.
|
9
|
-
class
|
9
|
+
class RulesOutputParser
|
10
10
|
def self.parse_file(fname)
|
11
11
|
new(fname).model
|
12
12
|
end
|
data/lib/see5/schema.rb
CHANGED
@@ -22,5 +22,25 @@ module See5
|
|
22
22
|
"#{attr}: #{vals.join(',')}"
|
23
23
|
end.join("\n")
|
24
24
|
end
|
25
|
+
|
26
|
+
# Infers a schema from a dataset.
#
# The class attribute may be given positionally or with the class_attribute:
# keyword, so both call styles work; when both are given the keyword wins.
#
# @param data [Enumerable<Hash>] records; each is indexed with [] and
#   iterated as key/value pairs
# @param positional_class_attribute [#to_sym, nil] class attribute name,
#   positional form
# @param class_attribute [#to_sym, nil] class attribute name, keyword form
# @return a new instance built from the distinct class values, the distinct
#   values seen for every key (in first-seen order), and the class attribute
# @raise [ArgumentError] if no class attribute is given
def self.from_dataset(data, positional_class_attribute = nil, class_attribute: positional_class_attribute)
  raise ArgumentError, "class_attribute is required" if class_attribute.nil?

  class_key = class_attribute.to_sym
  classes = data.map { |record| record[class_key] }.uniq

  attributes = data.each_with_object({}) do |record, seen|
    record.each do |key, value|
      values = (seen[key] ||= [])
      # include? compares with ==, so e.g. 1 and 1.0 count as the same value
      values << value unless values.include?(value)
    end
  end

  new(classes: classes,
      attributes: attributes,
      class_attribute: class_attribute)
end
|
25
45
|
end
|
26
46
|
end
|
data/lib/see5/version.rb
CHANGED
data/see5.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: see5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eddie Lebow
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '13.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '13.0'
|
55
55
|
description:
|
56
56
|
email:
|
57
57
|
- elebow@users.noreply.github.com
|
@@ -62,13 +62,15 @@ files:
|
|
62
62
|
- ".gitignore"
|
63
63
|
- ".rubocop.yml"
|
64
64
|
- Gemfile
|
65
|
+
- Gemfile.lock
|
65
66
|
- README.md
|
66
67
|
- Rakefile
|
67
68
|
- lib/see5.rb
|
69
|
+
- lib/see5/gritbot_output_parser.rb
|
68
70
|
- lib/see5/input_file_writer.rb
|
69
71
|
- lib/see5/model.rb
|
70
72
|
- lib/see5/rule.rb
|
71
|
-
- lib/see5/
|
73
|
+
- lib/see5/rules_output_parser.rb
|
72
74
|
- lib/see5/schema.rb
|
73
75
|
- lib/see5/version.rb
|
74
76
|
- see5.gemspec
|