see5 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +22 -0
- data/lib/see5.rb +33 -1
- data/lib/see5/gritbot_output_parser.rb +92 -0
- data/lib/see5/input_file_writer.rb +9 -1
- data/lib/see5/{rules_file_parser.rb → rules_output_parser.rb} +1 -1
- data/lib/see5/schema.rb +20 -0
- data/lib/see5/version.rb +1 -1
- data/see5.gemspec +1 -1
- metadata +7 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f2f7b552c03f5df7d375e4fdf2af7a30bbd25cd4c55634a5be14b5abd0f511f2
|
4
|
+
data.tar.gz: 03056357c74c6cd58145c5a60f9a759b2f8c04fb7809357163900415da1a1791
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd9b563564b17b5b9f2f770448b59ed75947b0ad61fc26377529fed84cca56f5ac31f2b8950635a1a53bd8667d289d19a2cd5cedc2a2783c04b1f485d27cfcfc
|
7
|
+
data.tar.gz: f5b0be6ae50abef89eab752f4008b454a49b00f68bc59bff54a99adcc6968950fc898f43785eca0ee3c4915f75616edbae5c77fd953953a27156976cb51f7206
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
see5 (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
minitest (5.14.1)
|
10
|
+
rake (13.0.1)
|
11
|
+
|
12
|
+
PLATFORMS
|
13
|
+
ruby
|
14
|
+
|
15
|
+
DEPENDENCIES
|
16
|
+
bundler (~> 1.17)
|
17
|
+
minitest (~> 5.0)
|
18
|
+
rake (~> 13.0)
|
19
|
+
see5!
|
20
|
+
|
21
|
+
BUNDLED WITH
|
22
|
+
1.17.2
|
data/lib/see5.rb
CHANGED
@@ -2,6 +2,38 @@
|
|
2
2
|
|
3
3
|
require "see5/input_file_writer"
|
4
4
|
require "see5/model"
|
5
|
-
require "see5/rules_file_parser"
|
5
|
+
require "see5/rules_output_parser"
|
6
|
+
require "see5/gritbot_output_parser"
|
6
7
|
require "see5/schema"
|
7
8
|
require "see5/version"
|
9
|
+
|
10
|
+
module See5
  # Train a rule-based classifier on +data+ by shelling out to `c5.0`.
  #
  # Writes the input files, runs `c5.0` in rules mode and parses the text
  # the program printed.
  #
  # NOTE(review): all intermediate files use the fixed stem /tmp/ruby-see5,
  # so concurrent calls in different processes or threads will overwrite
  # each other's files. Confirm whether that matters for your usage.
  #
  # @param data the records to learn from
  # @param class_attribute name of the attribute that holds the class label
  # @return [See5::Model] model built from the parsed rules output
  def self.train(data, class_attribute)
    prepare_tmp_files(data, class_attribute)
    run_see5

    output = See5::RulesOutputParser.parse_file("/tmp/ruby-see5.rules_output")

    See5::Model.new(**output)
  end

  # Look for anomalous records in +data+ by shelling out to `gritbot`.
  #
  # @param data the records to audit
  # @param class_attribute name of the attribute that holds the class label
  # @return whatever See5::GritbotOutputParser.parse_file returns for the
  #   captured gritbot output
  def self.audit(data, class_attribute)
    prepare_tmp_files(data, class_attribute)
    run_gritbot

    See5::GritbotOutputParser.parse_file("/tmp/ruby-see5.gritbot_output")
  end

  # Infer a schema from +data+ and hand both to See5::InputFileWriter.
  #
  # @param data the records to write
  # @param class_attribute name of the attribute that holds the class label
  # @return the result of See5::InputFileWriter.write_files
  def self.prepare_tmp_files(data, class_attribute)
    # Schema.from_dataset declares class_attribute as a keyword argument, so
    # it must be passed by name; a positional second argument raises
    # ArgumentError (wrong number of arguments).
    schema = See5::Schema.from_dataset(data, class_attribute: class_attribute)
    See5::InputFileWriter.write_files(data: data, schema: schema)
  end

  # Run `c5.0` against the /tmp/ruby-see5 file stem with the -r flag and
  # redirect its standard output to /tmp/ruby-see5.rules_output.
  #
  # @return [Boolean, nil] the result of Kernel#system; it is not checked here
  def self.run_see5
    system("c5.0 -f /tmp/ruby-see5 -r > /tmp/ruby-see5.rules_output")
  end

  # Run `gritbot` against the /tmp/ruby-see5 file stem and redirect its
  # standard output to /tmp/ruby-see5.gritbot_output.
  #
  # @return [Boolean, nil] the result of Kernel#system; it is not checked here
  def self.run_gritbot
    system("gritbot -s -f /tmp/ruby-see5 -r > /tmp/ruby-see5.gritbot_output")
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module See5
  # Read Gritbot output and return an array of hashes representing the anomalies.
  #
  # The parser skips everything up to the "<n> possible anomal..." line, then
  # reads anomaly entries until a line starting with "Time:" or the end of the
  # file, whichever comes first.
  class GritbotOutputParser
    # Raised when an anomaly entry does not have the expected layout.
    class ParseError < StandardError; end

    # Line announcing how many anomalies follow; marks the end of the header.
    ANOMALY_COUNT_LINE = /\A\d+ possible anomal/.freeze
    # Lines reporting cases excluded while checking an attribute.
    EXCLUDED_CASES_LINE = /\A\s*while checking/.freeze
    # First line of an anomaly entry ("case ", "test case " or "data case ").
    ANOMALY_LINE = /\A(?:test |data )?case /.freeze
    # Prefix of the trailer line that ends the anomalies section.
    TRAILER_PREFIX = "Time:"

    attr_reader :anomalies

    # Parse the Gritbot output stored at +fname+.
    #
    # @param fname [String] path of the captured Gritbot output
    # @return [Array<Hash>] one hash per anomaly (see #parse_anomaly)
    # @raise [ParseError] when an anomaly entry cannot be understood
    def self.parse_file(fname)
      new(fname).anomalies
    end

    # @param fname [String] path of the captured Gritbot output
    def initialize(fname)
      # Read everything up front so no file handle stays open; the lines are
      # then consumed one by one from this queue.
      @pending = File.readlines(fname)
      @anomalies = []

      parse_file
    end

    # Consume the remaining lines, appending every anomaly found to
    # +anomalies+. Stops at the "Time:" trailer or when the input runs out.
    #
    # @return [nil]
    def parse_file
      discard_header

      while (line = next_line)
        break if line.start_with?(TRAILER_PREFIX)
        # TODO: record excluded cases
        next if line.match?(EXCLUDED_CASES_LINE)

        @anomalies << parse_anomaly(line) if line.match?(ANOMALY_LINE)
      end
    end

    private

    # @return [String, nil] the next unread line, or nil when none are left
    def next_line
      @pending.shift
    end

    # Discard the file header and advance to the anomalies section
    def discard_header
      while (line = next_line)
        break if line.match?(ANOMALY_COUNT_LINE)
      end
      # discard the blank line that follows the anomaly count
      next_line
    end

    # Parse one anomaly entry: the info line already read by the caller, the
    # value line after it, and any condition lines up to the next blank line
    # (or the end of the input).
    #
    # @param line [String] the "... case N: ..." line that opens the entry
    # @return [Hash] :case_index, :case_label, :signifigance, :attribute,
    #   :value, :reason and :conditions
    def parse_anomaly(line)
      info = parse_anomaly_info_line(line)
      value = parse_anomaly_value_line(next_line.to_s)

      conditions = []
      while (condition = next_line)
        condition = condition.strip
        break if condition.empty?

        conditions << parse_condition_line(condition)
      end

      # TODO: new class for these
      { **info, **value, conditions: conditions }
    end

    # @param line [String] e.g. "test case 3: (label abc) [0.016]"
    # @return [Hash] :case_index, :case_label and :signifigance
    # @raise [ParseError] when the line does not have that shape
    def parse_anomaly_info_line(line)
      matches = line.match(/.*case (.*): (.*)\[([\.\d]+)\]/)
      raise ParseError, "unrecognised anomaly line: #{line.inspect}" unless matches

      {
        case_index: matches[1],
        case_label: matches[2].strip.delete_prefix("(label ").delete_suffix(")"),
        # NOTE: the key is misspelled ("signifigance"); it is kept as-is
        # because callers read the hash by this key.
        signifigance: matches[3].to_f
      }
    end

    # @param line [String] e.g. "age = 455  (2799 cases, mean 52)"
    # @return [Hash] :attribute, :value and :reason
    # @raise [ParseError] when the line does not have that shape
    def parse_anomaly_value_line(line)
      matches = line.match(/(.*) = (\S*)\s*\((.*)\)/)
      raise ParseError, "unrecognised anomaly value line: #{line.inspect}" unless matches

      {
        attribute: matches[1].strip,
        value: matches[2],
        reason: matches[3]
      }
    end

    # @param line [String] a condition line from an anomaly entry
    # @return [String] the line without surrounding whitespace
    def parse_condition_line(line)
      line.strip
    end
  end
end
|
@@ -20,6 +20,9 @@ module See5
|
|
20
20
|
# Write the names file and the data file, then close both IO handles.
#
# The handles are closed in an +ensure+ clause so they are released even
# when one of the writes raises.
# NOTE(review): assumes names_io and data_io return the same handle on every
# call (e.g. memoized); confirm against their definitions.
#
# @return [nil]
def write_files
  write_names_file
  write_data_file

  # Keep the method's result nil, as it was when the last statement was
  # the close call.
  nil
ensure
  names_io.close
  data_io.close
end
|
24
27
|
|
25
28
|
def write_names_file
|
@@ -38,7 +41,12 @@ module See5
|
|
38
41
|
|
39
42
|
# Build one comma-separated line for +record+, with the values in the order
# of the schema's attributes.
#
# @param record [Hash, Object] a Hash keyed by attribute, or an object that
#   answers to each attribute name (OpenStruct- or ActiveModel-like)
# @return [String] the attribute values joined with ","
def row(record)
  @schema.attributes.map do |attr, _vals|
    if record.is_a?(Hash)
      record[attr]
    else
      # public_send rather than send: an attribute named like a private
      # Kernel method (format, test, ...) must reach the record's own
      # accessor or method_missing, not the Kernel method.
      record.public_send(attr)
    end
  end.join(",")
end
|
44
52
|
|
@@ -6,7 +6,7 @@ module See5
|
|
6
6
|
# Read See5 rules output and return an array of hashes representing the rules
|
7
7
|
# Note that this is the output normally sent to stdout, NOT the .rules file!
|
8
8
|
# The .rules file lacks some important information like confidence.
|
9
|
-
class
|
9
|
+
class RulesOutputParser
|
10
10
|
# Build a parser for the file at +fname+ and return its +model+.
def self.parse_file(fname)
|
11
11
|
new(fname).model
|
12
12
|
end
|
data/lib/see5/schema.rb
CHANGED
@@ -22,5 +22,25 @@ module See5
|
|
22
22
|
"#{attr}: #{vals.join(',')}"
|
23
23
|
end.join("\n")
|
24
24
|
end
|
25
|
+
|
26
|
+
# Infers a schema from a dataset.
#
# Every record is read with +[]+ and +each+, so records are expected to be
# Hash-like. The class label is looked up under +class_attribute.to_sym+.
#
# @param data [Enumerable] the records to inspect
# @param class_attribute [#to_sym] name of the attribute holding the class
#   label; required despite being a keyword with a nil default
# @return a new schema whose +classes+ are the distinct class labels and
#   whose +attributes+ map each key to its distinct values, both in order of
#   first appearance
# @raise [ArgumentError] when +class_attribute+ is nil
def self.from_dataset(data, class_attribute: nil)
  # Fail with a clear message instead of NoMethodError on nil.to_sym.
  raise ArgumentError, "class_attribute is required" if class_attribute.nil?

  class_key = class_attribute.to_sym
  classes = data.map { |record| record[class_key] }.uniq

  attributes = {}
  data.each do |record|
    record.each do |key, value|
      seen = (attributes[key] ||= [])
      seen << value unless seen.include?(value)
    end
  end

  new(classes: classes,
      attributes: attributes,
      class_attribute: class_attribute)
end
|
25
45
|
end
|
26
46
|
end
|
data/lib/see5/version.rb
CHANGED
data/see5.gemspec
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: see5
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Eddie Lebow
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-01-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -44,14 +44,14 @@ dependencies:
|
|
44
44
|
requirements:
|
45
45
|
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '
|
47
|
+
version: '13.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
52
|
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '
|
54
|
+
version: '13.0'
|
55
55
|
description:
|
56
56
|
email:
|
57
57
|
- elebow@users.noreply.github.com
|
@@ -62,13 +62,15 @@ files:
|
|
62
62
|
- ".gitignore"
|
63
63
|
- ".rubocop.yml"
|
64
64
|
- Gemfile
|
65
|
+
- Gemfile.lock
|
65
66
|
- README.md
|
66
67
|
- Rakefile
|
67
68
|
- lib/see5.rb
|
69
|
+
- lib/see5/gritbot_output_parser.rb
|
68
70
|
- lib/see5/input_file_writer.rb
|
69
71
|
- lib/see5/model.rb
|
70
72
|
- lib/see5/rule.rb
|
71
|
-
- lib/see5/rules_file_parser.rb
|
73
|
+
- lib/see5/rules_output_parser.rb
|
72
74
|
- lib/see5/schema.rb
|
73
75
|
- lib/see5/version.rb
|
74
76
|
- see5.gemspec
|