see5 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d2b54a9d4d3c9f3271505bf233a46772cacfd1702312a68dab174b197265f59
4
- data.tar.gz: cb5a5d275b08cb8ce7ca00be58c81fd015902578d31c1d6e8a7d1b5d2936225a
3
+ metadata.gz: f2f7b552c03f5df7d375e4fdf2af7a30bbd25cd4c55634a5be14b5abd0f511f2
4
+ data.tar.gz: 03056357c74c6cd58145c5a60f9a759b2f8c04fb7809357163900415da1a1791
5
5
  SHA512:
6
- metadata.gz: 903fb8b64822a99a15c14383fc57932af364bb1c355046b4b022b9588c2d7359c5bdbcf5221f4282ba9b1f9a4200f746d390d962367bcad8a39ecd61e2671839
7
- data.tar.gz: 98594a2c2be18ede5633ee7ba66894d2ce2b2eddead9d10eb4531728c349b8b9db95e439f7ca1407d9c92a6abed363549127a7b731a8f5c53af984096d0fe1d5
6
+ metadata.gz: dd9b563564b17b5b9f2f770448b59ed75947b0ad61fc26377529fed84cca56f5ac31f2b8950635a1a53bd8667d289d19a2cd5cedc2a2783c04b1f485d27cfcfc
7
+ data.tar.gz: f5b0be6ae50abef89eab752f4008b454a49b00f68bc59bff54a99adcc6968950fc898f43785eca0ee3c4915f75616edbae5c77fd953953a27156976cb51f7206
@@ -0,0 +1,22 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ see5 (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ minitest (5.14.1)
10
+ rake (13.0.1)
11
+
12
+ PLATFORMS
13
+ ruby
14
+
15
+ DEPENDENCIES
16
+ bundler (~> 1.17)
17
+ minitest (~> 5.0)
18
+ rake (~> 13.0)
19
+ see5!
20
+
21
+ BUNDLED WITH
22
+ 1.17.2
@@ -2,6 +2,38 @@
2
2
 
3
3
  require "see5/input_file_writer"
4
4
  require "see5/model"
5
- require "see5/rules_file_parser"
5
+ require "see5/rules_output_parser"
6
+ require "see5/gritbot_output_parser"
6
7
  require "see5/schema"
7
8
  require "see5/version"
9
+
10
+ module See5
11
+ def self.train(data, class_attribute)
12
+ prepare_tmp_files(data, class_attribute)
13
+ run_see5
14
+
15
+ output = See5::RulesOutputParser.parse_file("/tmp/ruby-see5.rules_output")
16
+
17
+ See5::Model.new(**output)
18
+ end
19
+
20
+ def self.audit(data, class_attribute)
21
+ prepare_tmp_files(data, class_attribute)
22
+ run_gritbot
23
+
24
+ See5::GritbotOutputParser.parse_file("/tmp/ruby-see5.gritbot_output")
25
+ end
26
+
27
+ def self.prepare_tmp_files(data, class_attribute)
28
+ schema = See5::Schema.from_dataset(data, class_attribute)
29
+ See5::InputFileWriter.write_files(data: data, schema: schema)
30
+ end
31
+
32
+ def self.run_see5
33
+ system("c5.0 -f /tmp/ruby-see5 -r > /tmp/ruby-see5.rules_output")
34
+ end
35
+
36
+ def self.run_gritbot
37
+ system("gritbot -s -f /tmp/ruby-see5 -r > /tmp/ruby-see5.gritbot_output")
38
+ end
39
+ end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
+ module See5
4
+ # Read Gritbot output and return an array of hashes representing the anomalies
5
+ class GritbotOutputParser
6
+ attr_reader :anomalies
7
+
8
+ def self.parse_file(fname)
9
+ new(fname).anomalies
10
+ end
11
+
12
+ def initialize(fname)
13
+ @file = File.open(fname)
14
+ @anomalies = []
15
+
16
+ parse_file
17
+ end
18
+
19
+ def parse_file
20
+ discard_header
21
+
22
+ while (line = lines.next)
23
+ if line.start_with?(/\s*while checking/)
24
+ # TODO record excluded cases
25
+ elsif line.start_with?(/(:?test |data )?case /)
26
+ @anomalies << parse_anomaly(line)
27
+ elsif line.start_with?("Time:")
28
+ break
29
+ end
30
+ end
31
+ end
32
+
33
+ private
34
+
35
+ def lines
36
+ # TODO: lazy unnecessary given that rules are small?
37
+ @file.each_line.lazy
38
+ end
39
+
40
+ # Discard the file header and advance to the anomalies section
41
+ def discard_header
42
+ while (line = lines.next)
43
+ break if line.start_with?(/\d+ possible anomal/)
44
+ end
45
+ # discard the final blank line
46
+ lines.next
47
+ end
48
+
49
+ def parse_anomaly(line)
50
+ info = parse_anomaly_info_line(line)
51
+ value = parse_anomaly_value_line(lines.next)
52
+
53
+ conditions = []
54
+ while (line = lines.next.strip)
55
+ break if line == ""
56
+
57
+ conditions << parse_condition_line(line)
58
+ end
59
+
60
+ # TODO new class for these
61
+ {
62
+ **info,
63
+ **value,
64
+ conditions: conditions
65
+ }
66
+ end
67
+
68
+ def parse_anomaly_info_line(line)
69
+ matches = line.match(/.*case (.*): (.*)\[([\.\d]+)\]/)
70
+
71
+ {
72
+ case_index: matches[1],
73
+ case_label: matches[2].strip.delete_prefix("(label ").delete_suffix(")"),
74
+ signifigance: matches[3].to_f
75
+ }
76
+ end
77
+
78
+ def parse_anomaly_value_line(line)
79
+ matches = line.match(/(.*) = (\S*)\s*\((.*)\)/)
80
+
81
+ {
82
+ attribute: matches[1].strip,
83
+ value: matches[2],
84
+ reason: matches[3]
85
+ }
86
+ end
87
+
88
+ def parse_condition_line(line)
89
+ line.strip
90
+ end
91
+ end
92
+ end
@@ -20,6 +20,9 @@ module See5
20
20
  def write_files
21
21
  write_names_file
22
22
  write_data_file
23
+
24
+ names_io.close
25
+ data_io.close
23
26
  end
24
27
 
25
28
  def write_names_file
@@ -38,7 +41,12 @@ module See5
38
41
 
39
42
  def row(record)
40
43
  @schema.attributes.map do |attr, _vals|
41
- record.send(attr)
44
+ if record.is_a?(Hash)
45
+ record[attr]
46
+ else
47
+ # assume some kind of OpenStruct- or ActiveModel-like object
48
+ record.send(attr)
49
+ end
42
50
  end.join(",")
43
51
  end
44
52
 
@@ -6,7 +6,7 @@ module See5
6
6
  # Read See5 rules output and return an array of hashes representing the rules
7
7
  # Note that this is the output normally sent to stdout, NOT the .rules file!
8
8
  # The .rules file lacks some important information like confidence.
9
- class RulesFileParser
9
+ class RulesOutputParser
10
10
  def self.parse_file(fname)
11
11
  new(fname).model
12
12
  end
@@ -22,5 +22,25 @@ module See5
22
22
  "#{attr}: #{vals.join(',')}"
23
23
  end.join("\n")
24
24
  end
25
+
26
+ # Infers a schema from a dataset
27
+ def self.from_dataset(data, class_attribute: nil)
28
+ classes = data.map { |record| record[class_attribute.to_sym] }.uniq
29
+
30
+ attributes = {}
31
+ data.each do |record|
32
+ record.each do |key, value|
33
+ if !attributes.include?(key)
34
+ attributes[key] = [value]
35
+ elsif !attributes[key].include?(value)
36
+ attributes[key].append(value)
37
+ end
38
+ end
39
+ end
40
+
41
+ new(classes: classes,
42
+ attributes: attributes,
43
+ class_attribute: class_attribute)
44
+ end
25
45
  end
26
46
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module See5
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
@@ -34,5 +34,5 @@ Gem::Specification.new do |spec|
34
34
 
35
35
  spec.add_development_dependency "bundler", "~> 1.17"
36
36
  spec.add_development_dependency "minitest", "~> 5.0"
37
- spec.add_development_dependency "rake", "~> 10.0"
37
+ spec.add_development_dependency "rake", "~> 13.0"
38
38
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: see5
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eddie Lebow
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-03 00:00:00.000000000 Z
11
+ date: 2021-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '10.0'
47
+ version: '13.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '10.0'
54
+ version: '13.0'
55
55
  description:
56
56
  email:
57
57
  - elebow@users.noreply.github.com
@@ -62,13 +62,15 @@ files:
62
62
  - ".gitignore"
63
63
  - ".rubocop.yml"
64
64
  - Gemfile
65
+ - Gemfile.lock
65
66
  - README.md
66
67
  - Rakefile
67
68
  - lib/see5.rb
69
+ - lib/see5/gritbot_output_parser.rb
68
70
  - lib/see5/input_file_writer.rb
69
71
  - lib/see5/model.rb
70
72
  - lib/see5/rule.rb
71
- - lib/see5/rules_file_parser.rb
73
+ - lib/see5/rules_output_parser.rb
72
74
  - lib/see5/schema.rb
73
75
  - lib/see5/version.rb
74
76
  - see5.gemspec