see5 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6d2b54a9d4d3c9f3271505bf233a46772cacfd1702312a68dab174b197265f59
4
- data.tar.gz: cb5a5d275b08cb8ce7ca00be58c81fd015902578d31c1d6e8a7d1b5d2936225a
3
+ metadata.gz: f2f7b552c03f5df7d375e4fdf2af7a30bbd25cd4c55634a5be14b5abd0f511f2
4
+ data.tar.gz: 03056357c74c6cd58145c5a60f9a759b2f8c04fb7809357163900415da1a1791
5
5
  SHA512:
6
- metadata.gz: 903fb8b64822a99a15c14383fc57932af364bb1c355046b4b022b9588c2d7359c5bdbcf5221f4282ba9b1f9a4200f746d390d962367bcad8a39ecd61e2671839
7
- data.tar.gz: 98594a2c2be18ede5633ee7ba66894d2ce2b2eddead9d10eb4531728c349b8b9db95e439f7ca1407d9c92a6abed363549127a7b731a8f5c53af984096d0fe1d5
6
+ metadata.gz: dd9b563564b17b5b9f2f770448b59ed75947b0ad61fc26377529fed84cca56f5ac31f2b8950635a1a53bd8667d289d19a2cd5cedc2a2783c04b1f485d27cfcfc
7
+ data.tar.gz: f5b0be6ae50abef89eab752f4008b454a49b00f68bc59bff54a99adcc6968950fc898f43785eca0ee3c4915f75616edbae5c77fd953953a27156976cb51f7206
@@ -0,0 +1,22 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ see5 (0.1.0)
5
+
6
+ GEM
7
+ remote: https://rubygems.org/
8
+ specs:
9
+ minitest (5.14.1)
10
+ rake (13.0.1)
11
+
12
+ PLATFORMS
13
+ ruby
14
+
15
+ DEPENDENCIES
16
+ bundler (~> 1.17)
17
+ minitest (~> 5.0)
18
+ rake (~> 13.0)
19
+ see5!
20
+
21
+ BUNDLED WITH
22
+ 1.17.2
@@ -2,6 +2,38 @@
2
2
 
3
3
  require "see5/input_file_writer"
4
4
  require "see5/model"
5
- require "see5/rules_file_parser"
5
+ require "see5/rules_output_parser"
6
+ require "see5/gritbot_output_parser"
6
7
  require "see5/schema"
7
8
  require "see5/version"
9
+
10
# Ruby front end for the `c5.0` and `gritbot` command-line programs.
#
# Both entry points hand the dataset to See5::InputFileWriter, run the
# external program against the /tmp/ruby-see5 file stem with its standard
# output redirected to a file, and then parse that captured output.
module See5
  # Trains a rule-based classifier by running `c5.0 -r`.
  #
  # @param data [Enumerable] records to learn from (see See5::Schema.from_dataset)
  # @param class_attribute [Symbol, String] attribute that holds each record's class
  # @return [See5::Model] model built from the parsed rules output
  def self.train(data, class_attribute)
    prepare_tmp_files(data, class_attribute)
    run_see5

    output = See5::RulesOutputParser.parse_file("/tmp/ruby-see5.rules_output")

    See5::Model.new(**output)
  end

  # Looks for anomalous records by running `gritbot`.
  #
  # @param data [Enumerable] records to check (see See5::Schema.from_dataset)
  # @param class_attribute [Symbol, String] attribute that holds each record's class
  # @return [Array<Hash>] whatever See5::GritbotOutputParser.parse_file returns
  def self.audit(data, class_attribute)
    prepare_tmp_files(data, class_attribute)
    run_gritbot

    See5::GritbotOutputParser.parse_file("/tmp/ruby-see5.gritbot_output")
  end

  # Infers a schema from the dataset and writes the input files for the
  # external tools.
  def self.prepare_tmp_files(data, class_attribute)
    # Schema.from_dataset declares class_attribute as a keyword argument, so it
    # must be passed by name; a second positional argument raises ArgumentError.
    schema = See5::Schema.from_dataset(data, class_attribute: class_attribute)
    See5::InputFileWriter.write_files(data: data, schema: schema)
  end

  # Runs c5.0 in rules mode and captures its standard output.
  #
  # @return [Boolean, nil] the result of Kernel#system (not checked by callers)
  def self.run_see5
    system("c5.0 -f /tmp/ruby-see5 -r > /tmp/ruby-see5.rules_output")
  end

  # Runs gritbot and captures its standard output.
  #
  # @return [Boolean, nil] the result of Kernel#system (not checked by callers)
  def self.run_gritbot
    system("gritbot -s -f /tmp/ruby-see5 -r > /tmp/ruby-see5.gritbot_output")
  end
end
@@ -0,0 +1,92 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # Read Gritbot output and return an array of hashes representing the anomalies
  class GritbotOutputParser
    # Raised when an anomaly's info or value line does not have the expected shape.
    class ParseError < StandardError; end

    # Summary line that ends the header, e.g. "2 possible anomalies identified".
    ANOMALY_COUNT_LINE = /\d+ possible anomal/
    # Notes about excluded cases; currently skipped.
    EXCLUSION_LINE = /\s*while checking/
    # First line of an anomaly: "case N: ...", "test case N: ..." or "data case N: ...".
    ANOMALY_LINE = /(?:test |data )?case /
    # Captures: case index, label text, bracketed numeric score.
    INFO_LINE = /.*case (.*): (.*)\[([\.\d]+)\]/
    # Captures: attribute name, value, parenthesised reason.
    VALUE_LINE = /(.*) = (\S*)\s*\((.*)\)/

    attr_reader :anomalies

    # Parses the file at +fname+ and returns the anomalies found in it.
    #
    # @param fname [String] path to a file holding captured Gritbot output
    # @return [Array<Hash>] one hash per anomaly with the keys :case_index,
    #   :case_label, :signifigance, :attribute, :value, :reason and :conditions
    # @raise [ParseError] if an anomaly line cannot be matched
    def self.parse_file(fname)
      new(fname).anomalies
    end

    # @param fname [String] path to a file holding captured Gritbot output
    def initialize(fname)
      # Read everything up front: no file handle is left open, and running out
      # of lines becomes a nil from #next_line instead of a StopIteration.
      @lines = File.readlines(fname)
      @anomalies = []

      parse_file
    end

    # Consumes the remaining lines, appending every anomaly to #anomalies.
    # Stops at the "Time:" footer, or at the end of the input if there is none.
    def parse_file
      discard_header

      while (line = next_line)
        break if line.start_with?("Time:")
        next if line.start_with?(EXCLUSION_LINE) # TODO: record excluded cases

        @anomalies << parse_anomaly(line) if line.start_with?(ANOMALY_LINE)
      end
    end

    private

    # Returns the next unread line, or nil once the input is exhausted.
    def next_line
      @lines.shift
    end

    # Discard the file header and advance to the anomalies section
    def discard_header
      while (line = next_line)
        break if line.start_with?(ANOMALY_COUNT_LINE)
      end
      # discard the blank line that follows the summary line
      next_line
    end

    # Parses one anomaly: the info line already read by the caller, the value
    # line after it, and condition lines up to the next blank line (or the end
    # of the input).
    def parse_anomaly(line)
      info = parse_anomaly_info_line(line)
      value = parse_anomaly_value_line(next_line)

      conditions = []
      while (condition = next_line)
        condition = condition.strip
        break if condition.empty?

        conditions << parse_condition_line(condition)
      end

      # TODO: new class for these
      { **info, **value, conditions: conditions }
    end

    def parse_anomaly_info_line(line)
      matches = INFO_LINE.match(line)
      raise ParseError, "unrecognised anomaly line: #{line.inspect}" unless matches

      {
        case_index: matches[1],
        case_label: matches[2].strip.delete_prefix("(label ").delete_suffix(")"),
        # NOTE: the key is misspelled ("signifigance"); it is kept as-is because
        # callers may already depend on it.
        signifigance: matches[3].to_f
      }
    end

    def parse_anomaly_value_line(line)
      # line is nil when the input ends right after an info line
      matches = VALUE_LINE.match(line.to_s)
      raise ParseError, "unrecognised anomaly value line: #{line.inspect}" unless matches

      {
        attribute: matches[1].strip,
        value: matches[2],
        reason: matches[3]
      }
    end

    def parse_condition_line(line)
      line.strip
    end
  end
end
@@ -20,6 +20,9 @@ module See5
20
20
  def write_files
21
21
  write_names_file
22
22
  write_data_file
23
+
24
+ names_io.close
25
+ data_io.close
23
26
  end
24
27
 
25
28
  def write_names_file
@@ -38,7 +41,12 @@ module See5
38
41
 
39
42
  def row(record)
40
43
  @schema.attributes.map do |attr, _vals|
41
- record.send(attr)
44
+ if record.is_a?(Hash)
45
+ record[attr]
46
+ else
47
+ # assume some kind of OpenStruct- or ActiveModel-like object
48
+ record.send(attr)
49
+ end
42
50
  end.join(",")
43
51
  end
44
52
 
@@ -6,7 +6,7 @@ module See5
6
6
  # Read See5 rules output and return an array of hashes representing the rules
7
7
  # Note that this is the output normally sent to stdout, NOT the .rules file!
8
8
  # The .rules file lacks some important information like confidence.
9
- class RulesFileParser
9
+ class RulesOutputParser
10
10
  def self.parse_file(fname)
11
11
  new(fname).model
12
12
  end
@@ -22,5 +22,25 @@ module See5
22
22
  "#{attr}: #{vals.join(',')}"
23
23
  end.join("\n")
24
24
  end
25
+
26
# Infers a schema from a dataset.
#
# The class attribute may be given either as the +class_attribute:+ keyword
# or as a second positional argument; the keyword wins when both are present.
#
# @param data [Enumerable<Hash>] records; each is indexed with the symbolised
#   class attribute and iterated as key/value pairs
# @param positional_class_attribute [Symbol, String, nil] positional form of
#   +class_attribute+
# @param class_attribute [Symbol, String, nil] attribute holding the class
# @return [Schema] schema holding the distinct classes and, per attribute,
#   the distinct values in first-seen order
# @raise [ArgumentError] if no class attribute is given
def self.from_dataset(data, positional_class_attribute = nil, class_attribute: nil)
  class_attribute ||= positional_class_attribute
  raise ArgumentError, "class_attribute is required" if class_attribute.nil?

  class_key = class_attribute.to_sym
  classes = data.map { |record| record[class_key] }.uniq

  attributes = {}
  data.each do |record|
    record.each do |key, value|
      seen = (attributes[key] ||= [])
      seen << value unless seen.include?(value)
    end
  end

  new(classes: classes,
      attributes: attributes,
      class_attribute: class_attribute)
end
25
45
  end
26
46
  end
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module See5
4
- VERSION = "0.1.0"
4
+ VERSION = "0.2.0"
5
5
  end
@@ -34,5 +34,5 @@ Gem::Specification.new do |spec|
34
34
 
35
35
  spec.add_development_dependency "bundler", "~> 1.17"
36
36
  spec.add_development_dependency "minitest", "~> 5.0"
37
- spec.add_development_dependency "rake", "~> 10.0"
37
+ spec.add_development_dependency "rake", "~> 13.0"
38
38
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: see5
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Eddie Lebow
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-03-03 00:00:00.000000000 Z
11
+ date: 2021-01-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -44,14 +44,14 @@ dependencies:
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '10.0'
47
+ version: '13.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '10.0'
54
+ version: '13.0'
55
55
  description:
56
56
  email:
57
57
  - elebow@users.noreply.github.com
@@ -62,13 +62,15 @@ files:
62
62
  - ".gitignore"
63
63
  - ".rubocop.yml"
64
64
  - Gemfile
65
+ - Gemfile.lock
65
66
  - README.md
66
67
  - Rakefile
67
68
  - lib/see5.rb
69
+ - lib/see5/gritbot_output_parser.rb
68
70
  - lib/see5/input_file_writer.rb
69
71
  - lib/see5/model.rb
70
72
  - lib/see5/rule.rb
71
- - lib/see5/rules_file_parser.rb
73
+ - lib/see5/rules_output_parser.rb
72
74
  - lib/see5/schema.rb
73
75
  - lib/see5/version.rb
74
76
  - see5.gemspec