see5 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6d2b54a9d4d3c9f3271505bf233a46772cacfd1702312a68dab174b197265f59
4
+ data.tar.gz: cb5a5d275b08cb8ce7ca00be58c81fd015902578d31c1d6e8a7d1b5d2936225a
5
+ SHA512:
6
+ metadata.gz: 903fb8b64822a99a15c14383fc57932af364bb1c355046b4b022b9588c2d7359c5bdbcf5221f4282ba9b1f9a4200f746d390d962367bcad8a39ecd61e2671839
7
+ data.tar.gz: 98594a2c2be18ede5633ee7ba66894d2ce2b2eddead9d10eb4531728c349b8b9db95e439f7ca1407d9c92a6abed363549127a7b731a8f5c53af984096d0fe1d5
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ /.bundle/
2
+ /coverage/
3
+ /doc/
4
+ /pkg/
5
+ /spec/reports/
6
+ /tmp/
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ ---
2
+ Style/ClassAndModuleChildren:
3
+ Exclude:
4
+ - "test/**"
5
+
6
+ Style/Documentation:
7
+ Exclude:
8
+ - "test/**"
9
+
10
+ Style/StringLiterals:
11
+ EnforcedStyle: double_quotes
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
+
7
+ # Specify your gem's dependencies in see5.gemspec
8
+ gemspec
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Ruby::See5
2
+
3
+ A Ruby frontend for the See5/C5.0 family of classifiers and modellers. Builds models from Ruby objects.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'see5'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install see5
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Development
26
+
27
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
28
+
29
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
30
+
31
+ ## Contributing
32
+
33
+ Bug reports and pull requests are welcome on GitHub at https://github.com/elebow/ruby-see5.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
+ task default: :test
7
+
8
+ Rake::TestTask.new(:test) do |t|
9
+ t.libs << "test"
10
+ t.libs << "lib"
11
+ t.test_files = FileList["test/**/test_*.rb"]
12
+ end
data/lib/see5.rb ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "see5/input_file_writer"
4
+ require "see5/model"
5
+ require "see5/rules_file_parser"
6
+ require "see5/schema"
7
+ require "see5/version"
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "schema"
4
+
5
module See5
  # Writes names and data files suitable for See5, Cubist, or GritBot.
  #
  # The names file receives +schema.names_file_contents+; the data file
  # receives one comma-separated line per record, with one field per key of
  # +schema.attributes+ (in that hash's order).
  #
  # Output goes to the IO objects supplied by the caller. When an IO is not
  # supplied, the corresponding default path under /tmp is opened, written and
  # closed again within the same call, so the content is on disk as soon as
  # the method returns (each call truncates and rewrites the default file).
  class InputFileWriter
    DEFAULT_NAMES_PATH = "/tmp/ruby-see5.names"
    DEFAULT_DATA_PATH = "/tmp/ruby-see5.data"

    # Marker written in place of a nil attribute value.
    # NOTE(review): "?" is the missing-value marker described in the See5
    # tutorial; confirm it is also what Cubist and GritBot expect.
    MISSING_VALUE = "?"

    # Convenience wrapper: builds a writer and writes both files.
    #
    # @param data [Enumerable] records; each must respond publicly to every
    #   attribute name in the schema
    # @param schema [#attributes, #names_file_contents] e.g. a See5::Schema
    # @param names_io [IO, nil] destination for the names file
    # @param data_io [IO, nil] destination for the data file
    # @return whatever #write_files returns
    # @raise [ArgumentError] if no schema was given
    def self.write_files(data:, schema: nil, names_io: nil, data_io: nil)
      new(data: data, schema: schema, names_io: names_io, data_io: data_io)
        .write_files
    end

    def initialize(data:, schema: nil, names_io: nil, data_io: nil)
      @data = data
      @schema = schema # TODO: automatic schema from data objects' attributes
      @names_io = names_io
      @data_io = data_io
    end

    # Writes the names file, then the data file.
    #
    # @return the value of #write_data_file
    def write_files
      write_names_file
      write_data_file
    end

    # Writes the schema's names-file text to the names destination.
    #
    # @return the result of the underlying +write+ call
    def write_names_file
      with_io(@names_io, DEFAULT_NAMES_PATH) do |io|
        io.write(schema.names_file_contents)
      end
    end

    # Writes one line per record to the data destination.
    #
    # @return the data collection's own +each+ result
    def write_data_file
      # TODO: N/A (not applicable) values
      with_io(@data_io, DEFAULT_DATA_PATH) do |io|
        @data.each do |record|
          io.write(row(record))
          io.write("\n")
        end
      end
    end

    private

    # The schema, or an explicit error when the writer was built without one.
    def schema
      @schema || raise(ArgumentError, "a schema is required to write See5 input files")
    end

    # Builds the comma-separated line for one record.
    # public_send keeps an attribute name from accidentally invoking a private
    # method (e.g. Kernel#print) on the record.
    def row(record)
      fields = schema.attributes.map do |attr, _vals|
        value = record.public_send(attr)
        value.nil? ? MISSING_VALUE : value
      end

      fields.join(",")
    end

    # Yields the destination to write to and returns the block's value.
    # A caller-supplied IO is flushed but left open (the caller owns it);
    # otherwise +default_path+ is opened for writing and closed afterwards.
    def with_io(io, default_path)
      return File.open(default_path, "w") { |file| yield file } unless io

      result = yield io
      io.flush if io.respond_to?(:flush)
      result
    end
  end
end
data/lib/see5/model.rb ADDED
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # An ordered list of rules plus a fallback classification.
  class Model
    attr_reader :rules

    # @param default_classification [Object] returned by #classify when no
    #   rule matches
    # @param rules [Enumerable] objects responding to +match?(data)+ and
    #   +classification+, e.g. See5::Rule
    def initialize(default_classification:, rules:)
      @rules = rules
      @default_classification = default_classification
    end

    # Classifies +data+ with the first rule (in list order) whose +match?+
    # accepts it, falling back to the default classification.
    #
    # See5 orders rules by confidence within each class (TODO verify), so the
    # first match is taken to be the most confident one.
    #
    # @param data [Object] passed unchanged to each rule's +match?+
    # @return the matching rule's classification, or the default
    def classify(data)
      winner = rules.find { |candidate| candidate.match?(data) }

      winner.nil? ? @default_classification : winner.classification
    end
  end
end
data/lib/see5/rule.rb ADDED
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # A single classification rule: a set of attribute/value conditions and the
  # class assigned when all of them hold.
  class Rule
    attr_reader :classification, :confidence, :rule_info, :conditions

    # @param rule_info [Object] stored as-is and exposed via #rule_info
    # @param conditions [Hash] attribute => required value
    # @param class_info [Hash] must provide +:classification+ and
    #   +:confidence+
    def initialize(rule_info, conditions, class_info)
      @rule_info = rule_info
      @conditions = conditions
      @classification = class_info[:classification]
      @confidence = class_info[:confidence]
    end

    # Whether +data+ satisfies every condition of this rule.
    # Stops at the first failing condition; a rule without conditions matches
    # everything.
    #
    # @param data [#[]] looked up with each condition's attribute
    # @return [Boolean]
    def match?(data)
      conditions.all? { |attr, val| data[attr] == val }
    end

    # Short human-readable description of the rule.
    def to_s
      "#<See5::Rule, @classification=#{@classification}, @conditions=#{@conditions}>"
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "rule"
4
+
5
module See5
  # Reads See5 rules output and exposes it as a hash holding the default
  # classification and an array of See5::Rule objects (see #model).
  # Note that this is the output normally sent to stdout, NOT the .rules file!
  # The .rules file lacks some important information like confidence.
  class RulesFileParser
    # Raised when the input cannot be interpreted as See5 rules output:
    # a missing "Rules:" or "Default class:" line, a rule that ends before
    # its "->" line, or a line that does not match the expected shape.
    class ParseError < StandardError; end

    # Parses the file at +fname+.
    #
    # @param fname [String] path to the captured See5 output
    # @return [Hash] see #model
    # @raise [ParseError] if the content is not recognisable rules output
    def self.parse_file(fname)
      new(fname).model
    end

    def initialize(fname)
      @fname = fname
      @rules = []
      @default_classification = nil
      @lines = []

      parse_file
    end

    # @return [Hash] +:default_classification+ (String) and +:rules+
    #   (Array of See5::Rule, in file order); the keys match the keyword
    #   arguments of See5::Model#initialize
    def model
      {
        default_classification: @default_classification,
        rules: @rules
      }
    end

    # (Re)reads the file from the start and rebuilds the rule list.
    # The whole file is read up front, so no file handle stays open.
    #
    # @return [nil]
    # @raise [ParseError] if the content is not recognisable rules output
    def parse_file
      @rules = []
      @default_classification = nil
      @lines = File.readlines(@fname)

      discard_header
      parse_rules
    ensure
      # The raw lines are only needed while parsing.
      @lines = []
    end

    private

    # Next unread line (with its line terminator), or nil at end of input.
    def next_line
      @lines.shift
    end

    # Discard the file header and advance to the rules section.
    # The comparison ignores surrounding whitespace so CRLF input works too.
    # TODO: save the data from the header, in case user wants it?
    def discard_header
      while (line = next_line)
        return if line.strip == "Rules:"
      end

      raise ParseError, "no \"Rules:\" section found in #{@fname}"
    end

    # Collects every rule up to the "Default class:" line, which ends the
    # rules section. Lines that start neither a rule nor the default-class
    # entry (e.g. blank separators) are skipped.
    def parse_rules
      while (line = next_line)
        if line.start_with?("Rule ")
          @rules << parse_rule(line)
        elsif line.start_with?("Default class:")
          @default_classification = line.split(":", 2).last.strip

          return
        end
      end

      raise ParseError, "no \"Default class:\" line found in #{@fname}"
    end

    # Parses one rule: the "Rule N: (...)" line already read by the caller,
    # followed by condition lines and terminated by the "-> class ..." line.
    def parse_rule(line)
      rule_info = parse_rule_info_line(line)
      conditions = {}

      while (body_line = next_line)
        body_line = body_line.strip
        return Rule.new(rule_info, conditions, parse_class_line(body_line)) if body_line.start_with?("->")

        attr, val = parse_condition_line(body_line)
        conditions[attr] = val
      end

      raise ParseError, "input ended inside #{line.strip.inspect}"
    end

    # Extracts the class name and confidence from a "-> class NAME [0.970]"
    # line. The name may contain any characters and may be separated from
    # the bracket by any amount of whitespace.
    def parse_class_line(line)
      matches = line.match(/class (.+?)\s*\[([^\]]+)\]/)
      raise ParseError, "unrecognised class line: #{line.inspect}" if matches.nil?

      {
        classification: matches[1],
        confidence: matches[2].to_f
      }
    end

    # Extracts the figures from a "Rule N: (covered[/other], lift X)" line.
    # When the "/other" part is absent, +:cases_not_covered+ is 0.
    # NOTE(review): in See5's "(n/m)" notation m appears to be the number of
    # covered cases that the rule misclassifies rather than cases "not
    # covered" -- confirm against the See5 documentation before relying on
    # the key name.
    def parse_rule_info_line(line)
      matches = line.match(%r{Rule \d+: \((\d+)(?:/)?([^,]*), lift (.+)\)})
      raise ParseError, "unrecognised rule line: #{line.strip.inspect}" if matches.nil?

      {
        cases_covered: matches[1].to_i,
        cases_not_covered: matches[2]&.to_i,
        lift: matches[3].to_f
      }
    end

    # Splits an "attribute = value" condition into [Symbol, String].
    # NOTE(review): only equality conditions are represented faithfully.
    # Relational lines such as "TSH > 6" or "TT4 <= 37" are still stored in
    # the same attribute/value shape and will not be evaluated correctly by
    # See5::Rule#match? -- supporting them needs a richer condition type.
    def parse_condition_line(line)
      attr, val = line.split("=", 2).map(&:strip)
      raise ParseError, "unrecognised condition line: #{line.inspect}" if attr.nil? || attr.empty?

      [attr.to_sym, val]
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # Describes the attributes of a data set and which of them is the class
  # (target) attribute, and renders that description as names-file text.
  class Schema
    attr_reader :class_attribute, :attributes

    # @param classes [Array] the possible class values; used as the
    #   declaration of the class attribute when +attributes+ lacks one
    # @param attributes [Hash] attribute name => declaration, where a
    #   declaration is either a list of discrete values or a single value
    #   (e.g. "continuous")
    # @param class_attribute [#to_sym, nil] name of the class attribute;
    #   defaults to +:class_attribute+
    def initialize(classes:, attributes:, class_attribute: nil)
      @classes = classes
      @class_attribute = class_attribute&.to_sym || :class_attribute

      # Work on a copy so the caller's hash is not modified when the class
      # attribute has to be added.
      @attributes = attributes.dup

      # if the class attribute doesn't exist in the attributes, add it
      @attributes[@class_attribute] = @classes unless @attributes.key?(@class_attribute)
    end

    # Text of the names file: the class attribute's name on the first line,
    # then one "name: v1,v2,..." line per attribute.
    #
    # @return [String]
    def names_file_contents
      # TODO: continuous class attribute
      declarations = attributes.map do |attr, vals|
        # Array() lets a scalar declaration stand in for a list of values.
        "#{attr}: #{Array(vals).join(",")}"
      end

      [class_attribute, *declarations].join("\n")
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # Release number of the gem.
  VERSION = "0.1.0"
end
data/see5.gemspec ADDED
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path("lib", __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "see5/version"
6
+
7
+ Gem::Specification.new do |spec|
8
+ spec.name = "see5"
9
+ spec.version = See5::VERSION
10
+ spec.authors = ["Eddie Lebow"]
11
+ spec.email = ["elebow@users.noreply.github.com"]
12
+
13
+ spec.summary = "A Ruby frontend for the See5/C5.0 family of classifiers and modellers."
14
+ #spec.description = "TODO: Write a longer description or delete this line."
15
+ spec.homepage = "https://github.com/elebow/ruby-see5"
16
+
17
+ if spec.respond_to?(:metadata)
18
+ spec.metadata["homepage_uri"] = spec.homepage
19
+ spec.metadata["source_code_uri"] = "https://github.com/elebow/ruby-see5"
20
+ spec.metadata["changelog_uri"] = "https://github.com/elebow/ruby-see5/blob/master/CHANGELOG.md"
21
+ else
22
+ raise "RubyGems 2.0 or newer is required to protect against " \
23
+ "public gem pushes."
24
+ end
25
+
26
+ # Specify which files should be added to the gem when it is released.
27
+ # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
28
+ spec.files = Dir.chdir(File.expand_path(__dir__)) do
29
+ `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
30
+ end
31
+ spec.bindir = "exe"
32
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
33
+ spec.require_paths = ["lib"]
34
+
35
+ spec.add_development_dependency "bundler", "~> 1.17"
36
+ spec.add_development_dependency "minitest", "~> 5.0"
37
+ spec.add_development_dependency "rake", "~> 10.0"
38
+ end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: see5
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Eddie Lebow
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-03-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.17'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.17'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description:
56
+ email:
57
+ - elebow@users.noreply.github.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rubocop.yml"
64
+ - Gemfile
65
+ - README.md
66
+ - Rakefile
67
+ - lib/see5.rb
68
+ - lib/see5/input_file_writer.rb
69
+ - lib/see5/model.rb
70
+ - lib/see5/rule.rb
71
+ - lib/see5/rules_file_parser.rb
72
+ - lib/see5/schema.rb
73
+ - lib/see5/version.rb
74
+ - see5.gemspec
75
+ homepage: https://github.com/elebow/ruby-see5
76
+ licenses: []
77
+ metadata:
78
+ homepage_uri: https://github.com/elebow/ruby-see5
79
+ source_code_uri: https://github.com/elebow/ruby-see5
80
+ changelog_uri: https://github.com/elebow/ruby-see5/blob/master/CHANGELOG.md
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubygems_version: 3.0.3
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: A Ruby frontend for the See5/C5.0 family of classifiers and modellers.
100
+ test_files: []