see5 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 6d2b54a9d4d3c9f3271505bf233a46772cacfd1702312a68dab174b197265f59
4
+ data.tar.gz: cb5a5d275b08cb8ce7ca00be58c81fd015902578d31c1d6e8a7d1b5d2936225a
5
+ SHA512:
6
+ metadata.gz: 903fb8b64822a99a15c14383fc57932af364bb1c355046b4b022b9588c2d7359c5bdbcf5221f4282ba9b1f9a4200f746d390d962367bcad8a39ecd61e2671839
7
+ data.tar.gz: 98594a2c2be18ede5633ee7ba66894d2ce2b2eddead9d10eb4531728c349b8b9db95e439f7ca1407d9c92a6abed363549127a7b731a8f5c53af984096d0fe1d5
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ /.bundle/
2
+ /coverage/
3
+ /doc/
4
+ /pkg/
5
+ /spec/reports/
6
+ /tmp/
data/.rubocop.yml ADDED
@@ -0,0 +1,11 @@
1
+ ---
2
+ Style/ClassAndModuleChildren:
3
+ Exclude:
4
+ - "test/**"
5
+
6
+ Style/Documentation:
7
+ Exclude:
8
+ - "test/**"
9
+
10
+ Style/StringLiterals:
11
+ EnforcedStyle: double_quotes
data/Gemfile ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ source "https://rubygems.org"
4
+
5
+ git_source(:github) { |repo_name| "https://github.com/#{repo_name}" }
6
+
7
+ # Specify your gem's dependencies in see5.gemspec
8
+ gemspec
data/README.md ADDED
@@ -0,0 +1,33 @@
1
+ # Ruby::See5
2
+
3
+ A Ruby frontend for the See5/C5.0 family of classifiers and modellers. Builds models from Ruby objects.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'see5'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install see5
20
+
21
+ ## Usage
22
+
23
+ TODO: Write usage instructions here
24
+
25
+ ## Development
26
+
27
+ After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
28
+
29
+ To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
30
+
31
+ ## Contributing
32
+
33
+ Bug reports and pull requests are welcome on GitHub at https://github.com/elebow/ruby-see5.
data/Rakefile ADDED
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "bundler/gem_tasks"
4
+ require "rake/testtask"
5
+
6
# Running `rake` with no arguments runs the test task defined below.
task default: :test

# The :test task puts both "test" and "lib" on the load path and picks up
# every file matching test/**/test_*.rb.
Rake::TestTask.new(:test) do |test_task|
  %w[test lib].each { |dir| test_task.libs << dir }
  test_task.test_files = FileList["test/**/test_*.rb"]
end
data/lib/see5.rb ADDED
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "see5/input_file_writer"
4
+ require "see5/model"
5
+ require "see5/rules_file_parser"
6
+ require "see5/schema"
7
+ require "see5/version"
@@ -0,0 +1,53 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "schema"
4
+
5
module See5
  # Writes names and data files suitable for See5, Cubist, or GritBot.
  #
  # The names file receives the schema's +names_file_contents+. The data file
  # receives one line per record: the record's values for each schema
  # attribute, in schema order, joined with commas.
  class InputFileWriter
    # Builds a writer and writes both files in one call.
    #
    # @param data [#each] the records to write; every record must respond
    #   publicly to each attribute name in the schema
    # @param schema [#attributes, #names_file_contents] required in practice,
    #   even though it defaults to nil (see the TODO in #initialize)
    # @param names_io [#write, nil] destination for the names file; when nil,
    #   /tmp/ruby-see5.names is opened for writing on first use
    # @param data_io [#write, nil] destination for the data file; when nil,
    #   /tmp/ruby-see5.data is opened for writing on first use
    # @return [Object] the return value of #write_files
    def self.write_files(data:, schema: nil, names_io: nil, data_io: nil)
      new(data: data, schema: schema, names_io: names_io, data_io: data_io)
        .write_files
    end

    # See .write_files for the meaning of the arguments.
    def initialize(data:, schema: nil, names_io: nil, data_io: nil)
      @data = data
      @schema = schema # TODO: automatic schema from data objects' attributes
      @names_io = names_io
      @data_io = data_io
    end

    # Writes the names file, then the data file.
    #
    # @return [Object] the return value of #write_data_file
    def write_files
      write_names_file
      write_data_file
    end

    # Writes the schema's names-file text and flushes the destination.
    #
    # @return [Object] whatever the destination's +write+ returned
    def write_names_file
      written = names_io.write(@schema.names_file_contents)
      flush_if_possible(@names_io)
      written
    end

    # Writes one comma-separated line per record and flushes the destination.
    #
    # @return [Object] whatever +@data.each+ returned
    def write_data_file
      # TODO: missing or N/A
      result = @data.each do |record|
        data_io.write(row(record))
        data_io.write("\n")
      end
      # Flush the instance variable, not the lazy accessor, so that an empty
      # data set still does not open the default file.
      flush_if_possible(@data_io)
      result
    end

    private

    # Builds the data-file line (without newline) for one record.
    def row(record)
      @schema.attributes.map do |attr, _vals|
        # public_send keeps an attribute that happens to share its name with a
        # private Kernel method (system, exit, ...) from invoking that method.
        record.public_send(attr)
      end.join(",")
    end

    # Pushes buffered output to the destination. The IO objects are never
    # closed here (they may belong to the caller), so without a flush an
    # external reader could see an incomplete file.
    def flush_if_possible(io)
      io.flush if io.respond_to?(:flush)
    end

    def names_io
      @names_io ||= File.open("/tmp/ruby-see5.names", "w")
    end

    def data_io
      @data_io ||= File.open("/tmp/ruby-see5.data", "w")
    end
  end
end
data/lib/see5/model.rb ADDED
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # Classifies data using an ordered list of rules plus a fallback class.
  class Model
    attr_reader :rules

    # @param default_classification [Object] returned when no rule matches
    # @param rules [#find] rule objects responding to +match?+ and
    #   +classification+, in the order they should be tried
    def initialize(default_classification:, rules:)
      @rules = rules
      @default_classification = default_classification
    end

    # Returns the classification of the first rule that matches +data+, or
    # the default classification when none does.
    #
    # See5 orders rules by confidence within each class (TODO verify), so the
    # first matching rule is the one with the highest confidence.
    def classify(data)
      winner = rules.find { |candidate| candidate.match?(data) }

      winner.nil? ? @default_classification : winner.classification
    end
  end
end
data/lib/see5/rule.rb ADDED
@@ -0,0 +1,30 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # One classification rule: a set of attribute/value conditions together
  # with the class it predicts and the confidence of that prediction.
  class Rule
    attr_reader :classification, :confidence, :rule_info, :conditions

    # @param rule_info [Object] stored as-is and exposed through #rule_info
    # @param conditions [Hash] attribute => required value
    # @param class_info [Hash] must provide :classification and :confidence
    def initialize(rule_info, conditions, class_info)
      @rule_info = rule_info
      @conditions = conditions
      @classification = class_info[:classification]
      @confidence = class_info[:confidence]
    end

    # Tells whether +data+ satisfies every condition of this rule.
    #
    # Each condition is checked with <tt>data[attr] == val</tt>. Checking stops
    # at the first mismatch, and a rule without conditions matches everything.
    #
    # @param data [#[]] the record to test
    # @return [Boolean]
    def match?(data)
      conditions.all? { |attr, val| data[attr] == val }
    end

    # @return [String] a short description showing the class and conditions
    def to_s
      "#<See5::Rule, @classification=#{@classification}, @conditions=#{@conditions}>"
    end
  end
end
@@ -0,0 +1,101 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "rule"
4
+
5
module See5
  # Reads See5 rules output and exposes the parsed result through #model.
  #
  # Note that this is the output normally sent to stdout, NOT the .rules file!
  # The .rules file lacks some important information like confidence.
  #
  # What the parser looks for, in order:
  # * a line reading "Rules:" (everything before it is skipped);
  # * lines starting with "Rule ", each followed by condition lines and a
  #   line starting with "->" that names the predicted class;
  # * a line starting with "Default class:", which ends parsing.
  #
  # NOTE(review): only conditions of the form "attribute = value" are
  # understood. A condition line without "=" is stored under the whole line as
  # its key with a nil value, and "a <= 3" is stored as key :"a <". Handling
  # other operators also needs support in See5::Rule#match?.
  class RulesFileParser
    # Raised when the input ends early or a line does not have the expected
    # shape.
    class ParseError < StandardError; end

    RULES_HEADER = "Rules:"
    RULE_INFO_PATTERN = %r{Rule \d+: \((\d+)(?:/)?([^,]*), lift (.+)\)}.freeze
    # The class name may contain any characters and may be separated from the
    # bracketed confidence by one or more whitespace characters.
    CLASS_PATTERN = /class (.+?)\s+\[(.+)\]/.freeze

    # Parses the file at +fname+.
    #
    # @param fname [String] path of a file holding See5's stdout
    # @return [Hash] see #model
    # @raise [ParseError] when the input is truncated or malformed
    def self.parse_file(fname)
      new(fname).model
    end

    # Reads the whole file and parses it immediately.
    #
    # @param fname [String] path of a file holding See5's stdout
    # @raise [ParseError] when the input is truncated or malformed
    def initialize(fname)
      # File.readlines closes the file itself; a single enumerator then serves
      # as the read cursor shared by all parsing methods.
      @lines = File.readlines(fname).each
      @rules = []
      @default_classification = nil

      parse_file
    end

    # @return [Hash] +:default_classification+ (String) and +:rules+
    #   (Array of See5::Rule, in input order)
    def model
      {
        default_classification: @default_classification,
        rules: @rules
      }
    end

    # Consumes lines from the current position: skips to the rules section,
    # collects every rule, and stops at the "Default class:" line.
    #
    # @return [nil]
    # @raise [ParseError] when the input ends before "Default class:"
    def parse_file
      discard_header

      loop do
        line = next_line("looking for the default class")

        if line.start_with?("Rule ")
          @rules << parse_rule(line)
        elsif line.start_with?("Default class:")
          @default_classification = line.split(":", 2).last.strip

          break
        end
      end
    end

    private

    # Returns the next input line, turning end-of-input into a ParseError so
    # that a bare StopIteration never leaks out (a Kernel#loop in calling code
    # would silently swallow it).
    def next_line(activity)
      @lines.next
    rescue StopIteration
      raise ParseError, "unexpected end of input while #{activity}"
    end

    # Discard the file header and advance to the rules section.
    # Trailing whitespace is ignored so CRLF input is accepted. The blank line
    # that follows the header needs no special handling: parse_file ignores
    # lines it does not recognise.
    # TODO: save the data from the header, in case user wants it?
    def discard_header
      loop do
        break if next_line("looking for the \"Rules:\" header").rstrip == RULES_HEADER
      end
    end

    # Parses one rule, given its "Rule N: (...)" line. Reads condition lines
    # until the "->" line that carries the class.
    def parse_rule(header)
      rule_info = parse_rule_info_line(header)
      conditions = []
      class_info = nil

      until class_info
        line = next_line("reading the conditions of a rule").strip

        if line.start_with?("->")
          class_info = parse_class_line(line)
        else
          conditions << parse_condition_line(line)
        end
      end

      Rule.new(rule_info, conditions.to_h, class_info)
    end

    # @return [Hash] +:classification+ (String) and +:confidence+ (Float)
    def parse_class_line(line)
      matches = line.match(CLASS_PATTERN)
      raise ParseError, "malformed class line: #{line.inspect}" if matches.nil?

      {
        classification: matches[1],
        confidence: matches[2].to_f
      }
    end

    # @return [Hash] +:cases_covered+, +:cases_not_covered+ (0 when the line
    #   has no "/M" part) and +:lift+
    def parse_rule_info_line(line)
      matches = line.match(RULE_INFO_PATTERN)
      raise ParseError, "malformed rule line: #{line.inspect}" if matches.nil?

      {
        cases_covered: matches[1].to_i,
        cases_not_covered: matches[2].to_i,
        lift: matches[3].to_f
      }
    end

    # @return [Array] a two-element [attribute Symbol, value] pair
    def parse_condition_line(line)
      # Split on the first "=" only, so a value containing "=" stays whole.
      attr, val = line.split("=", 2).map(&:strip)
      raise ParseError, "malformed condition line: #{line.inspect}" if attr.nil? || attr.empty?

      [attr.to_sym, val]
    end
  end
end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # Describes the attributes of a data set and which of them is the class
  # (target) attribute, and renders that description as names-file text.
  class Schema
    attr_reader :class_attribute, :attributes

    # @param classes [Array] the possible values of the class attribute
    # @param attributes [Hash] attribute name => list of possible values
    #   (a single non-list value is also accepted and written as-is)
    # @param class_attribute [#to_sym, nil] name of the class attribute;
    #   defaults to :class_attribute
    def initialize(classes:, attributes:, class_attribute: nil)
      @classes = classes
      @class_attribute = class_attribute&.to_sym || :class_attribute

      # If the class attribute doesn't exist in the attributes, add it, but
      # on a copy so the caller's hash is left untouched.
      @attributes =
        if attributes.key?(@class_attribute)
          attributes
        else
          attributes.merge(@class_attribute => @classes)
        end
    end

    # Renders the names-file text: the class attribute name on the first
    # line, then one "name: v1,v2,..." line per attribute. The result has no
    # trailing newline.
    #
    # @return [String]
    def names_file_contents
      # TODO: continuous class attribute
      entries = attributes.map do |attr, vals|
        # Array() lets a scalar value (e.g. "continuous") be written directly.
        "#{attr}: #{Array(vals).join(',')}"
      end

      [class_attribute, *entries].join("\n")
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module See5
  # Version of the gem; the gemspec reads this constant.
  VERSION = "0.1.0"
end
data/see5.gemspec ADDED
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ lib = File.expand_path("lib", __dir__)
4
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
+ require "see5/version"
6
+
7
Gem::Specification.new do |spec|
  repository_url = "https://github.com/elebow/ruby-see5"

  spec.name = "see5"
  spec.version = See5::VERSION
  spec.authors = ["Eddie Lebow"]
  spec.email = ["elebow@users.noreply.github.com"]

  spec.summary = "A Ruby frontend for the See5/C5.0 family of classifiers and modellers."
  #spec.description = "TODO: Write a longer description or delete this line."
  spec.homepage = repository_url

  # Specifications without a metadata accessor are rejected outright.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against public gem pushes."
  end

  {
    "homepage_uri" => spec.homepage,
    "source_code_uri" => "https://github.com/elebow/ruby-see5",
    "changelog_uri" => "https://github.com/elebow/ruby-see5/blob/master/CHANGELOG.md"
  }.each do |key, uri|
    spec.metadata[key] = uri
  end

  # Package every file tracked by git (listed with `git ls-files -z`),
  # except those under test/, spec/ or features/.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    tracked_files = `git ls-files -z`.split("\x0")
    tracked_files.grep_v(%r{^(test|spec|features)/})
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}) { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  {
    "bundler" => "~> 1.17",
    "minitest" => "~> 5.0",
    "rake" => "~> 10.0"
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end
end
metadata ADDED
@@ -0,0 +1,100 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: see5
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Eddie Lebow
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2020-03-03 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.17'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.17'
27
+ - !ruby/object:Gem::Dependency
28
+ name: minitest
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '5.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '5.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rake
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '10.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '10.0'
55
+ description:
56
+ email:
57
+ - elebow@users.noreply.github.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".rubocop.yml"
64
+ - Gemfile
65
+ - README.md
66
+ - Rakefile
67
+ - lib/see5.rb
68
+ - lib/see5/input_file_writer.rb
69
+ - lib/see5/model.rb
70
+ - lib/see5/rule.rb
71
+ - lib/see5/rules_file_parser.rb
72
+ - lib/see5/schema.rb
73
+ - lib/see5/version.rb
74
+ - see5.gemspec
75
+ homepage: https://github.com/elebow/ruby-see5
76
+ licenses: []
77
+ metadata:
78
+ homepage_uri: https://github.com/elebow/ruby-see5
79
+ source_code_uri: https://github.com/elebow/ruby-see5
80
+ changelog_uri: https://github.com/elebow/ruby-see5/blob/master/CHANGELOG.md
81
+ post_install_message:
82
+ rdoc_options: []
83
+ require_paths:
84
+ - lib
85
+ required_ruby_version: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - ">="
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ required_rubygems_version: !ruby/object:Gem::Requirement
91
+ requirements:
92
+ - - ">="
93
+ - !ruby/object:Gem::Version
94
+ version: '0'
95
+ requirements: []
96
+ rubygems_version: 3.0.3
97
+ signing_key:
98
+ specification_version: 4
99
+ summary: A Ruby frontend for the See5/C5.0 family of classifiers and modellers.
100
+ test_files: []