see5 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +11 -0
- data/Gemfile +8 -0
- data/README.md +33 -0
- data/Rakefile +12 -0
- data/lib/see5.rb +7 -0
- data/lib/see5/input_file_writer.rb +53 -0
- data/lib/see5/model.rb +22 -0
- data/lib/see5/rule.rb +30 -0
- data/lib/see5/rules_file_parser.rb +101 -0
- data/lib/see5/schema.rb +26 -0
- data/lib/see5/version.rb +5 -0
- data/see5.gemspec +38 -0
- metadata +100 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: 6d2b54a9d4d3c9f3271505bf233a46772cacfd1702312a68dab174b197265f59
|
|
4
|
+
data.tar.gz: cb5a5d275b08cb8ce7ca00be58c81fd015902578d31c1d6e8a7d1b5d2936225a
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 903fb8b64822a99a15c14383fc57932af364bb1c355046b4b022b9588c2d7359c5bdbcf5221f4282ba9b1f9a4200f746d390d962367bcad8a39ecd61e2671839
|
|
7
|
+
data.tar.gz: 98594a2c2be18ede5633ee7ba66894d2ce2b2eddead9d10eb4531728c349b8b9db95e439f7ca1407d9c92a6abed363549127a7b731a8f5c53af984096d0fe1d5
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
data/Gemfile
ADDED
data/README.md
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
# Ruby::See5
|
|
2
|
+
|
|
3
|
+
A Ruby frontend for the See5/C5.0 family of classifiers and modellers. Builds models from Ruby objects.
|
|
4
|
+
|
|
5
|
+
## Installation
|
|
6
|
+
|
|
7
|
+
Add this line to your application's Gemfile:
|
|
8
|
+
|
|
9
|
+
```ruby
|
|
10
|
+
gem 'see5'
|
|
11
|
+
```
|
|
12
|
+
|
|
13
|
+
And then execute:
|
|
14
|
+
|
|
15
|
+
$ bundle
|
|
16
|
+
|
|
17
|
+
Or install it yourself as:
|
|
18
|
+
|
|
19
|
+
$ gem install see5
|
|
20
|
+
|
|
21
|
+
## Usage
|
|
22
|
+
|
|
23
|
+
TODO: Write usage instructions here
|
|
24
|
+
|
|
25
|
+
## Development
|
|
26
|
+
|
|
27
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
|
28
|
+
|
|
29
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and tags, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
|
30
|
+
|
|
31
|
+
## Contributing
|
|
32
|
+
|
|
33
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/elebow/ruby-see5.
|
data/Rakefile
ADDED
data/lib/see5.rb
ADDED
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "schema"
|
|
4
|
+
|
|
5
|
+
module See5
|
|
6
|
+
# Writes names and data files suitable for See5, Cubist, or GritBot.
#
# The names file content comes from the schema's +names_file_contents+; the
# data file gets one comma-separated row per record, with the columns taken
# from the keys of the schema's +attributes+ in order.
#
# IO objects passed in by the caller are written to and left open. When no IO
# is given, the writer opens the default file under /tmp itself and closes it
# as soon as the write finishes, so the content is on disk when the method
# returns.
class InputFileWriter
  # Default output locations used when the caller supplies no IO object.
  DEFAULT_NAMES_PATH = "/tmp/ruby-see5.names"
  DEFAULT_DATA_PATH = "/tmp/ruby-see5.data"

  # Convenience wrapper: builds a writer and writes both files.
  #
  # @param data [Enumerable] records; each must respond to every schema attribute
  # @param schema [#attributes, #names_file_contents] schema describing the records
  # @param names_io [IO, nil] destination for the names file (default: /tmp file)
  # @param data_io [IO, nil] destination for the data file (default: /tmp file)
  # @return [Enumerable] the +data+ collection (the result of #write_data_file)
  def self.write_files(data:, schema: nil, names_io: nil, data_io: nil)
    new(data: data, schema: schema, names_io: names_io, data_io: data_io)
      .write_files
  end

  def initialize(data:, schema: nil, names_io: nil, data_io: nil)
    @data = data
    @schema = schema # TODO: automatic schema from data objects' attributes
    @names_io = names_io
    @data_io = data_io
  end

  # Writes the names file, then the data file.
  def write_files
    write_names_file
    write_data_file
  end

  # Writes the schema's names-file content to the names destination.
  def write_names_file
    with_io(@names_io, DEFAULT_NAMES_PATH) do |io|
      io.write(@schema.names_file_contents)
    end
  end

  # Writes one line per record to the data destination.
  def write_data_file
    # TODO: missing or N/A
    with_io(@data_io, DEFAULT_DATA_PATH) do |io|
      @data.each do |record|
        io.write(row(record))
        io.write("\n")
      end
    end
  end

  private

  # Builds the comma-separated line for one record by calling, on the record,
  # the method named after each schema attribute.
  def row(record)
    @schema.attributes.map { |attr, _vals| record.send(attr) }.join(",")
  end

  # Yields the caller-supplied IO untouched, or opens +default_path+ for
  # writing and closes it when the block finishes (even if it raises).
  def with_io(io, default_path)
    if io
      yield io
    else
      File.open(default_path, "w") { |file| yield file }
    end
  end
end
|
|
53
|
+
end
|
data/lib/see5/model.rb
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module See5
|
|
4
|
+
# A classifier made of an ordered list of rules plus a fallback
# classification used when none of the rules match.
class Model
  attr_reader :rules

  # @param default_classification [Object] value returned when no rule matches
  # @param rules [Enumerable] objects responding to +match?(data)+ and
  #   +classification+, in the order they should be tried
  def initialize(default_classification:, rules:)
    @rules = rules
    @default_classification = default_classification
  end

  # Returns the classification of the first rule that matches +data+, or the
  # default classification when no rule matches.
  def classify(data)
    # See5 orders rules by confidence within each class (TODO verify),
    # so the first matching rule is the one with the highest confidence.
    rules.each do |candidate|
      return candidate.classification if candidate.match?(data)
    end

    @default_classification
  end
end
|
|
22
|
+
end
|
data/lib/see5/rule.rb
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module See5
|
|
4
|
+
# One classification rule: a set of attribute/value conditions together with
# the classification and confidence that apply when all of them hold.
class Rule
  attr_reader :classification, :confidence, :rule_info, :conditions

  # @param rule_info [Hash] statistics about the rule, stored as given
  # @param conditions [Hash] attribute => required value
  # @param class_info [Hash] must provide +:classification+ and +:confidence+
  def initialize(rule_info, conditions, class_info)
    @rule_info = rule_info
    @conditions = conditions
    @classification = class_info[:classification]
    @confidence = class_info[:confidence]
  end

  # True when every condition's attribute, looked up with +data[attr]+,
  # equals the required value. A rule with no conditions matches anything.
  #
  # @param data [#[]] record to test, indexed by the same keys as +conditions+
  # @return [Boolean]
  def match?(data)
    conditions.all? { |attr, val| data[attr] == val }
  end

  # Short human-readable summary showing the classification and conditions.
  def to_s
    "#<See5::Rule, @classification=#{@classification}, @conditions=#{@conditions}>"
  end
end
|
|
30
|
+
end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "rule"
|
|
4
|
+
|
|
5
|
+
module See5
|
|
6
|
+
# Read See5 rules output and return an array of hashes representing the rules
# Note that this is the output normally sent to stdout, NOT the .rules file!
# The .rules file lacks some important information like confidence.
#
# The whole file is read up front (so no file handle is left open) and then
# consumed line by line. Parsing stops at the first "Default class:" line or
# at end of input, whichever comes first; if no default class line is present
# the default classification is nil.
class RulesFileParser
  # Parses +fname+ and returns the model hash (see #model).
  #
  # @param fname [String] path to a file holding See5's textual output
  # @return [Hash] +{ default_classification:, rules: }+
  def self.parse_file(fname)
    new(fname).model
  end

  def initialize(fname)
    # Rule listings are small; reading everything closes the file immediately.
    @lines = File.readlines(fname)
    @rules = []
    @default_classification = nil

    parse_file
  end

  # @return [Hash] the parsed default classification and the list of rules
  def model
    {
      default_classification: @default_classification,
      rules: @rules
    }
  end

  # Skips the header, then collects every rule up to the default class line.
  def parse_file
    discard_header

    while (line = next_line)
      if line.start_with?("Rule ")
        @rules << parse_rule(line)
      elsif line.start_with?("Default class:")
        # Limit the split so a class name containing ":" stays intact.
        @default_classification = line.split(":", 2).last.strip

        break
      end
    end
  end

  private

  # Returns the next unread line, or nil once the input is exhausted.
  def next_line
    @lines.shift
  end

  # Discard the file header and advance to the rules section
  # TODO: save the data from the header, in case user wants it?
  def discard_header
    while (line = next_line)
      # Compare the stripped line so CRLF line endings are accepted too.
      break if line.strip == "Rules:"
    end
    # discard the final blank line
    next_line
  end

  # Parses one rule: +line+ is its "Rule N: (...)" header; the following
  # lines are conditions until the "->" line that names the class.
  #
  # @raise [ArgumentError] if the input ends before the "->" line
  def parse_rule(line)
    rule_info = parse_rule_info_line(line)
    conditions = []
    class_info = nil

    while (line = next_line)
      line = line.strip

      if line.start_with?("->")
        class_info = parse_class_line(line)

        break
      end

      # A blank line carries no condition; skip it instead of failing.
      conditions << parse_condition_line(line) unless line.empty?
    end

    raise ArgumentError, "rule is missing its '->  class' line" if class_info.nil?

    Rule.new(rule_info, conditions.to_h, class_info)
  end

  # Extracts the class name and the bracketed confidence from a "->" line.
  # Any amount of whitespace between the parts is accepted, and the class
  # name is not restricted to word characters.
  def parse_class_line(line)
    matches = line.match(/class\s+(.+?)\s+\[([^\]]+)\]/)
    raise ArgumentError, "unrecognised class line: #{line.inspect}" if matches.nil?

    {
      classification: matches[1],
      confidence: matches[2].to_f
    }
  end

  # Extracts the figures from a "Rule N: (A/B, lift L)" header. The "/B"
  # part is optional; when absent, :cases_not_covered is 0.
  def parse_rule_info_line(line)
    matches = line.match(%r{Rule \d+: \((\d+)(?:/)?([^,]*), lift (.+)\)})
    raise ArgumentError, "unrecognised rule header: #{line.inspect}" if matches.nil?

    {
      cases_covered: matches[1].to_i,
      cases_not_covered: matches[2].to_i,
      lift: matches[3].to_f
    }
  end

  # Splits an "attr = value" condition into [attribute symbol, value string].
  # Only the first "=" separates the two, so the value may itself contain "=".
  def parse_condition_line(line)
    attr, val = line.split("=", 2).map(&:strip)

    [attr.to_sym, val]
  end
end
|
|
101
|
+
end
|
data/lib/see5/schema.rb
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module See5
|
|
4
|
+
# Describes the attributes of a data set and which of them is the class
# (target) attribute, and renders that description as names-file text.
class Schema
  attr_reader :class_attribute, :attributes

  # @param classes [Array] the possible values of the class attribute
  # @param attributes [Hash] attribute name => array of its possible values
  # @param class_attribute [#to_sym, nil] name of the class attribute;
  #   defaults to +:class_attribute+
  def initialize(classes:, attributes:, class_attribute: nil)
    @classes = classes
    @class_attribute = class_attribute&.to_sym || :class_attribute

    # if the class attribute doesn't exist in the attributes, add it.
    # Build a new hash for that rather than writing into the caller's
    # (possibly frozen) hash.
    @attributes =
      if attributes.key?(@class_attribute)
        attributes
      else
        attributes.merge(@class_attribute => @classes)
      end
  end

  # Returns the names-file text: the class attribute name on the first line,
  # followed by one "name: value,value" line per attribute.
  def names_file_contents
    # TODO: continuous class attribute
    entries = attributes.map { |attr, vals| "#{attr}: #{vals.join(',')}" }

    "#{class_attribute}\n#{entries.join("\n")}"
  end
end
|
|
26
|
+
end
|
data/lib/see5/version.rb
ADDED
data/see5.gemspec
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
lib = File.expand_path("lib", __dir__)
|
|
4
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
5
|
+
require "see5/version"
|
|
6
|
+
|
|
7
|
+
Gem::Specification.new do |spec|
  # Identity
  spec.name = "see5"
  spec.version = See5::VERSION
  spec.authors = ["Eddie Lebow"]
  spec.email = ["elebow@users.noreply.github.com"]

  spec.summary = "A Ruby frontend for the See5/C5.0 family of classifiers and modellers."
  #spec.description = "TODO: Write a longer description or delete this line."
  spec.homepage = "https://github.com/elebow/ruby-see5"

  # The metadata section below needs a RubyGems that supports it; stop early otherwise.
  unless spec.respond_to?(:metadata)
    raise "RubyGems 2.0 or newer is required to protect against " \
          "public gem pushes."
  end

  spec.metadata["homepage_uri"] = spec.homepage
  spec.metadata["source_code_uri"] = "https://github.com/elebow/ruby-see5"
  spec.metadata["changelog_uri"] = "https://github.com/elebow/ruby-see5/blob/master/CHANGELOG.md"

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  project_root = File.expand_path(__dir__)
  spec.files = Dir.chdir(project_root) do
    tracked = `git ls-files -z`.split("\x0")
    tracked.reject { |path| path.match(%r{^(test|spec|features)/}) }
  end
  spec.bindir = "exe"
  spec.executables = spec.files.grep(%r{^exe/}).map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Development-only dependencies, registered in the listed order.
  {
    "bundler" => "~> 1.17",
    "minitest" => "~> 5.0",
    "rake" => "~> 10.0"
  }.each do |gem_name, constraint|
    spec.add_development_dependency gem_name, constraint
  end
end
|
metadata
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
|
2
|
+
name: see5
|
|
3
|
+
version: !ruby/object:Gem::Version
|
|
4
|
+
version: 0.1.0
|
|
5
|
+
platform: ruby
|
|
6
|
+
authors:
|
|
7
|
+
- Eddie Lebow
|
|
8
|
+
autorequire:
|
|
9
|
+
bindir: exe
|
|
10
|
+
cert_chain: []
|
|
11
|
+
date: 2020-03-03 00:00:00.000000000 Z
|
|
12
|
+
dependencies:
|
|
13
|
+
- !ruby/object:Gem::Dependency
|
|
14
|
+
name: bundler
|
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
|
16
|
+
requirements:
|
|
17
|
+
- - "~>"
|
|
18
|
+
- !ruby/object:Gem::Version
|
|
19
|
+
version: '1.17'
|
|
20
|
+
type: :development
|
|
21
|
+
prerelease: false
|
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
23
|
+
requirements:
|
|
24
|
+
- - "~>"
|
|
25
|
+
- !ruby/object:Gem::Version
|
|
26
|
+
version: '1.17'
|
|
27
|
+
- !ruby/object:Gem::Dependency
|
|
28
|
+
name: minitest
|
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
|
30
|
+
requirements:
|
|
31
|
+
- - "~>"
|
|
32
|
+
- !ruby/object:Gem::Version
|
|
33
|
+
version: '5.0'
|
|
34
|
+
type: :development
|
|
35
|
+
prerelease: false
|
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
37
|
+
requirements:
|
|
38
|
+
- - "~>"
|
|
39
|
+
- !ruby/object:Gem::Version
|
|
40
|
+
version: '5.0'
|
|
41
|
+
- !ruby/object:Gem::Dependency
|
|
42
|
+
name: rake
|
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
|
44
|
+
requirements:
|
|
45
|
+
- - "~>"
|
|
46
|
+
- !ruby/object:Gem::Version
|
|
47
|
+
version: '10.0'
|
|
48
|
+
type: :development
|
|
49
|
+
prerelease: false
|
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
|
51
|
+
requirements:
|
|
52
|
+
- - "~>"
|
|
53
|
+
- !ruby/object:Gem::Version
|
|
54
|
+
version: '10.0'
|
|
55
|
+
description:
|
|
56
|
+
email:
|
|
57
|
+
- elebow@users.noreply.github.com
|
|
58
|
+
executables: []
|
|
59
|
+
extensions: []
|
|
60
|
+
extra_rdoc_files: []
|
|
61
|
+
files:
|
|
62
|
+
- ".gitignore"
|
|
63
|
+
- ".rubocop.yml"
|
|
64
|
+
- Gemfile
|
|
65
|
+
- README.md
|
|
66
|
+
- Rakefile
|
|
67
|
+
- lib/see5.rb
|
|
68
|
+
- lib/see5/input_file_writer.rb
|
|
69
|
+
- lib/see5/model.rb
|
|
70
|
+
- lib/see5/rule.rb
|
|
71
|
+
- lib/see5/rules_file_parser.rb
|
|
72
|
+
- lib/see5/schema.rb
|
|
73
|
+
- lib/see5/version.rb
|
|
74
|
+
- see5.gemspec
|
|
75
|
+
homepage: https://github.com/elebow/ruby-see5
|
|
76
|
+
licenses: []
|
|
77
|
+
metadata:
|
|
78
|
+
homepage_uri: https://github.com/elebow/ruby-see5
|
|
79
|
+
source_code_uri: https://github.com/elebow/ruby-see5
|
|
80
|
+
changelog_uri: https://github.com/elebow/ruby-see5/blob/master/CHANGELOG.md
|
|
81
|
+
post_install_message:
|
|
82
|
+
rdoc_options: []
|
|
83
|
+
require_paths:
|
|
84
|
+
- lib
|
|
85
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
|
86
|
+
requirements:
|
|
87
|
+
- - ">="
|
|
88
|
+
- !ruby/object:Gem::Version
|
|
89
|
+
version: '0'
|
|
90
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
|
91
|
+
requirements:
|
|
92
|
+
- - ">="
|
|
93
|
+
- !ruby/object:Gem::Version
|
|
94
|
+
version: '0'
|
|
95
|
+
requirements: []
|
|
96
|
+
rubygems_version: 3.0.3
|
|
97
|
+
signing_key:
|
|
98
|
+
specification_version: 4
|
|
99
|
+
summary: A Ruby frontend for the See5/C5.0 family of classifiers and modellers.
|
|
100
|
+
test_files: []
|