hashematics 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.editorconfig +8 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +11 -0
- data/.ruby-version +1 -0
- data/.travis.yml +20 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +123 -0
- data/Guardfile +16 -0
- data/LICENSE +7 -0
- data/README.md +660 -0
- data/bin/benchmark +134 -0
- data/bin/console +11 -0
- data/hashematics.gemspec +32 -0
- data/lib/hashematics.rb +10 -0
- data/lib/hashematics/category.rb +67 -0
- data/lib/hashematics/configuration.rb +90 -0
- data/lib/hashematics/dictionary.rb +72 -0
- data/lib/hashematics/graph.rb +62 -0
- data/lib/hashematics/group.rb +57 -0
- data/lib/hashematics/hashematics.rb +36 -0
- data/lib/hashematics/id.rb +29 -0
- data/lib/hashematics/key.rb +76 -0
- data/lib/hashematics/object_interface.rb +35 -0
- data/lib/hashematics/record.rb +60 -0
- data/lib/hashematics/record_set.rb +29 -0
- data/lib/hashematics/type.rb +72 -0
- data/lib/hashematics/version.rb +12 -0
- data/lib/hashematics/visitor.rb +50 -0
- data/spec/examples/person.rb +36 -0
- data/spec/fixtures/config.yml +44 -0
- data/spec/fixtures/data.csv +9 -0
- data/spec/fixtures/people.yml +84 -0
- data/spec/hashematics/category_spec.rb +62 -0
- data/spec/hashematics/graph_spec.rb +572 -0
- data/spec/hashematics/key_spec.rb +37 -0
- data/spec/hashematics/object_interface_spec.rb +42 -0
- data/spec/hashematics/record_set_spec.rb +24 -0
- data/spec/hashematics/record_spec.rb +49 -0
- data/spec/hashematics/type_spec.rb +104 -0
- data/spec/spec_helper.rb +42 -0
- metadata +211 -0
data/bin/benchmark
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
|
5
|
+
require 'csv'
|
6
|
+
require 'faker'
|
7
|
+
require 'hashematics'
|
8
|
+
require 'pry'
|
9
|
+
require 'benchmark'
|
10
|
+
|
11
|
+
# Opens the file at +name+ for reading as UTF-8, skipping a leading byte order mark.
#
# With a block, the open File is yielded, closed automatically when the block
# finishes (even on error), and the block's value is returned. Without a block the
# open File itself is returned and the caller is responsible for closing it.
def read(name, &block)
  File.open(name, 'r:bom|utf-8', &block)
end
|
14
|
+
|
15
|
+
# rubocop:disable Security/YAMLLoad
|
16
|
+
# Parses the YAML document stored in the file at +name+ and returns the result.
# The handle obtained from #read is always closed, whether parsing succeeds or raises.
def yaml_read(name)
  io = read(name)

  YAML.load(io)
ensure
  # io is nil when #read itself raised (e.g. missing file).
  io&.close
end
|
19
|
+
# rubocop:enable Security/YAMLLoad
|
20
|
+
|
21
|
+
# Reads the CSV file at +name+, treating the first line as headers, and returns an
# array with one hash (header => value) per data row.
# The handle obtained from #read is always closed once the rows have been materialized.
def csv_read(name)
  io = read(name)

  CSV.new(io, headers: true).map(&:to_h)
ensure
  # io is nil when #read itself raised (e.g. missing file).
  io&.close
end
|
24
|
+
|
25
|
+
# Builds +num+ rows of fake person data. Each row is an array of:
# [id, first name, last name, boolean flag, birthday], where the id is prefixed with
# the 1-based position of the row. Returns an empty array when +num+ is less than 1.
def make_people_rows(num)
  1.upto(num).map do |sequence|
    person_id     = "#{sequence}-#{Faker::IDNumber.invalid}"
    first_name    = Faker::Name.first_name
    last_name     = Faker::Name.last_name
    smoker        = Faker::Boolean.boolean
    # presumably (min_age, max_age) for the Faker 1.x positional API -- confirm against Faker docs
    date_of_birth = Faker::Date.birthday(16, 102)

    [person_id, first_name, last_name, smoker, date_of_birth]
  end
end
|
36
|
+
|
37
|
+
# Builds +num+ rows of fake address data. Each row is an array of:
# [id, street address, secondary address, city, state, zip]. The id is composed of
# +id_prefix+, the 1-based position of the row, and a random number.
# Returns an empty array when +num+ is less than 1.
def make_house_rows(num, id_prefix)
  1.upto(num).map do |sequence|
    house_id = "#{id_prefix}-#{sequence}-#{Faker::Number.between(1, 9_999_999)}"
    address  = Faker::Address

    [
      house_id,
      address.street_address,
      address.secondary_address,
      address.city,
      address.state,
      address.zip
    ]
  end
end
|
49
|
+
|
50
|
+
# Positional command line options. Every option is optional and falls back to a default:
#   0: number of people, 1: number of cars, 2: number of houses,
#   3: 't' to always regenerate the data file, 4: data file path, 5: configuration file path.
number_of_people, number_of_cars, number_of_houses =
  [1_000, 20, 20].each_with_index.map { |default, position| (ARGV[position] || default).to_i }

always_generate  = ARGV[3] == 't'
file_name        = ARGV[4] || File.join('tmp', 'benchmark_example.csv')
config_file_name = ARGV[5] || File.join('spec', 'fixtures', 'config.yml')

# Echo the effective options so a benchmark run is self-describing.
divider = '-------------------------------------'

puts 'Options'
puts divider
puts "number_of_people: #{number_of_people}"
puts "number_of_cars: #{number_of_cars}"
puts "number_of_houses: #{number_of_houses}"
puts "always_generate: #{always_generate}"
puts "file_name: #{file_name}"
puts "config_file_name: #{config_file_name}"
puts divider
|
66
|
+
|
67
|
+
# (Re)build the benchmark CSV when explicitly requested or when it does not exist yet.
# Every person is combined with every car and every house, so the file contains
# number_of_people * number_of_cars * number_of_houses data rows.
if always_generate || !File.exist?(file_name)
  # Only the generation path needs FileUtils, so it is loaded here.
  require 'fileutils'

  total_rows = number_of_people * number_of_cars * number_of_houses
  puts "Generating benchmark file: #{file_name} with #{total_rows} rows"

  headers = [
    'ID #',
    'First',
    'Last',
    'Smoker',
    'DOB',
    'Car ID #',
    'Make',
    'Model',
    'Year',
    'House ID #',
    'Street 1',
    'Street 2',
    'City',
    'St',
    'Zip',
    'Plate ID #',
    'Plate Number'
  ]

  people_rows = make_people_rows(number_of_people)

  # CSV.open does not create missing directories; without this a first run against
  # a path whose directory does not exist yet fails with Errno::ENOENT.
  FileUtils.mkdir_p(File.dirname(file_name))

  CSV.open(file_name, 'w') do |csv|
    csv << headers

    people_rows.each_with_index do |person_row, i|
      # Houses are generated once per person and repeated for each of that person's cars.
      house_rows = make_house_rows(number_of_houses, i)

      (1..number_of_cars).each do |j|
        car_row = [
          "#{i}-#{j}-#{Faker::Vehicle.vin}",
          Faker::Vehicle.make,
          Faker::Vehicle.model,
          Faker::Vehicle.year
        ]

        plate_row = [
          "#{i}-#{j}-0-#{Faker::Number.between(1, 9_999_999)}",
          Faker::Vehicle.license_plate
        ]

        # Column order must match +headers+: person, car, house, plate.
        house_rows.each do |house_row|
          csv << person_row + car_row + house_row + plate_row
        end
      end
    end
  end

  puts 'Benchmark file generated.'
end
|
121
|
+
|
122
|
+
# Build the graph described by the configuration file.
puts 'Loading prerequisites'
configuration = yaml_read(config_file_name)
graph = ::Hashematics::Graph.new(
  ::Hashematics::Configuration.new(configuration).groups
)

# Load every CSV row into memory up front so file I/O is not part of the measurement.
puts 'Loading data'
rows = csv_read(file_name)

# Time feeding the rows into the graph and materializing the 'people' group separately.
puts "Executing benchmark for #{rows.length} rows"
Benchmark.bm do |reporter|
  reporter.report('ADD') { graph.add(rows) }
  reporter.report('DATA') { graph.data('people') }
end
|
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
require 'hashematics'
require 'pry'

# Starts an interactive Pry session with the gem already loaded. Fixtures and/or
# initialization code can be added above this line to make experimenting with the
# gem easier; a different console can be swapped in here as well.
Pry.start
|
data/hashematics.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/hashematics/version'
|
4
|
+
|
5
|
+
# Gem packaging metadata. File lists are derived from git, so building the gem
# requires a git checkout with the git executable available.
Gem::Specification.new do |s|
  s.name        = 'hashematics'
  s.version     = Hashematics::VERSION
  s.summary     = 'Configurable Data Shaper'

  # Squiggly heredoc strips the source indentation so it does not leak into the
  # published description.
  s.description = <<~DESCRIPTION
    Hashematics is a configuration-based object graphing tool which can turn a flat, single dimensional dataset into a structure of deeply nested objects.
  DESCRIPTION

  s.authors     = ['Matthew Ruggio']
  s.email       = ['mruggio@bluemarblepayroll.com']
  s.files       = `git ls-files`.split("\n")
  s.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  # NOTE(review): this registers everything under bin/ as a gem executable -- confirm
  # that development scripts kept there are really meant to be installed for gem users.
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.homepage    = 'https://github.com/bluemarblepayroll/hashematics'
  s.license     = 'MIT'

  s.required_ruby_version = '>= 2.3.8'

  s.add_development_dependency('faker', '~>1')
  s.add_development_dependency('guard-rspec', '~>4.7')
  s.add_development_dependency('pdf-inspector', '~>1')
  s.add_development_dependency('pry', '~>0')
  s.add_development_dependency('rspec', '~> 3.8')
  s.add_development_dependency('rubocop', '~>0.63.1')
  s.add_development_dependency('simplecov', '~>0.16.1')
  s.add_development_dependency('simplecov-console', '~>0.4.2')
end
|
data/lib/hashematics.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
require_relative 'hashematics/hashematics'
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # A Category is an index of records. Internally it is a two-level hash:
  #   parent id => { record id => record }
  # which makes it possible to list:
  # 1. the top-level records (those filed under the default parent id)
  # 2. the records that belong to a specific parent record.
  class Category
    attr_reader :id_key, :include_blank, :parent_key

    # id_key:        what identifies a record within this category (required).
    # include_blank: when truthy, records without an id for id_key are indexed too.
    # parent_key:    what identifies the parent a record is filed under.
    #
    # Both keys are passed through Key.get before being stored.
    # Raises ArgumentError when id_key is nil or false.
    def initialize(id_key:, include_blank: false, parent_key: nil)
      raise ArgumentError, 'id_key is required' unless id_key

      @default_parent_id = Id.default
      @id_key            = Key.get(id_key)
      @include_blank     = include_blank || false
      @lookup            = {}
      @parent_key        = Key.get(parent_key)

      # Shallow freeze: the instance variables cannot be reassigned, but the @lookup
      # hash itself stays mutable so #add keeps working.
      freeze
    end

    # Returns the records filed under parent_record, or under the default parent id
    # when no parent_record is given. Returns an empty array when nothing is filed
    # under that parent. This is a pure read: it never adds entries to the index.
    def records(parent_record = nil)
      parent_id = parent_record&.id(parent_key) || default_parent_id
      bucket    = lookup[parent_id]

      bucket ? bucket.values : []
    end

    # Indexes record under its parent id and its own id. A record with the same
    # parent id and id replaces the one previously stored. Records without an id are
    # skipped unless include_blank is set. Returns self so calls can be chained.
    def add(record)
      return self if skip_record?(record)

      set(
        record.id(parent_key),
        record.id(id_key),
        record
      )
    end

    private

    attr_reader :default_parent_id, :lookup

    def skip_record?(record)
      !include_record?(record)
    end

    def include_record?(record)
      include_blank || record.id?(id_key)
    end

    # Returns the bucket for parent_id, creating it when missing (write path only).
    def get(parent_id)
      lookup[parent_id] ||= {}
    end

    def set(parent_id, id, record)
      get(parent_id)[id] = record

      self
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # Turns a configuration object (types + groups) into an array of Group objects.
  # Each group entry is either a hash of options (by, include_blank, type, groups)
  # or a bare value, which is then used as the group's id key.
  # See test fixtures for examples.
  class Configuration
    # Names of the recognized configuration entries.
    module Keys
      BY = :by
      GROUPS = :groups
      INCLUDE_BLANK = :include_blank
      OBJECT_CLASS = :object_class
      PROPERTIES = :properties
      TYPE = :type
      TYPES = :types
    end
    include Keys

    attr_reader :groups

    # Types are built first because groups resolve their type by name while being built.
    def initialize(config = {})
      declared_types = build_types(config_value(config, TYPES))

      @type_dictionary = Dictionary.new(Type.null_type).add(declared_types, &:name)
      @groups          = build_groups(config_value(config, GROUPS))

      freeze
    end

    private

    attr_reader :type_dictionary

    # Builds one Type per (name, options) entry; a nil config yields no types.
    def build_types(type_config = {})
      (type_config || {}).map do |name, options|
        Type.new(
          name: name,
          properties: config_value(options, PROPERTIES),
          object_class: config_value(options, OBJECT_CLASS)
        )
      end
    end

    # Builds one Group per (name, options) entry; a nil config yields no groups.
    def build_groups(group_config = {}, parent_key_parts = [])
      (group_config || {}).map do |name, options|
        build_group(name, options, parent_key_parts)
      end
    end

    # Builds a single Group together with its Category and, recursively, its children.
    # Children are keyed under the accumulated key parts of all their ancestors.
    def build_group(name, options, parent_key_parts)
      id_key_parts = make_id_key_parts(options)

      category = Category.new(
        id_key: id_key_parts,
        include_blank: include_blank?(options),
        parent_key: parent_key_parts
      )

      Group.new(
        category: category,
        children: make_children(options, parent_key_parts + id_key_parts),
        name: name,
        type: make_type(options)
      )
    end

    def include_blank?(options)
      return false unless options.is_a?(Hash)

      config_value(options, INCLUDE_BLANK)
    end

    # A non-hash group definition is itself the id key (shorthand form).
    def make_id_key_parts(options)
      Array(options.is_a?(Hash) ? config_value(options, BY) : options)
    end

    def make_children(options, parent_key_parts)
      return [] unless options.is_a?(Hash)

      build_groups(config_value(options, GROUPS), parent_key_parts)
    end

    # Looks the type up by name; the dictionary was created with Type.null_type as
    # its default value, which is what it is asked to supply for unknown names.
    def make_type(options)
      type_name = config_value(options, TYPE) if options.is_a?(Hash)

      type_dictionary.get(type_name)
    end

    def config_value(config, key)
      ObjectInterface.get(config, key)
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # A Dictionary is a list of objects with constant O(1) lookup time by key. It is
  # basically a cross between a hash and an array. We could easily use hashes
  # everywhere, but explicitly coding this common intra-library hash use is a nice way
  # to communicate intentions while minimizing duplication.
  #
  # Keys are always normalized with #to_s, so :name and 'name' address the same entry.
  class Dictionary
    extend Forwardable

    attr_reader :default_value

    def_delegators :lookup, :keys

    # #all returns the stored objects (the hash values).
    def_delegator :lookup, :values, :all

    # default_value is what #get returns for keys that were never set.
    def initialize(default_value = nil)
      @default_value = default_value
      @lookup        = {}

      # Shallow freeze: the @lookup hash itself stays mutable so #set keeps working.
      freeze
    end

    # Stores every entry of enumerable under the key the given block returns for it.
    # Raises ArgumentError when no block is given. Returns self.
    def add(enumerable)
      raise ArgumentError, 'block must be given for key resolution' unless block_given?

      enumerable.each { |entry| set(yield(entry), entry) }

      self
    end

    # Stores object under key, replacing any previous entry. Returns self.
    def set(key, object)
      lookup[key.to_s] = object

      self
    end

    # Returns the object stored under key (even when that object is nil), or
    # default_value when the key was never set.
    def get(key)
      lookup.fetch(key.to_s, default_value)
    end

    def exist?(key)
      lookup.key?(key.to_s)
    end

    # Yields every stored object; returns an Enumerator when no block is given.
    def each(&block)
      return enum_for(:each) unless block_given?

      all.each(&block)
    end

    # Maps over every stored object; returns an Enumerator when no block is given.
    def map(&block)
      return enum_for(:map) unless block_given?

      all.map(&block)
    end

    private

    attr_reader :lookup
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # Graph serves as the main point of entry for this system.
  # Basic use:
  # 1. Initialize a Graph by passing in an array of groups (tree structures)
  # 2. Feed objects into the graph using the #add method
  # 3. Use #children to list the group names, #visit / #data to read a group back,
  #    and #rows to reach the underlying record set's rows.
  class Graph
    extend Forwardable

    attr_reader :group_dictionary, :record_set

    def_delegators :record_set, :rows

    # Groups are indexed by their name so they can be retrieved with #visit and #data.
    def initialize(groups = [])
      @group_dictionary = Dictionary.new.add(groups, &:name)
      @record_set       = RecordSet.new

      freeze
    end

    # Adds every object of enumerable to the record set and hands the value returned
    # by the record set to each group. Returns self so calls can be chained.
    def add(enumerable)
      enumerable.each do |object|
        record = record_set.add(object)

        group_dictionary.each { |group| group.add(record) }
      end

      self
    end

    # Names of the groups this graph was initialized with.
    def children
      group_dictionary.map(&:name)
    end

    # Returns whatever the named group's #visit returns, or an empty array when the
    # group is unknown.
    def visit(name)
      group_dictionary.get(name)&.visit || []
    end

    # Calls #data(true) on every element returned by #visit for the named group.
    def data(name)
      visit(name).map { |visitor| visitor.data(true) }
    end
  end
end
|