hashematics 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.editorconfig +8 -0
- data/.gitignore +4 -0
- data/.rubocop.yml +11 -0
- data/.ruby-version +1 -0
- data/.travis.yml +20 -0
- data/CHANGELOG.md +7 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +123 -0
- data/Guardfile +16 -0
- data/LICENSE +7 -0
- data/README.md +660 -0
- data/bin/benchmark +134 -0
- data/bin/console +11 -0
- data/hashematics.gemspec +32 -0
- data/lib/hashematics.rb +10 -0
- data/lib/hashematics/category.rb +67 -0
- data/lib/hashematics/configuration.rb +90 -0
- data/lib/hashematics/dictionary.rb +72 -0
- data/lib/hashematics/graph.rb +62 -0
- data/lib/hashematics/group.rb +57 -0
- data/lib/hashematics/hashematics.rb +36 -0
- data/lib/hashematics/id.rb +29 -0
- data/lib/hashematics/key.rb +76 -0
- data/lib/hashematics/object_interface.rb +35 -0
- data/lib/hashematics/record.rb +60 -0
- data/lib/hashematics/record_set.rb +29 -0
- data/lib/hashematics/type.rb +72 -0
- data/lib/hashematics/version.rb +12 -0
- data/lib/hashematics/visitor.rb +50 -0
- data/spec/examples/person.rb +36 -0
- data/spec/fixtures/config.yml +44 -0
- data/spec/fixtures/data.csv +9 -0
- data/spec/fixtures/people.yml +84 -0
- data/spec/hashematics/category_spec.rb +62 -0
- data/spec/hashematics/graph_spec.rb +572 -0
- data/spec/hashematics/key_spec.rb +37 -0
- data/spec/hashematics/object_interface_spec.rb +42 -0
- data/spec/hashematics/record_set_spec.rb +24 -0
- data/spec/hashematics/record_spec.rb +49 -0
- data/spec/hashematics/type_spec.rb +104 -0
- data/spec/spec_helper.rb +42 -0
- metadata +211 -0
data/bin/benchmark
ADDED
@@ -0,0 +1,134 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'

require 'benchmark'
require 'csv'
require 'fileutils'

require 'faker'
require 'hashematics'
require 'pry'
|
10
|
+
|
11
|
+
# Reads the whole file at +name+ as UTF-8 text, dropping a leading byte order mark if present.
#
# The content is returned as a String instead of an open File so that no file handle is left
# open for the life of the script; YAML.load and CSV.new both accept a String source.
def read(name)
  File.read(name, mode: 'r:bom|utf-8')
end
|
14
|
+
|
15
|
+
# rubocop:disable Security/YAMLLoad
# Loads the file at +name+ through #read and returns the object YAML.load parses from it.
def yaml_read(name)
  source = read(name)

  YAML.load(source)
end
# rubocop:enable Security/YAMLLoad
|
20
|
+
|
21
|
+
# Parses the file at +name+ (loaded through #read) as CSV with a header line and returns an
# array containing one header => value Hash per data row.
def csv_read(name)
  parser = CSV.new(read(name), headers: true)

  parser.map(&:to_h)
end
|
24
|
+
|
25
|
+
# Builds +num+ rows of Faker-generated person data.
#
# Each row is [id, first name, last name, boolean, birthday]; the id is the row's 1-based
# position joined with a Faker ID number.
def make_people_rows(num)
  1.upto(num).map do |position|
    person_id = "#{position}-#{Faker::IDNumber.invalid}"
    first_name = Faker::Name.first_name
    last_name = Faker::Name.last_name
    smoker = Faker::Boolean.boolean
    date_of_birth = Faker::Date.birthday(16, 102)

    [person_id, first_name, last_name, smoker, date_of_birth]
  end
end
|
36
|
+
|
37
|
+
# Builds +num+ rows of Faker-generated address data.
#
# Each row is [id, street address, secondary address, city, state, zip]; the id is made of
# +id_prefix+, the row's 1-based position and a Faker number between 1 and 9,999,999.
def make_house_rows(num, id_prefix)
  1.upto(num).map do |position|
    house_id = "#{id_prefix}-#{position}-#{Faker::Number.between(1, 9_999_999)}"
    street = Faker::Address.street_address
    secondary = Faker::Address.secondary_address
    city = Faker::Address.city
    state = Faker::Address.state
    zip = Faker::Address.zip

    [house_id, street, secondary, city, state, zip]
  end
end
|
49
|
+
|
50
|
+
# Positional command line options, each falling back to a default when the argument is absent:
#   0: number of people, 1: cars per person, 2: houses per person,
#   3: 't' to regenerate the data file even if it exists,
#   4: data file path, 5: configuration file path.
number_of_people = ARGV.fetch(0, 1_000).to_i
number_of_cars = ARGV.fetch(1, 20).to_i
number_of_houses = ARGV.fetch(2, 20).to_i
always_generate = ARGV[3] == 't'
file_name = ARGV.fetch(4) { File.join('tmp', 'benchmark_example.csv') }
config_file_name = ARGV.fetch(5) { File.join('spec', 'fixtures', 'config.yml') }

# Echo the effective options so a benchmark run documents its own inputs.
divider = '-------------------------------------'

puts 'Options', divider

{
  'number_of_people' => number_of_people,
  'number_of_cars' => number_of_cars,
  'number_of_houses' => number_of_houses,
  'always_generate' => always_generate,
  'file_name' => file_name,
  'config_file_name' => config_file_name
}.each { |label, value| puts "#{label}: #{value}" }

puts divider
|
66
|
+
|
67
|
+
# Generate the benchmark data file when it is missing or regeneration was requested.
# One CSV line is written per person/car/house combination.
if always_generate || !File.exist?(file_name)
  total_rows = number_of_people * number_of_cars * number_of_houses
  puts "Generating benchmark file: #{file_name} with #{total_rows} rows"

  # Column order matches the row concatenation below: person, car, house, then plate.
  headers = [
    'ID #',
    'First',
    'Last',
    'Smoker',
    'DOB',
    'Car ID #',
    'Make',
    'Model',
    'Year',
    'House ID #',
    'Street 1',
    'Street 2',
    'City',
    'St',
    'Zip',
    'Plate ID #',
    'Plate Number'
  ]

  people_rows = make_people_rows(number_of_people)

  # CSV.open does not create missing parent directories, so make sure the target directory
  # (tmp/ by default) exists before writing; otherwise the script dies with Errno::ENOENT.
  FileUtils.mkdir_p(File.dirname(file_name))

  CSV.open(file_name, 'w') do |csv|
    csv << headers

    people_rows.each_with_index do |person_row, i|
      # Houses are generated once per person and repeated for each of that person's cars.
      house_rows = make_house_rows(number_of_houses, i)

      (1..number_of_cars).each do |j|
        car_row = [
          "#{i}-#{j}-#{Faker::Vehicle.vin}",
          Faker::Vehicle.make,
          Faker::Vehicle.model,
          Faker::Vehicle.year
        ]

        # Exactly one plate is generated per car.
        plate_row = [
          "#{i}-#{j}-0-#{Faker::Number.between(1, 9_999_999)}",
          Faker::Vehicle.license_plate
        ]

        house_rows.each do |house_row|
          csv << person_row + car_row + house_row + plate_row
        end
      end
    end
  end

  puts 'Benchmark file generated.'
end
|
121
|
+
|
122
|
+
# Build the graph from the YAML configuration before any timing starts.
puts 'Loading prerequisites'
settings = yaml_read(config_file_name)
configured_groups = ::Hashematics::Configuration.new(settings).groups
object_graph = ::Hashematics::Graph.new(configured_groups)

# Load every CSV row up front so the file read is not part of the measured work.
puts 'Loading data'
data_rows = csv_read(file_name)

# Time feeding the rows into the graph and reading the 'people' group back out separately.
puts "Executing benchmark for #{data_rows.length} rows"
Benchmark.bm do |job|
  job.report('ADD') { object_graph.add(data_rows) }
  job.report('DATA') { object_graph.data('people') }
end
|
data/bin/console
ADDED
@@ -0,0 +1,11 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require 'bundler/setup'
require 'hashematics'
require 'pry'

# Fixtures and/or initialization code can be added above the Pry.start call to make
# experimenting with the gem easier. A different console can be swapped in here as well.
Pry.start
|
data/hashematics.gemspec
ADDED
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require './lib/hashematics/version'
|
4
|
+
|
5
|
+
# Gem packaging metadata for hashematics.
Gem::Specification.new do |s|
  s.name = 'hashematics'
  s.version = Hashematics::VERSION
  s.summary = 'Configurable Data Shaper'

  # A squiggly heredoc strips the source indentation, so the published description does not
  # carry leading whitespace the way a <<- heredoc body would.
  s.description = <<~DESCRIPTION
    Hashematics is a configuration-based object graphing tool which can turn a flat, single dimensional dataset into a structure of deeply nested objects.
  DESCRIPTION

  s.authors = ['Matthew Ruggio']
  s.email = ['mruggio@bluemarblepayroll.com']
  s.files = `git ls-files`.split("\n")
  s.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
  # NOTE(review): this registers every script under bin/ (benchmark, console) as an installed
  # executable even though they require development-only gems - confirm this is intended.
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.homepage = 'https://github.com/bluemarblepayroll/hashematics'
  s.license = 'MIT'

  s.required_ruby_version = '>= 2.3.8'

  s.add_development_dependency('faker', '~>1')
  s.add_development_dependency('guard-rspec', '~>4.7')
  # NOTE(review): pdf-inspector looks unrelated to this gem - verify it is actually used.
  s.add_development_dependency('pdf-inspector', '~>1')
  s.add_development_dependency('pry', '~>0')
  s.add_development_dependency('rspec', '~> 3.8')
  s.add_development_dependency('rubocop', '~>0.63.1')
  s.add_development_dependency('simplecov', '~>0.16.1')
  s.add_development_dependency('simplecov-console', '~>0.4.2')
end
|
data/lib/hashematics.rb
ADDED
@@ -0,0 +1,10 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
require_relative 'hashematics/hashematics'
|
@@ -0,0 +1,67 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # A Category is an index of records. Records are bucketed by the id they resolve for the
  # parent key, and stored inside each bucket under the id they resolve for the id key.
  # This serves both uses:
  #   1. list of top-level objects
  #   2. list of top-level objects cross-referenced by a parent.
  class Category
    attr_reader :id_key, :include_blank, :parent_key

    # id_key:        required; passed through Key.get and used to resolve each record's own id.
    # include_blank: when truthy, records for which record.id?(id_key) is false are still indexed.
    # parent_key:    passed through Key.get and used to resolve the id a record is bucketed under.
    #
    # Raises ArgumentError when id_key is nil or false.
    def initialize(id_key:, include_blank: false, parent_key: nil)
      raise ArgumentError, 'id_key is required' unless id_key

      @default_parent_id = Id.default
      @id_key = Key.get(id_key)
      @include_blank = include_blank || false
      @lookup = {}
      @parent_key = Key.get(parent_key)

      # Only the instance is frozen; the @lookup hash itself stays mutable so #add keeps working.
      freeze
    end

    # Returns the records indexed under the given parent record, in insertion order. When no
    # parent record is given (or its id resolves to nil), the default parent id is used.
    # Returns an empty array when nothing is indexed under that parent.
    def records(parent_record = nil)
      parent_id = parent_record&.id(parent_key) || default_parent_id

      # Read with fetch rather than the memoizing #get so a lookup miss does not leave an
      # empty bucket behind in the index.
      lookup.fetch(parent_id, {}).values
    end

    # Indexes the record unless it should be skipped (blank id while include_blank is off).
    # A later record with the same parent id and id replaces the earlier one. Returns self.
    def add(record)
      return self if skip_record?(record)

      set(
        record.id(parent_key),
        record.id(id_key),
        record
      )
    end

    private

    attr_reader :default_parent_id, :lookup

    def skip_record?(record)
      !include_record?(record)
    end

    def include_record?(record)
      include_blank || record.id?(id_key)
    end

    # Returns the bucket for a parent id, creating it on first use (write path only).
    def get(parent_id)
      lookup[parent_id] ||= {}
    end

    def set(parent_id, id, record)
      get(parent_id)[id] = record

      self
    end
  end
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # Turns a configuration object (read through ObjectInterface) into an array of Group trees.
  # The :types entry declares named types; the :groups entry declares the groups, each of
  # which may nest further groups. See test fixtures for examples.
  class Configuration
    # Names of the recognized configuration entries.
    module Keys
      BY = :by
      GROUPS = :groups
      INCLUDE_BLANK = :include_blank
      OBJECT_CLASS = :object_class
      PROPERTIES = :properties
      TYPE = :type
      TYPES = :types
    end
    include Keys

    attr_reader :groups

    # Builds the type dictionary first, because groups resolve their type by name from it.
    def initialize(config = {})
      declared_types = build_types(config_value(config, TYPES))
      @type_dictionary = Dictionary.new(Type.null_type).add(declared_types, &:name)

      @groups = build_groups(config_value(config, GROUPS))

      freeze
    end

    private

    attr_reader :type_dictionary

    # Maps each name => options pair to a Type. A nil configuration yields no types.
    def build_types(type_config = {})
      (type_config || {}).map do |name, options|
        Type.new(
          name: name,
          properties: config_value(options, PROPERTIES),
          object_class: config_value(options, OBJECT_CLASS)
        )
      end
    end

    # Maps each name => options pair to a Group. A nil configuration yields no groups.
    def build_groups(group_config = {}, parent_key_parts = [])
      (group_config || {}).map do |name, options|
        build_group(name, options, parent_key_parts)
      end
    end

    # Builds one Group with its Category. Children receive this group's full key
    # (parent parts followed by its own parts) as their parent key.
    def build_group(name, options, parent_key_parts)
      own_key_parts = make_id_key_parts(options)

      category = Category.new(
        id_key: own_key_parts,
        include_blank: include_blank?(options),
        parent_key: parent_key_parts
      )

      Group.new(
        category: category,
        children: make_children(options, parent_key_parts + own_key_parts),
        name: name,
        type: make_type(options)
      )
    end

    def include_blank?(options)
      return false unless options.is_a?(Hash)

      config_value(options, INCLUDE_BLANK)
    end

    # Non-Hash options are shorthand: the value itself is the id key part (or parts).
    def make_id_key_parts(options)
      by = options.is_a?(Hash) ? config_value(options, BY) : options

      Array(by)
    end

    def make_children(options, parent_key_parts)
      return [] unless options.is_a?(Hash)

      build_groups(config_value(options, GROUPS), parent_key_parts)
    end

    # Looks the type up by name; shorthand (non-Hash) options look up nil.
    def make_type(options)
      type_name = config_value(options, TYPE) if options.is_a?(Hash)

      type_dictionary.get(type_name)
    end

    def config_value(config, key)
      ObjectInterface.get(config, key)
    end
  end
end
|
@@ -0,0 +1,72 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # A Dictionary is a hash-backed collection: entries are stored under the string form of
  # their key, can be looked up by key, and can be enumerated as a list of values. Plain
  # hashes could be used everywhere instead, but naming this common intra-library pattern
  # communicates intent and avoids duplication.
  class Dictionary
    extend Forwardable

    attr_reader :default_value

    def_delegators :lookup, :keys

    def_delegator :lookup, :values, :all

    # default_value is what #get returns for keys that have not been set.
    def initialize(default_value = nil)
      @default_value = default_value
      @lookup = {}

      # Only the instance is frozen; the backing hash stays mutable so #set keeps working.
      freeze
    end

    # Stores every entry of the enumerable under the key the block returns for it.
    # Raises ArgumentError when no block is given. Returns self.
    def add(enumerable)
      raise ArgumentError, 'block must be given for key resolution' unless block_given?

      enumerable.each { |entry| set(yield(entry), entry) }

      self
    end

    # Stores the object under key.to_s, replacing any previous entry. Returns self.
    def set(key, object)
      lookup.store(key.to_s, object)

      self
    end

    # Returns the entry stored under key.to_s, or default_value when there is none.
    def get(key)
      lookup.fetch(key.to_s, default_value)
    end

    def exist?(key)
      lookup.include?(key.to_s)
    end

    # Yields each stored value; returns an enumerator when called without a block.
    def each
      if block_given?
        all.each { |entry| yield entry }
      else
        enum_for(:each)
      end
    end

    # Maps over the stored values; returns an enumerator when called without a block.
    def map(&block)
      block ? all.map(&block) : enum_for(:map)
    end

    private

    attr_reader :lookup
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
#
|
4
|
+
# Copyright (c) 2019-present, Blue Marble Payroll, LLC
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
|
10
|
+
module Hashematics
  # Graph serves as the main point of entry for this system.
  # Basic use:
  #   1. Initialize a Graph by passing in an array of groups (tree structures)
  #   2. Feed objects into the graph using the #add method
  #   3. Use the #children, #visit, #data and #rows methods to interact with the result.
  class Graph
    extend Forwardable

    attr_reader :group_dictionary, :record_set

    def_delegators :record_set, :rows

    # Indexes the given groups by name and starts with an empty record set.
    def initialize(groups = [])
      @group_dictionary = Dictionary.new.add(groups, &:name)
      @record_set = RecordSet.new

      freeze
    end

    # Adds every object to the record set and hands the resulting record to each top-level
    # group. Returns self.
    def add(enumerable)
      enumerable.each do |object|
        record = record_set.add(object)

        group_dictionary.each { |group| group.add(record) }
      end

      self
    end

    # Names of the top-level groups.
    def children
      group_dictionary.map(&:name)
    end

    # Returns whatever the named group's #visit returns, or an empty array when the group is
    # unknown or its #visit returns nothing.
    def visit(name)
      found = group_dictionary.get(name)
      return [] if found.nil?

      found.visit || []
    end

    # Calls data(true) on every visitor of the named group and returns the results.
    def data(name)
      visitors = visit(name)

      visitors.map { |visitor| visitor.data(true) }
    end
  end
end
|