hashematics 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/benchmark ADDED
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'csv'
6
+ require 'faker'
7
+ require 'hashematics'
8
+ require 'pry'
9
+ require 'benchmark'
10
+
11
# Reads the file at +name+ and returns its entire contents as a String.
#
# The file is opened as UTF-8 and a leading byte order mark, when present, is
# dropped ('r:bom|utf-8').  The block form of File.open is used so the handle
# is closed before this method returns; handing back the open File object
# would leave it open for the rest of the run.  YAML.load and CSV.new both
# accept a String, so the callers in this script work with the returned value.
#
# Raises Errno::ENOENT when the file does not exist.
def read(name)
  File.open(name, 'r:bom|utf-8', &:read)
end
14
+
15
+ # rubocop:disable Security/YAMLLoad
16
# Loads the file at +name+ through #read and parses it as YAML, returning
# whatever object the document describes.
# NOTE(review): YAML.load is used deliberately here (the surrounding rubocop
# directive disables Security/YAMLLoad); only point this at trusted files.
def yaml_read(name)
  source = read(name)

  YAML.load(source)
end
19
+ # rubocop:enable Security/YAMLLoad
20
+
21
# Parses the file at +name+ as CSV with a header row and returns an array
# containing one Hash per data row, keyed by header.
def csv_read(name)
  parser = CSV.new(read(name), headers: true)

  parser.map { |row| row.to_h }
end
24
+
25
# Builds +num+ fake person rows.  Each row is a five element array:
# id ("<sequence>-<fake id number>"), first name, last name, a boolean
# and a birthday generated with the arguments 16 and 102.
# Sequence numbers start at 1; a +num+ below 1 yields an empty array.
def make_people_rows(num)
  1.upto(num).map do |sequence|
    person_id = "#{sequence}-#{Faker::IDNumber.invalid}"
    first_name = Faker::Name.first_name
    last_name = Faker::Name.last_name
    smoker = Faker::Boolean.boolean
    birthday = Faker::Date.birthday(16, 102)

    [person_id, first_name, last_name, smoker, birthday]
  end
end
36
+
37
# Builds +num+ fake house rows.  Each row is a six element array:
# id ("<id_prefix>-<sequence>-<random number>"), street address, secondary
# address, city, state and zip.  Sequence numbers start at 1; a +num+ below 1
# yields an empty array.
def make_house_rows(num, id_prefix)
  1.upto(num).map do |sequence|
    house_id = "#{id_prefix}-#{sequence}-#{Faker::Number.between(1, 9_999_999)}"
    street = Faker::Address.street_address
    secondary = Faker::Address.secondary_address
    city = Faker::Address.city
    state = Faker::Address.state
    zip = Faker::Address.zip

    [house_id, street, secondary, city, state, zip]
  end
end
49
+
50
# Positional command-line options.  Each one falls back to the default shown
# when the argument is not supplied; always_generate is true only for 't'.
number_of_people = ARGV.fetch(0, 1_000).to_i
number_of_cars = ARGV.fetch(1, 20).to_i
number_of_houses = ARGV.fetch(2, 20).to_i
always_generate = ARGV[3] == 't'
file_name = ARGV.fetch(4) { File.join('tmp', 'benchmark_example.csv') }
config_file_name = ARGV.fetch(5) { File.join('spec', 'fixtures', 'config.yml') }

# Echo the effective options so a benchmark run documents its own inputs.
divider = '-------------------------------------'

puts 'Options', divider

{
  'number_of_people' => number_of_people,
  'number_of_cars' => number_of_cars,
  'number_of_houses' => number_of_houses,
  'always_generate' => always_generate,
  'file_name' => file_name,
  'config_file_name' => config_file_name
}.each do |label, value|
  puts "#{label}: #{value}"
end

puts divider
66
+
67
# Generates the benchmark CSV when regeneration was requested or the file does
# not exist yet.  Every person is combined with every car and every house, so
# the file holds number_of_people * number_of_cars * number_of_houses rows.
if always_generate || !File.exist?(file_name)
  # Only the generation path needs FileUtils, so it is loaded here.
  require 'fileutils'

  total_rows = number_of_people * number_of_cars * number_of_houses
  puts "Generating benchmark file: #{file_name} with #{total_rows} rows"

  # Opening a file for writing does not create missing directories, and the
  # default target lives under tmp/, which may not exist in a fresh checkout.
  FileUtils.mkdir_p(File.dirname(file_name))

  headers = [
    'ID #',
    'First',
    'Last',
    'Smoker',
    'DOB',
    'Car ID #',
    'Make',
    'Model',
    'Year',
    'House ID #',
    'Street 1',
    'Street 2',
    'City',
    'St',
    'Zip',
    'Plate ID #',
    'Plate Number'
  ]

  people_rows = make_people_rows(number_of_people)

  CSV.open(file_name, 'w') do |csv|
    csv << headers

    people_rows.each_with_index do |person_row, i|
      # Houses are generated once per person and reused for each of their cars.
      house_rows = make_house_rows(number_of_houses, i)

      (1..number_of_cars).each do |j|
        car_row = [
          "#{i}-#{j}-#{Faker::Vehicle.vin}",
          Faker::Vehicle.make,
          Faker::Vehicle.model,
          Faker::Vehicle.year
        ]

        plate_row = [
          "#{i}-#{j}-0-#{Faker::Number.between(1, 9_999_999)}",
          Faker::Vehicle.license_plate
        ]

        # Column order must match headers: person, car, house, plate.
        house_rows.each do |house_row|
          csv << person_row + car_row + house_row + plate_row
        end
      end
    end
  end

  puts 'Benchmark file generated.'
end
121
+
122
# Build the graph from the YAML configuration before any timing starts.
puts 'Loading prerequisites'
groups = ::Hashematics::Configuration.new(yaml_read(config_file_name)).groups
graph = ::Hashematics::Graph.new(groups)

# Parse the CSV up front so only graph work is measured below.
puts 'Loading data'
rows = csv_read(file_name)

puts "Executing benchmark for #{rows.size} rows"
Benchmark.bm do |bench|
  bench.report('ADD') do
    graph.add(rows)
  end

  bench.report('DATA') do
    graph.data('people')
  end
end
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'hashematics'
6
+
7
+ # You can add fixtures and/or initialization code here to make experimenting
8
+ # with your gem easier. You can also use a different console, if you like.
9
+
10
+ require 'pry'
11
+ Pry.start
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/hashematics/version'
4
+
5
# Gem metadata for hashematics.  File lists are resolved through git, so this
# specification must be evaluated from inside the repository checkout.
Gem::Specification.new do |s|
  s.name        = 'hashematics'
  s.version     = Hashematics::VERSION
  s.summary     = 'Configurable Data Shaper'

  # The squiggly heredoc strips the source indentation, so the published
  # description does not begin with stray whitespace.
  s.description = <<~DESCRIPTION
    Hashematics is a configuration-based object graphing tool which can turn a flat, single dimensional dataset into a structure of deeply nested objects.
  DESCRIPTION

  s.authors     = ['Matthew Ruggio']
  s.email       = ['mruggio@bluemarblepayroll.com']
  s.files       = `git ls-files`.split("\n")
  s.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  # NOTE(review): this registers every tracked file under bin/ as an installed
  # executable - confirm that is intended for development-only scripts.
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.homepage    = 'https://github.com/bluemarblepayroll/hashematics'
  s.license     = 'MIT'

  s.required_ruby_version = '>= 2.3.8'

  s.add_development_dependency('faker', '~>1')
  s.add_development_dependency('guard-rspec', '~>4.7')
  s.add_development_dependency('pdf-inspector', '~>1')
  s.add_development_dependency('pry', '~>0')
  s.add_development_dependency('rspec', '~> 3.8')
  s.add_development_dependency('rubocop', '~>0.63.1')
  s.add_development_dependency('simplecov', '~>0.16.1')
  s.add_development_dependency('simplecov-console', '~>0.4.2')
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'hashematics/hashematics'
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # A Category is an index of records.  Records are stored in buckets keyed by
  # the id of their parent (resolved with parent_key) and, inside each bucket,
  # by their own id (resolved with id_key).  A record added again with the same
  # parent id and id replaces the earlier one.
  class Category
    attr_reader :id_key, :include_blank, :parent_key

    # id_key::        required; passed through Key.get.
    # include_blank:: when truthy, records for which record.id?(id_key) is
    #                 false are indexed as well instead of being skipped.
    # parent_key::    passed through Key.get.
    #
    # Raises ArgumentError when id_key is nil or false.
    def initialize(id_key:, include_blank: false, parent_key: nil)
      raise ArgumentError, 'id_key is required' unless id_key

      @default_parent_id = Id.default
      @id_key            = Key.get(id_key)
      @include_blank     = include_blank || false
      @lookup            = {}
      @parent_key        = Key.get(parent_key)

      freeze
    end

    # Returns the records indexed under parent_record's id, in insertion
    # order.  Without a parent_record (or when its id is nil) the default
    # parent id is used.  Unknown parents yield an empty array.
    def records(parent_record = nil)
      parent_id = parent_record&.id(parent_key) || default_parent_id

      # Plain read: querying a parent that has no records must not grow the
      # index with an empty bucket.
      bucket = lookup[parent_id]

      bucket ? bucket.values : []
    end

    # Indexes record under its parent id and own id.  Records without an id
    # are ignored unless include_blank is set.  Returns self for chaining.
    def add(record)
      return self unless include_record?(record)

      set(
        record.id(parent_key),
        record.id(id_key),
        record
      )
    end

    private

    attr_reader :default_parent_id, :lookup

    def include_record?(record)
      include_blank || record.id?(id_key)
    end

    # Stores record in the bucket for parent_id, creating the bucket on the
    # first write only.
    def set(parent_id, id, record)
      (lookup[parent_id] ||= {})[id] = record

      self
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # Converts a hash of options into an array of Group objects (see #groups).
  # Two top-level keys are read: :types, mapping a type name to options holding
  # :properties and :object_class, and :groups, mapping a group name to its
  # options.  Group options are either a hash (:by, :include_blank, :type and
  # nested :groups) or any non-hash value, which is used as the id key parts.
  class Configuration
    module Keys
      BY            = :by
      GROUPS        = :groups
      INCLUDE_BLANK = :include_blank
      OBJECT_CLASS  = :object_class
      PROPERTIES    = :properties
      TYPE          = :type
      TYPES         = :types
    end
    include Keys

    attr_reader :groups

    def initialize(config = {})
      # Types have to be registered first: building groups looks them up.
      declared_types   = build_types(config_value(config, TYPES))
      @type_dictionary = Dictionary.new(Type.null_type).add(declared_types, &:name)
      @groups          = build_groups(config_value(config, GROUPS))

      freeze
    end

    private

    attr_reader :type_dictionary

    # One Type per entry of the :types section; nil is treated as no types.
    def build_types(type_config = {})
      (type_config || {}).map do |name, options|
        Type.new(
          name: name,
          properties: config_value(options, PROPERTIES),
          object_class: config_value(options, OBJECT_CLASS)
        )
      end
    end

    # One Group per entry.  parent_key_parts accumulates the id key parts of
    # all ancestors and is extended with this group's parts for its children.
    def build_groups(group_config = {}, parent_key_parts = [])
      (group_config || {}).map do |name, options|
        own_key_parts = make_id_key_parts(options)

        category = Category.new(
          id_key: own_key_parts,
          include_blank: include_blank?(options),
          parent_key: parent_key_parts
        )

        nested_groups = make_children(options, parent_key_parts + own_key_parts)

        Group.new(category: category, children: nested_groups, name: name, type: make_type(options))
      end
    end

    def include_blank?(options)
      options.is_a?(Hash) && config_value(options, INCLUDE_BLANK)
    end

    def make_id_key_parts(options)
      Array(options.is_a?(Hash) ? config_value(options, BY) : options)
    end

    def make_children(options, parent_key_parts)
      return [] unless options.is_a?(Hash)

      build_groups(config_value(options, GROUPS), parent_key_parts)
    end

    # Non-hash options carry no type name; the dictionary is then asked for nil.
    def make_type(options)
      type_name = config_value(options, TYPE) if options.is_a?(Hash)

      type_dictionary.get(type_name)
    end

    def config_value(config, key)
      ObjectInterface.get(config, key)
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # A Dictionary is an insertion-ordered collection with hash-based lookup by
  # key.  Keys are always normalized with #to_s, so :name and 'name' address
  # the same entry.  Plain hashes would work as well; this class exists to make
  # that common intra-library usage explicit and to avoid duplicating it.
  class Dictionary
    extend Forwardable

    attr_reader :default_value

    def_delegators :lookup, :keys

    def_delegator :lookup, :values, :all

    # default_value is what #get returns for keys that were never set.
    def initialize(default_value = nil)
      @default_value = default_value
      @lookup        = {}

      freeze
    end

    # Stores every entry of enumerable under the key the block returns for it.
    # Returns self.  Raises ArgumentError when no block is given.
    def add(enumerable)
      raise ArgumentError, 'block must be given for key resolution' unless block_given?

      enumerable.each { |entry| set(yield(entry), entry) }

      self
    end

    # Stores object under key.to_s, replacing any previous entry.  Returns self.
    def set(key, object)
      lookup.store(key.to_s, object)

      self
    end

    # Returns the entry stored under key.to_s, or default_value when absent.
    def get(key)
      lookup.fetch(key.to_s, default_value)
    end

    def exist?(key)
      lookup.key?(key.to_s)
    end

    # Yields each stored object in insertion order; returns an Enumerator
    # when called without a block.
    def each
      return enum_for(:each) unless block_given?

      all.each { |entry| yield entry }
    end

    # Maps over the stored objects; returns an Enumerator without a block.
    def map(&block)
      return enum_for(:map) unless block_given?

      all.map(&block)
    end

    private

    attr_reader :lookup
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # Graph serves as the main point of entry for this system.
  # Basic use:
  # 1. Initialize a Graph by passing in an array of groups (tree structures)
  # 2. Feed objects into the graph using the #add method
  # 3. Use #children, #visit and #data to interact with the generated object
  #    graph; #rows is delegated to the underlying record set.
  class Graph
    extend Forwardable

    attr_reader :group_dictionary, :record_set

    def_delegators :record_set, :rows

    # Groups are indexed by their name so they can be addressed in #visit/#data.
    def initialize(groups = [])
      @group_dictionary = Dictionary.new.add(groups, &:name)
      @record_set       = RecordSet.new

      freeze
    end

    # Adds every object of enumerable to the record set and to each top-level
    # group.  Returns self.
    def add(enumerable)
      enumerable.each do |object|
        add_one(object)
      end

      self
    end

    # Names of the top-level groups.
    def children
      group_dictionary.map { |top_level_group| top_level_group.name }
    end

    # Result of visiting the named group; an empty array when no such group
    # exists or the group's visit returns nil/false.
    def visit(name)
      target = group(name)
      return [] if target.nil?

      target.visit || []
    end

    # Calls data(true) on every item returned by #visit for the named group.
    def data(name)
      visit(name).map do |visitor|
        visitor.data(true)
      end
    end

    private

    def group(name)
      group_dictionary.get(name)
    end

    # Registers one object with the record set, then hands the resulting
    # record to every top-level group.
    def add_one(object)
      record = record_set.add(object)

      group_dictionary.each { |top_level_group| top_level_group.add(record) }
    end
  end
end