hashematics 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/benchmark ADDED
@@ -0,0 +1,134 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ require 'bundler/setup'
5
+ require 'csv'
6
+ require 'faker'
7
+ require 'hashematics'
8
+ require 'pry'
9
+ require 'benchmark'
10
+
11
# Opens the file +name+ for reading as UTF-8, skipping a leading byte-order mark if present.
#
# Without a block the open File is returned and the caller is responsible for closing it.
# With a block the file is yielded, closed once the block finishes (even when it raises),
# and the block's result is returned.
def read(name, &block)
  File.open(name, 'r:bom|utf-8', &block)
end

# Parses the YAML document stored in the file +name+ and returns the resulting object.
# The file handle is closed as soon as parsing is done instead of being leaked.
#
# NOTE(review): YAML.load is intentionally kept (hence the silenced cop); only point this at
# trusted, local files.
# rubocop:disable Security/YAMLLoad
def yaml_read(name)
  read(name) { |file| YAML.load(file) }
end
# rubocop:enable Security/YAMLLoad

# Reads the CSV file +name+, treating the first row as headers, and returns one hash per data
# row. All rows are materialized before the file handle is closed.
def csv_read(name)
  read(name) { |file| CSV.new(file, headers: true).map(&:to_h) }
end
24
+
25
# Builds +num+ rows of fake person data. Each row is:
#   [id, first name, last name, smoker flag, date of birth]
# The id is prefixed with the 1-based position of the row.
def make_people_rows(num)
  1.upto(num).map do |position|
    person_id  = "#{position}-#{Faker::IDNumber.invalid}"
    first_name = Faker::Name.first_name
    last_name  = Faker::Name.last_name
    smoker     = Faker::Boolean.boolean
    birthday   = Faker::Date.birthday(16, 102)

    [person_id, first_name, last_name, smoker, birthday]
  end
end
36
+
37
# Builds +num+ rows of fake house data. Each row is:
#   [id, street address, secondary address, city, state, zip]
# The id is made of +id_prefix+, the 1-based position of the row and a random number.
def make_house_rows(num, id_prefix)
  (1..num).each_with_object([]) do |position, rows|
    house_id = "#{id_prefix}-#{position}-#{Faker::Number.between(1, 9_999_999)}"

    rows << [
      house_id,
      Faker::Address.street_address,
      Faker::Address.secondary_address,
      Faker::Address.city,
      Faker::Address.state,
      Faker::Address.zip
    ]
  end
end
49
+
50
# Only needed by this script body: used to create the output directory before writing the CSV.
require 'fileutils'

# Positional command line options (all optional):
#   1. number of people            (default: 1000)
#   2. number of cars per person   (default: 20)
#   3. number of houses per person (default: 20)
#   4. 't' to regenerate the CSV file even when it already exists
#   5. CSV file to generate/read
#   6. configuration (YAML) file
number_of_people = (ARGV[0] || 1_000).to_i
number_of_cars   = (ARGV[1] || 20).to_i
number_of_houses = (ARGV[2] || 20).to_i
always_generate  = ARGV[3] == 't'
file_name        = ARGV[4] || File.join('tmp', 'benchmark_example.csv')
config_file_name = ARGV[5] || File.join('spec', 'fixtures', 'config.yml')

puts 'Options'
puts '-------------------------------------'
puts "number_of_people: #{number_of_people}"
puts "number_of_cars: #{number_of_cars}"
puts "number_of_houses: #{number_of_houses}"
puts "always_generate: #{always_generate}"
puts "file_name: #{file_name}"
puts "config_file_name: #{config_file_name}"
puts '-------------------------------------'

if always_generate || !File.exist?(file_name)
  # One row is written per person/car/house combination.
  total_rows = number_of_people * number_of_cars * number_of_houses
  puts "Generating benchmark file: #{file_name} with #{total_rows} rows"

  headers = [
    'ID #',
    'First',
    'Last',
    'Smoker',
    'DOB',
    'Car ID #',
    'Make',
    'Model',
    'Year',
    'House ID #',
    'Street 1',
    'Street 2',
    'City',
    'St',
    'Zip',
    'Plate ID #',
    'Plate Number'
  ]

  people_rows = make_people_rows(number_of_people)

  # CSV.open does not create missing directories, so make sure the target directory
  # (tmp/ by default) exists; otherwise a fresh checkout fails with Errno::ENOENT.
  FileUtils.mkdir_p(File.dirname(file_name))

  CSV.open(file_name, 'w') do |csv|
    csv << headers

    people_rows.each_with_index do |person_row, i|
      # Houses are generated once per person and repeated for each of that person's cars.
      house_rows = make_house_rows(number_of_houses, i)

      (1..number_of_cars).each do |j|
        car_row = [
          "#{i}-#{j}-#{Faker::Vehicle.vin}",
          Faker::Vehicle.make,
          Faker::Vehicle.model,
          Faker::Vehicle.year
        ]

        # Exactly one plate per car.
        plate_row = [
          "#{i}-#{j}-0-#{Faker::Number.between(1, 9_999_999)}",
          Faker::Vehicle.license_plate
        ]

        house_rows.each do |house_row|
          csv << person_row + car_row + house_row + plate_row
        end
      end
    end
  end

  puts 'Benchmark file generated.'
end

puts 'Loading prerequisites'
configuration = yaml_read(config_file_name)
groups        = ::Hashematics::Configuration.new(configuration).groups
graph         = ::Hashematics::Graph.new(groups)

puts 'Loading data'
rows = csv_read(file_name)

# Time feeding the rows into the graph separately from reading the shaped data back out.
puts "Executing benchmark for #{rows.length} rows"
Benchmark.bm do |x|
  x.report('ADD') { graph.add(rows) }
  x.report('DATA') { graph.data('people') }
end
data/bin/console ADDED
@@ -0,0 +1,11 @@
1
#!/usr/bin/env ruby
# frozen_string_literal: true

require 'bundler/setup'
require 'hashematics'
require 'pry'

# Interactive playground for the gem: add fixtures and/or initialization code above the
# Pry.start call to make experimenting easier, or swap Pry for another console if you prefer.
Pry.start
@@ -0,0 +1,32 @@
1
+ # frozen_string_literal: true
2
+
3
+ require './lib/hashematics/version'
4
+
5
# Gem packaging metadata for hashematics.
Gem::Specification.new do |s|
  s.name        = 'hashematics'
  s.version     = Hashematics::VERSION
  s.summary     = 'Configurable Data Shaper'

  # Squiggly heredoc: the indentation used in this file is stripped, so the published
  # description does not start with stray whitespace.
  s.description = <<~DESCRIPTION
    Hashematics is a configuration-based object graphing tool which can turn a flat, single dimensional dataset into a structure of deeply nested objects.
  DESCRIPTION

  s.authors     = ['Matthew Ruggio']
  s.email       = ['mruggio@bluemarblepayroll.com']
  s.files       = `git ls-files`.split("\n")
  s.test_files  = `git ls-files -- {test,spec,features}/*`.split("\n")
  # NOTE(review): this registers every tracked file under bin/ as a gem executable — confirm
  # that development-only scripts are meant to be installed for end users.
  s.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
  s.homepage    = 'https://github.com/bluemarblepayroll/hashematics'
  s.license     = 'MIT'

  s.required_ruby_version = '>= 2.3.8'

  s.add_development_dependency('faker', '~>1')
  s.add_development_dependency('guard-rspec', '~>4.7')
  # NOTE(review): presumably a leftover; confirm pdf-inspector is actually used by the specs.
  s.add_development_dependency('pdf-inspector', '~>1')
  s.add_development_dependency('pry', '~>0')
  s.add_development_dependency('rspec', '~> 3.8')
  s.add_development_dependency('rubocop', '~>0.63.1')
  s.add_development_dependency('simplecov', '~>0.16.1')
  s.add_development_dependency('simplecov-console', '~>0.4.2')
end
@@ -0,0 +1,10 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ require_relative 'hashematics/hashematics'
@@ -0,0 +1,67 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # A Category is an index of records. Records are bucketed by the id they resolve for
  # parent_key and, within a bucket, keyed by the id they resolve for id_key. Adding a record
  # whose parent id and id are both already present replaces the previous entry.
  class Category
    attr_reader :id_key, :include_blank, :parent_key

    # id_key:        (required) key used to resolve a record's own id.
    # include_blank: when truthy, records for which record.id?(id_key) is false are indexed too.
    # parent_key:    key used to resolve the id of the bucket a record belongs to.
    #
    # Raises ArgumentError when id_key is nil/false.
    def initialize(id_key:, include_blank: false, parent_key: nil)
      raise ArgumentError, 'id_key is required' unless id_key

      @default_parent_id = Id.default
      @id_key            = Key.get(id_key)
      @include_blank     = include_blank || false
      @lookup            = {}
      @parent_key        = Key.get(parent_key)

      # Only the instance is frozen; the lookup hash itself keeps accepting records.
      freeze
    end

    # Returns the records indexed under the id parent_record resolves for parent_key, or under
    # the default parent id when no parent record is given (or it resolves to nil/false).
    # An unknown parent yields an empty array. This is a pure read: it never adds buckets to
    # the index.
    def records(parent_record = nil)
      parent_id = parent_record&.id(parent_key) || default_parent_id
      bucket    = lookup[parent_id]

      bucket ? bucket.values : []
    end

    # Indexes record unless it has to be skipped (blank id while include_blank is off).
    # Returns self so calls can be chained.
    def add(record)
      return self if skip_record?(record)

      set(
        record.id(parent_key),
        record.id(id_key),
        record
      )
    end

    private

    attr_reader :default_parent_id, :lookup

    def skip_record?(record)
      !include_record?(record)
    end

    def include_record?(record)
      include_blank || record.id?(id_key)
    end

    # Write path: creates the parent's bucket on first use.
    def set(parent_id, id, record)
      (lookup[parent_id] ||= {})[id] = record

      self
    end
  end
end
@@ -0,0 +1,90 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # Turns a hash of options into an array of Group objects (exposed through #groups).
  # Types declared under the :types key are built first so groups can reference them by name.
  # See test fixtures for examples.
  class Configuration
    # Option names read from the configuration.
    module Keys
      BY            = :by
      GROUPS        = :groups
      INCLUDE_BLANK = :include_blank
      OBJECT_CLASS  = :object_class
      PROPERTIES    = :properties
      TYPE          = :type
      TYPES         = :types
    end
    include Keys

    attr_reader :groups

    def initialize(config = {})
      declared_types   = build_types(config_value(config, TYPES))
      @type_dictionary = Dictionary.new(Type.null_type).add(declared_types, &:name)

      @groups = build_groups(config_value(config, GROUPS))

      freeze
    end

    private

    attr_reader :type_dictionary

    # Builds one Type per name/options pair; a missing type section yields no types.
    def build_types(type_config = {})
      return [] unless type_config

      type_config.map do |name, options|
        Type.new(
          name: name,
          properties: config_value(options, PROPERTIES),
          object_class: config_value(options, OBJECT_CLASS)
        )
      end
    end

    # Builds one Group per name/options pair. Child groups receive the accumulated key parts
    # of all their ancestors (parent parts followed by this group's own id parts) as their
    # parent key.
    def build_groups(group_config = {}, parent_key_parts = [])
      return [] unless group_config

      group_config.map do |name, options|
        own_key_parts = make_id_key_parts(options)

        category = Category.new(
          id_key: own_key_parts,
          include_blank: include_blank?(options),
          parent_key: parent_key_parts
        )

        Group.new(
          category: category,
          children: make_children(options, parent_key_parts + own_key_parts),
          name: name,
          type: make_type(options)
        )
      end
    end

    # Only hash-style group options can carry an include_blank setting.
    def include_blank?(options)
      return false unless options.is_a?(Hash)

      config_value(options, INCLUDE_BLANK)
    end

    # Hash-style options name their id key(s) under :by; any other value is taken to be the
    # id key(s) itself.
    def make_id_key_parts(options)
      return Array(options) unless options.is_a?(Hash)

      Array(config_value(options, BY))
    end

    # Only hash-style group options can nest further groups.
    def make_children(options, parent_key_parts)
      return [] unless options.is_a?(Hash)

      build_groups(config_value(options, GROUPS), parent_key_parts)
    end

    # Looks the group's type up by name; the dictionary falls back to its default
    # (Type.null_type) for names it does not know, including nil.
    def make_type(options)
      type_name =
        if options.is_a?(Hash)
          config_value(options, TYPE)
        end

      type_dictionary.get(type_name)
    end

    def config_value(config, key)
      ObjectInterface.get(config, key)
    end
  end
end
@@ -0,0 +1,72 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # A Dictionary is a collection with hash-backed lookup by key. It is basically a cross of a
  # hash and an array. We could easily use hashes everywhere, but explicitly coding our
  # intentions of this common intra-library hash use is a nice way to communicate intentions
  # while minimizing duplication.
  #
  # Keys are always normalized with #to_s, so :name and 'name' address the same entry.
  # Enumerable is mixed in (on top of #each), so the usual collection helpers (select, to_a,
  # ...) work on the stored values.
  class Dictionary
    extend Forwardable
    include Enumerable

    attr_reader :default_value

    def_delegators :lookup, :keys

    def_delegator :lookup, :values, :all

    # default_value is what #get returns for keys that were never set.
    def initialize(default_value = nil)
      @default_value = default_value
      @lookup        = {}

      # Only the instance is frozen; the lookup hash itself keeps accepting entries.
      freeze
    end

    # Stores every entry of enumerable under the key the given block returns for it.
    # Returns self. Raises ArgumentError when no block is given.
    def add(enumerable)
      raise ArgumentError, 'block must be given for key resolution' unless block_given?

      enumerable.each do |entry|
        key = yield entry
        set(key, entry)
      end

      self
    end

    # Stores object under key (stringified), replacing any previous entry. Returns self.
    def set(key, object)
      lookup[key.to_s] = object

      self
    end

    # Returns the entry stored under key, or default_value when the key was never set.
    # An entry explicitly stored as nil is returned as nil, not as the default.
    def get(key)
      lookup.fetch(key.to_s, default_value)
    end

    def exist?(key)
      lookup.key?(key.to_s)
    end

    # Yields each stored value in insertion order; returns an Enumerator without a block.
    def each
      return enum_for(:each) unless block_given?

      all.each { |o| yield o }
    end

    # Kept as a direct delegation to the values array rather than going through Enumerable#map.
    def map(&block)
      return enum_for(:map) unless block_given?

      all.map(&block)
    end

    private

    attr_reader :lookup
  end
end
@@ -0,0 +1,62 @@
1
+ # frozen_string_literal: true
2
+
3
+ #
4
+ # Copyright (c) 2019-present, Blue Marble Payroll, LLC
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
module Hashematics
  # Graph serves as the main point of entry for this system.
  # Basic use:
  # 1. Initialize a Graph by passing in an array of groups (tree structures)
  # 2. Feed objects into the graph using the #add method
  # 3. Use #children to list the group names, #visit / #data to read a group back out and
  #    #rows (delegated to the record set) to interact with what was added.
  class Graph
    extend Forwardable

    attr_reader :group_dictionary, :record_set

    def_delegators :record_set, :rows

    def initialize(groups = [])
      @group_dictionary = Dictionary.new.add(groups, &:name)
      @record_set       = RecordSet.new

      freeze
    end

    # Registers every object with the record set and hands the resulting record to each
    # group. Returns self.
    def add(enumerable)
      enumerable.each do |object|
        record = record_set.add(object)

        group_dictionary.each { |known_group| known_group.add(record) }
      end

      self
    end

    # Names of the groups this graph was initialized with.
    def children
      group_dictionary.map(&:name)
    end

    # Result of visiting the named group; an empty array when the group is unknown or the
    # visit comes back empty-handed.
    def visit(name)
      found = group_dictionary.get(name)
      return [] if found.nil?

      found.visit || []
    end

    # Calls data(true) on everything #visit returns for the named group.
    def data(name)
      visit(name).map { |visitor| visitor.data(true) }
    end
  end
end