petasos 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5b1ffed36c52ec275b881ac07ff08b6dda3d4446ba8c2ab0b74266698044b7e6
4
+ data.tar.gz: 3b42c94a62222b7f133c4fe0ed5caea15d1623bc49f1c953b07c9c7bf1fb64fe
5
+ SHA512:
6
+ metadata.gz: 93023bc9d2cad8a5be3bf800683264b7ef20247b3824c2191a6e75dc4c0c0a311b2c595dec10d1377b8e1e3152f1d141f7d5d9a39ea1d02b7c535014dada2adb
7
+ data.tar.gz: 868e3587fc99dfd6a1cdc4b74d1ac5045d948f20b149bd7bb0294c72dfb2aa8d1afef0d6cd3a03ee1a11c02c4903b2fda2df818e076e336476126f34e7fdb1f6
data/bin/petasos ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Command-line entry point: load the petasos library and perform a single run
# in the current working directory.
require 'petasos'

petasos = Petasos.new
petasos.run
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+ require "petasos/node"
6
+
7
# Coordinates file distribution between nodes.
#
# One pass (see #run) fetches every node's manifests and pending export lists,
# groups importers and canonical exporters by pool, copies newly exported
# files to every importer of the same pool, and finally backfills importers
# with files the canonical exporters have already seen.
class Petasos::Distributor
  attr_reader :config

  # @param config [Array<Hash>] one hash per node; each hash is passed to
  #   Petasos::Node.new and its "name" entry is used as the node's key.
  def initialize(config)
    @config = config
  end

  # Performs one full distribution pass.
  #
  # @return [Hash] the pool table built during the pass (see #build_pools)
  def run
    load_nodes
    build_pools
    distribute_exports
    process_backfills
  end

  # @param node_name [String]
  # @return [Petasos::Node, nil] the node registered under +node_name+
  def find_node(node_name)
    @nodes[node_name]
  end

  private

  # Instantiates a Petasos::Node per configured node and collects each node's
  # parsed manifests, keyed by node name.
  #
  # Example shape of @manifests:
  #
  #   {"petasos-node-a"=>
  #     [{"name"=>"linux-laptop-source",
  #       "imports"=>{},
  #       "exports"=>
  #        {"wow-ah"=>
  #          {"path"=>"/home/justin/play/petasos/test/sandbox/node_a/location_a",
  #           "canonical"=>true}}}],
  #    "petasos-node-b"=>
  #     [{"name"=>"linux-laptop-storage",
  #       "imports"=>
  #        {"wow-ah"=>
  #          {"import_path"=>
  #            "/home/justin/play/petasos/test/sandbox/node_b/location_a/data",
  #           "backfill"=>true}},
  #       "exports"=>{}}]}
  def load_nodes
    @nodes = @config.each_with_object({}) do |node_config, nodes|
      nodes[node_config["name"]] = Petasos::Node.new(node_config)
    end
    @manifests = @nodes.transform_values(&:manifests)
  end

  # Builds the per-pool table of importers, backfill importers and canonical
  # exporters from the manifests.
  #
  # Example shape of @pools:
  #
  #   {"wow-ah"=>
  #     {"import_paths"=>
  #       [["petasos-node-b",
  #         "/home/justin/play/petasos/test/sandbox/node_b/location_a/data"]],
  #      "backfill_import_paths"=>
  #       [["petasos-node-b",
  #         "linux-laptop-storage",
  #         "/home/justin/play/petasos/test/sandbox/node_b/location_a/data"]],
  #      "canonical_exporters"=>[["petasos-node-a", "linux-laptop-source"]]}}
  def build_pools
    # Block form so every pool gets its own arrays on first access.
    @pools = Hash.new do |pools, pool_name|
      pools[pool_name] = {
        "import_paths" => [],
        "backfill_import_paths" => [],
        "canonical_exporters" => [],
      }
    end

    @manifests.each_pair do |node_name, manifest_list|
      manifest_list.each do |manifest|
        manifest["imports"].each_pair do |pool_name, import|
          pool = @pools[pool_name]
          pool["import_paths"] << [node_name, import["import_path"]]
          next unless import["backfill"]

          pool["backfill_import_paths"] << [node_name, manifest["name"], import["import_path"]]
        end

        manifest["exports"].each_pair do |pool_name, export|
          @pools[pool_name]["canonical_exporters"] << [node_name, manifest["name"]] if export["canonical"]
        end
      end
    end
  end

  # Copies every file named in each fetched export list to all importers of
  # the list's pool, then hands the list back to its node marked as completed.
  def distribute_exports
    FileList.new(File.join(Dir.pwd, "**/exports_*")).each do |exports_file_path|
      # Export lists live in a directory named after the node they came from.
      from_node = find_node(File.basename(File.dirname(exports_file_path)))
      # The glob also matches export lists that are not inside a node
      # directory (e.g. directly in the working directory); those have no
      # source node and cannot be distributed from here.
      next unless from_node

      # File name layout: exports_<location>_<pool>_<timestamp>.yaml
      # NOTE(review): splitting on "_" assumes location and pool names contain
      # no underscores - confirm against the naming rules for locations/pools.
      _label, _location_name, pool_name, _datetime = File.basename(exports_file_path, ".*").split("_")

      # YAML.load_file yields a falsy value for an empty file.
      export_paths = YAML.load_file(exports_file_path) || []
      export_paths.each do |export_path|
        @pools[pool_name]["import_paths"].each do |node_name, import_path|
          to_node = find_node(node_name)
          # The trailing "*" is part of the remote source spec, as before.
          run_command("scp", "#{from_node.host}:#{export_path}*", "#{to_node.host}:#{import_path}")
        end
      end

      return_completed_export(exports_file_path, from_node)
    end
  end

  # Renames the export list to "completed-<name>" in the working directory,
  # copies it to the originating node's path and removes the local copy.
  def return_completed_export(exports_file_path, from_node)
    completed_path = File.join(Dir.pwd, "completed-#{File.basename(exports_file_path)}")
    FileUtils.mv(exports_file_path, completed_path)
    run_command("scp", completed_path, "#{from_node.host}:#{from_node.path}")
    FileUtils.rm_f(completed_path)
  end

  # For every pool, downloads the seen-file lists of its canonical exporters
  # and backfill importers, copies each exporter file whose basename the
  # importer has not seen, and removes the downloaded lists again.
  #
  # @return [Hash] @pools
  def process_backfills
    @pools.each_pair do |pool_name, pool|
      exporters = pool["canonical_exporters"]
      importers = pool["backfill_import_paths"]
      downloaded = []

      exporters.each do |node_name, location_name|
        find_node(node_name).grab_seen_file_for_location(location_name, pool_name)
        downloaded << seen_file_name(location_name, pool_name)
      end
      importers.each do |node_name, location_name, _import_path|
        find_node(node_name).grab_seen_file_for_location(location_name, pool_name)
        downloaded << seen_file_name(location_name, pool_name)
      end

      # Index each importer's seen files once instead of once per exporter.
      importer_indexes = importers.map do |_node_name, location_name, _import_path|
        seen_files_by_basename(location_name, pool_name)
      end

      exporters.each do |exporter_node_name, exporter_location|
        exporter_files = seen_files_by_basename(exporter_location, pool_name)
        next unless exporter_files

        from_node = find_node(exporter_node_name)
        importers.each_with_index do |(importer_node_name, _location, import_path), index|
          importer_files = importer_indexes[index]
          # Without the importer's list we cannot tell what is missing;
          # skip rather than re-copy everything.
          next unless importer_files

          to_node = find_node(importer_node_name)
          exporter_files.each_pair do |file_name, file_path|
            next if importer_files.key?(file_name)

            run_command("scp", "#{from_node.host}:#{file_path}", "#{to_node.host}:#{import_path}")
          end
        end
      end

      # Remove only the lists downloaded for this pool; other seen_* files in
      # the working directory are left alone.
      FileUtils.rm_f(downloaded.uniq)
    end
  end

  # Name under which a downloaded seen-file list is read from the working
  # directory.
  def seen_file_name(location_name, pool_name)
    "seen_#{location_name}_#{pool_name}.yaml"
  end

  # Loads a downloaded seen-file list and indexes its paths by basename.
  #
  # @return [Hash{String=>String}, nil] basename => full path, or nil when
  #   the list is not present locally (e.g. the download failed)
  def seen_files_by_basename(location_name, pool_name)
    seen_file = seen_file_name(location_name, pool_name)
    return nil unless File.file?(seen_file)

    (YAML.load_file(seen_file) || []).each_with_object({}) do |file_path, index|
      index[File.basename(file_path)] = file_path
    end
  end

  # Runs an external command without passing it through a local shell, so
  # spaces and shell metacharacters in arguments are not interpreted locally.
  # Failures are reported on stderr and do not abort the pass.
  #
  # @return [Boolean, nil] the result of Kernel#system
  def run_command(*argv)
    succeeded = system(*argv)
    warn "petasos: command failed: #{argv.join(" ")}" unless succeeded
    succeeded
  end
end
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+
6
# Tracks the files of one location. For each of the location's pools it
# detects files not yet recorded as seen, publishes export lists for pools
# marked "export", and maintains the seen-file lists and the manifest file in
# the working directory.
class Petasos::Location
  attr_reader :config

  # Creates any missing seen-file lists and (re)writes the manifest file.
  #
  # @param config [Hash] location settings; reads "name" and "pools"
  def initialize(config)
    @config = config
    initialize_all_seen_pool_files
    update_manifest_file
  end

  # Removes export lists that have a "completed-" marker, then for every
  # pool records newly found files as seen and, for exporting pools, writes
  # an export list naming them.
  def run
    remove_completed_export_files

    pools.each do |pool|
      seen_pool_files = read_seen_pool_files(pool)
      # Files currently matching the pool that are not in the seen list.
      new_files = (current_pool_files(pool) - seen_pool_files).to_a

      # Put a list of the new files where the distributor can fetch it,
      # but only if this pool exports and there is something to export.
      create_file_export_list(pool, new_files) if pool["export"] && !new_files.empty?

      # this is where the "after_seen" hooks would run

      update_seen_pool_files(pool, seen_pool_files + new_files)
    end
  end

  # @return [Array<Hash>] the pool definitions from the location config
  def pools
    config["pools"]
  end

  # Writes "manifest_<location name>.yaml" describing, per pool name, the
  # import path and backfill flag (pools with an "import_path") and the
  # export path and canonical flag (pools with "export" set).
  def update_manifest_file
    imports = {}
    exports = {}

    pools.each do |pool|
      if pool["import_path"]
        imports[pool["name"]] = {
          "import_path" => File.join(pool["path"], pool["import_path"]),
          "backfill" => pool["backfill"] ? true : false,
        }
      end

      next unless pool["export"]

      exports[pool["name"]] = {
        "path" => pool["path"],
        "canonical" => pool["canonical"] ? true : false,
      }
    end

    manifest = {
      "name" => config["name"],
      "imports" => imports,
      "exports" => exports,
    }
    write_yaml("manifest_#{config["name"]}.yaml", manifest)
  end

  # @return [Array<String>] include globs rooted at the pool's path;
  #   defaults to "**/*.*" when the pool configures none
  def included_matchers(pool)
    (pool["included_matchers"] || ["**/*.*"]).map { |fp| File.join(pool["path"], fp) }
  end

  # @return [Array<String>] exclude patterns rooted at the pool's path
  def excluded_matchers(pool)
    (pool["excluded_matchers"] || []).map { |fp| File.join(pool["path"], fp) }
  end

  # @return [FileList] files matching the include globs minus the excludes
  def current_pool_files(pool)
    FileList.new(included_matchers(pool)).exclude(excluded_matchers(pool))
  end

  # @return [Array<String>] paths already recorded as seen for +pool+;
  #   an empty list when the seen file has no content
  def read_seen_pool_files(pool)
    # YAML.load_file yields a falsy value for an empty file.
    YAML.load_file(seen_pool_file_path(pool)) || []
  end

  # Replaces the seen list of +pool+ with +file_paths+.
  def update_seen_pool_files(pool, file_paths)
    write_yaml(seen_pool_file_path(pool), file_paths)
  end

  # Creates an empty seen list for every pool that does not have one yet.
  def initialize_all_seen_pool_files
    pools.each do |pool|
      yaml_path = seen_pool_file_path(pool)
      write_yaml(yaml_path, []) unless File.file?(yaml_path)
    end
  end

  # Resets the seen list of every pool to empty. Uses the same file naming
  # as the other seen-file methods.
  def clear_all_seen_pool_files
    pools.each { |pool| write_yaml(seen_pool_file_path(pool), []) }
  end

  # Writes +file_paths+ to a timestamped
  # "exports_<location>_<pool>_<time>.yaml" file in the working directory.
  def create_file_export_list(pool, file_paths)
    yaml_path = File.join(Dir.pwd, "exports_#{@config["name"]}_#{pool["name"]}_#{Time.now.strftime("%Y-%m-%d-%H:%M:%S")}.yaml")
    write_yaml(yaml_path, file_paths)
  end

  # Serialises +content+ as YAML into +yaml_path+, replacing any old content.
  def write_yaml(yaml_path, content)
    File.open(yaml_path, "w") do |out|
      YAML.dump(content, out)
    end
  end

  private

  # Absolute path of the seen-file list for +pool+ in the working directory.
  def seen_pool_file_path(pool)
    File.join(Dir.pwd, "seen_#{config["name"]}_#{pool["name"]}.yaml")
  end

  # Deletes each of this location's export lists for which a sibling
  # "completed-<file name>" marker exists.
  def remove_completed_export_files
    FileList.new(File.join(Dir.pwd, "exports_#{@config["name"]}*.yaml")).each do |export_file_path|
      # The marker sits next to the export list; the prefix belongs on the
      # file name, not on the front of the absolute path.
      completed_export_file_path = File.join(
        File.dirname(export_file_path),
        "completed-#{File.basename(export_file_path)}"
      )
      File.delete(export_file_path) if File.file?(completed_export_file_path)
    end
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+
6
# A remote node taking part in distribution. On construction it mirrors the
# node's manifest and export-list files into a local directory named after
# the node and parses the manifests.
class Petasos::Node
  attr_reader :config, :manifests

  # @param config [Hash] node settings; reads "name", "host" and "path"
  def initialize(config)
    @config = config
    @manifests = []
    FileUtils.mkdir_p(config["name"])
    grab_manifest_and_exports
    parse_manifests
  end

  # @return [String] the node's "host" setting
  def host
    @config["host"]
  end

  # @return [String] the node's "path" setting
  def path
    @config["path"]
  end

  # Fetches the node's manifest* and exports* files into the local node
  # directory with rsync. Export lists already present locally are kept
  # (--ignore-existing) and a node without export lists is not an error
  # (--ignore-missing-args).
  def grab_manifest_and_exports
    local_dir = "#{config["name"]}/"
    run_command("rsync", "#{remote_root}/manifest*", local_dir)
    run_command("rsync", "--ignore-missing-args", "--ignore-existing", "#{remote_root}/exports*", local_dir)
  end

  # Copies the node's seen-file list for the given location and pool into
  # the current working directory.
  def grab_seen_file_for_location(location_name, pool_name)
    run_command("scp", "#{remote_root}/seen_#{location_name}_#{pool_name}.yaml", ".")
  end

  # Loads every fetched manifest file and appends it to #manifests.
  def parse_manifests
    FileList.new("#{config["name"]}/manifest_*").each do |manifest_file_path|
      # NOTE(review): these files were fetched from a remote host. On Psych
      # versions where YAML.load_file is not safe by default this can
      # instantiate arbitrary objects - consider YAML.safe_load.
      manifest = YAML.load_file(manifest_file_path)
      # An empty file loads as a falsy value; skip it so consumers only
      # ever see real manifests.
      @manifests << manifest if manifest
    end
  end

  private

  # "<host>:<path>" prefix used to address files on the node.
  def remote_root
    "#{config["host"]}:#{config["path"]}"
  end

  # Runs an external command without passing it through a local shell, so
  # spaces and shell metacharacters in arguments are not interpreted locally.
  # Failures are reported on stderr and do not raise.
  #
  # @return [Boolean, nil] the result of Kernel#system
  def run_command(*argv)
    succeeded = system(*argv)
    warn "petasos: command failed: #{argv.join(" ")}" unless succeeded
    succeeded
  end
end
data/lib/petasos.rb ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+
6
# Top-level runner. Processes every location config found in the working
# directory and, when a distribution config is present, distributes files
# between nodes.
class Petasos
  class Error < StandardError; end

  # Processes locations first, then runs distribution only if
  # "petasos_distribution-config.yaml" exists in the working directory.
  def run
    process_locations
    process_distribution if File.file?(File.join(Dir.pwd, "petasos_distribution-config.yaml"))
  end

  # Loads each "petasos_location-*.yaml" file and runs a Petasos::Location
  # for every location entry it lists. Empty files are skipped.
  def process_locations
    FileList.new("petasos_location-*.yaml").each do |location_file|
      # YAML.load_file yields a falsy value for an empty file.
      locations = YAML.load_file(location_file) || []
      locations.each do |location|
        Petasos::Location.new(location).run
      end
    end
  end

  # Loads each "petasos_distribution-*.yaml" file and runs a
  # Petasos::Distributor with its node list. Empty files are skipped.
  def process_distribution
    FileList.new("petasos_distribution-*.yaml").each do |distribution_file|
      node_config = YAML.load_file(distribution_file)
      # Nothing to distribute when the file has no content.
      next unless node_config

      Petasos::Distributor.new(node_config).run
    end
  end
end
33
+
34
+ require "petasos/location"
35
+ require "petasos/node"
36
+ require "petasos/distributor"
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: petasos
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Justin Myers
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-02-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Petasos identifies new files and distributes them to where they belong
14
+ email:
15
+ - justin@tenmillionyears.org
16
+ executables:
17
+ - petasos
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/petasos
22
+ - lib/petasos.rb
23
+ - lib/petasos/distributor.rb
24
+ - lib/petasos/location.rb
25
+ - lib/petasos/node.rb
26
+ homepage: https://github.com/JustinMyers/petasos
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubygems_version: 3.1.6
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: Petasos identifies new files and distributes them to where they belong
49
+ test_files: []