petasos 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 5b1ffed36c52ec275b881ac07ff08b6dda3d4446ba8c2ab0b74266698044b7e6
4
+ data.tar.gz: 3b42c94a62222b7f133c4fe0ed5caea15d1623bc49f1c953b07c9c7bf1fb64fe
5
+ SHA512:
6
+ metadata.gz: 93023bc9d2cad8a5be3bf800683264b7ef20247b3824c2191a6e75dc4c0c0a311b2c595dec10d1377b8e1e3152f1d141f7d5d9a39ea1d02b7c535014dada2adb
7
+ data.tar.gz: 868e3587fc99dfd6a1cdc4b74d1ac5045d948f20b149bd7bb0294c72dfb2aa8d1afef0d6cd3a03ee1a11c02c4903b2fda2df818e076e336476126f34e7fdb1f6
data/bin/petasos ADDED
@@ -0,0 +1,5 @@
1
#!/usr/bin/env ruby

# Command-line entry point: load the library, then perform one Petasos run
# from the current working directory.
require "petasos"

petasos = Petasos.new
petasos.run
data/lib/petasos/distributor.rb ADDED
@@ -0,0 +1,141 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+ require "petasos/node"
6
+
7
# Copies files between nodes so that every importer of a pool receives what
# the pool's exporters publish.
#
# One run builds a Petasos::Node per configured entry, groups the imports and
# exports declared in the nodes' manifests by pool name, delivers every pending
# exports_* list, and finally backfills importers with files the canonical
# exporters have already seen.
class Petasos::Distributor
  attr_reader :config

  # @param config [Array<Hash>] node definitions; each entry is handed to
  #   Petasos::Node.new and its "name" value is used as the lookup key
  def initialize(config)
    @config = config
  end

  # Performs one full distribution pass (nodes, manifests, pools, exports,
  # backfills — in that order).
  def run
    @nodes = build_nodes
    @manifests = collect_manifests
    @pools = build_pools
    distribute_exports
    process_backfills
  end

  # @param node_name [String]
  # @return [Petasos::Node, nil] the node registered under +node_name+
  def find_node(node_name)
    @nodes[node_name]
  end

  private

  # Instantiates one node per config entry, keyed by node name.
  def build_nodes
    @config.each_with_object({}) do |node_config, nodes|
      nodes[node_config["name"]] = Petasos::Node.new(node_config)
    end
  end

  # Maps each node name to that node's list of manifests, e.g.
  #
  #   {"petasos-node-a"=>
  #     [{"name"=>"linux-laptop-source",
  #       "imports"=>{},
  #       "exports"=>
  #        {"wow-ah"=>
  #          {"path"=>"/home/justin/play/petasos/test/sandbox/node_a/location_a",
  #           "canonical"=>true}}}],
  #    "petasos-node-b"=>
  #     [{"name"=>"linux-laptop-storage",
  #       "imports"=>
  #        {"wow-ah"=>
  #          {"import_path"=>
  #            "/home/justin/play/petasos/test/sandbox/node_b/location_a/data",
  #           "backfill"=>true}},
  #       "exports"=>{}}]}
  def collect_manifests
    @nodes.transform_values(&:manifests)
  end

  # Regroups the manifests by pool name, e.g.
  #
  #   {"wow-ah"=>
  #     {"import_paths"=>
  #       [["petasos-node-b",
  #         "/home/justin/play/petasos/test/sandbox/node_b/location_a/data"]],
  #      "backfill_import_paths"=>
  #       [["petasos-node-b",
  #         "linux-laptop-storage",
  #         "/home/justin/play/petasos/test/sandbox/node_b/location_a/data"]],
  #      "canonical_exporters"=>[["petasos-node-a", "linux-laptop-source"]]}}
  #
  # The hash creates an empty entry on first access to an unknown pool name.
  def build_pools
    pools = Hash.new do |hash, pool_name|
      hash[pool_name] = {
        "import_paths" => [],
        "backfill_import_paths" => [],
        "canonical_exporters" => [],
      }
    end

    @manifests.each_pair do |node_name, manifest_list|
      manifest_list.each do |manifest|
        manifest["imports"].each_pair do |pool_name, import_hash|
          import_path = import_hash["import_path"]
          pools[pool_name]["import_paths"] << [node_name, import_path]
          next unless import_hash["backfill"]

          pools[pool_name]["backfill_import_paths"] << [node_name, manifest["name"], import_path]
        end

        manifest["exports"].each_pair do |pool_name, export_hash|
          next unless export_hash["canonical"]

          pools[pool_name]["canonical_exporters"] << [node_name, manifest["name"]]
        end
      end
    end

    pools
  end

  # Copies every path listed in each pending exports_* file to all import paths
  # of its pool, then hands the list back to its node as "completed-…".
  def distribute_exports
    FileList.new(File.join(Dir.pwd, "**/exports_*")).each do |exports_file_path|
      from_node = find_node(File.basename(File.dirname(exports_file_path)))
      # "**/" also matches the working directory itself, where a local
      # Petasos::Location writes its own export lists. Their parent directory
      # is not a node name, so there is nothing to distribute from here.
      next unless from_node

      # File name layout: exports_<location>_<pool>_<timestamp>.yaml
      # NOTE(review): a location or pool name containing "_" would shift these
      # fields — confirm names never contain underscores.
      _label, _location_name, pool_name, _timestamp = File.basename(exports_file_path, ".*").split("_")
      # An empty YAML file loads as a falsy value; treat it as "nothing to copy".
      export_paths = YAML.load_file(exports_file_path) || []

      delivered = true
      export_paths.each do |export_path|
        @pools[pool_name]["import_paths"].each do |to_node_name, import_path|
          to_node = find_node(to_node_name)
          # No local shell is involved, so the trailing "*" reaches scp verbatim.
          delivered &= run_command("scp", "#{from_node.host}:#{export_path}*", "#{to_node.host}:#{import_path}")
        end
      end

      if delivered
        mark_export_completed(exports_file_path, from_node)
      else
        # Marking it completed would make the exporting location delete the
        # list even though some files never arrived; keep it for the next run.
        warn "petasos: #{exports_file_path} left pending because at least one copy failed"
      end
    end
  end

  # Renames the export list to "completed-<name>" in the working directory,
  # copies it into the node's path and removes the local copy.
  def mark_export_completed(exports_file_path, from_node)
    # Keep this path absolute: the timestamp in the file name contains ":"
    # and scp reads a bare "name:rest" argument as a remote host spec.
    completed_path = File.join(Dir.pwd, "completed-#{File.basename(exports_file_path)}")
    File.rename(exports_file_path, completed_path)
    run_command("scp", completed_path, "#{from_node.host}:#{from_node.path}")
    File.delete(completed_path)
  end

  # For every pool, copies to each backfill importer the files a canonical
  # exporter has seen but whose basename the importer has not.
  def process_backfills
    @pools.each_pair do |pool_name, pool|
      exporters = pool["canonical_exporters"]
      importers = pool["backfill_import_paths"]

      # Download the seen lists of all exporters and backfill importers into
      # the working directory, remembering which files were fetched.
      fetched = (exporters + importers).map do |node_name, location_name|
        find_node(node_name).grab_seen_file_for_location(location_name, pool_name)
        seen_file_name(location_name, pool_name)
      end

      exporters.each do |exporter_node_name, exporter_location|
        exporter_files = load_seen_index(exporter_location, pool_name)
        next unless exporter_files

        from_node = find_node(exporter_node_name)
        importers.each do |importer_node_name, importer_location, import_path|
          importer_files = load_seen_index(importer_location, pool_name)
          next unless importer_files

          to_node = find_node(importer_node_name)
          exporter_files.each_pair do |file_name, file_path|
            next if importer_files.key?(file_name)

            run_command("scp", "#{from_node.host}:#{file_path}", "#{to_node.host}:#{import_path}")
          end
        end
      end

      # Remove only what this pass downloaded. A blanket "seen_*" delete would
      # also wipe the seen lists a local Petasos::Location keeps in this same
      # directory.
      # NOTE(review): a local location sharing a name with a fetched one still
      # collides, because the download overwrites its file — confirm names are unique.
      fetched.uniq.each { |seen_path| File.delete(seen_path) if File.file?(seen_path) }
    end
  end

  # Name of the seen list for a location/pool pair, relative to the working
  # directory (where Petasos::Node#grab_seen_file_for_location places it).
  def seen_file_name(location_name, pool_name)
    "seen_#{location_name}_#{pool_name}.yaml"
  end

  # Loads a downloaded seen list as {basename => full path}.
  # @return [Hash, nil] nil (with a warning) when the file is not present
  def load_seen_index(location_name, pool_name)
    seen_path = seen_file_name(location_name, pool_name)
    unless File.file?(seen_path)
      warn "petasos: #{seen_path} is missing; skipping its backfill comparison"
      return nil
    end

    (YAML.load_file(seen_path) || []).each_with_object({}) do |file_path, index|
      index[File.basename(file_path)] = file_path
    end
  end

  # Runs an external command without a shell, so arguments containing spaces
  # or shell metacharacters are passed through untouched.
  # @return [Boolean] true when the command exited successfully
  def run_command(*argv)
    return true if system(*argv)

    warn "petasos: command failed: #{argv.join(" ")}"
    false
  end
end
data/lib/petasos/location.rb ADDED
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+
6
# Tracks the files of one location. For each configured pool it remembers
# which files have been seen, and for exporting pools it publishes the newly
# found files as an exports_*.yaml list.
#
# All bookkeeping files (manifest_*, seen_*, exports_*) live in the current
# working directory.
class Petasos::Location
  attr_reader :config

  # Creates any missing seen lists and (re)writes the location's manifest.
  #
  # @param config [Hash] location settings; this class reads "name" and
  #   "pools" (a list of pool hashes)
  def initialize(config)
    @config = config
    initialize_all_seen_pool_files
    update_manifest_file
  end

  # Cleans up export lists that were marked completed, then records and
  # publishes the new files of every pool.
  def run
    remove_completed_export_lists

    pools.each do |pool|
      seen_pool_files = read_seen_pool_files(pool)
      # everything currently matching the pool that has not been seen before
      new_files = current_pool_files(pool) - seen_pool_files

      # put a list of the new files where the cluster manager can find it
      # if we are an exporter
      create_file_export_list(pool, new_files.to_a) if pool["export"] && !new_files.empty?

      # this is where the "after_seen" hooks would run

      update_seen_pool_files(pool, seen_pool_files + new_files.to_a)
    end
  end

  # @return [Array<Hash>] the pool definitions from the config
  def pools
    config["pools"]
  end

  # Writes manifest_<name>.yaml listing, per pool name, the import path and
  # backfill flag of importing pools and the path and canonical flag of
  # exporting pools.
  def update_manifest_file
    pool_imports = {}
    pool_exports = {}

    pools.each do |pool|
      if pool["import_path"]
        pool_imports[pool["name"]] = {
          "import_path" => File.join(pool["path"], pool["import_path"]),
          "backfill" => pool["backfill"] ? true : false,
        }
      end

      next unless pool["export"]

      pool_exports[pool["name"]] = {
        "path" => pool["path"],
        "canonical" => pool["canonical"] ? true : false,
      }
    end

    write_yaml(
      "manifest_#{config["name"]}.yaml",
      "name" => config["name"],
      "imports" => pool_imports,
      "exports" => pool_exports
    )
  end

  # Glob patterns selecting the pool's files, rooted at the pool path.
  # Defaults to every file with an extension, at any depth.
  def included_matchers(pool)
    (pool["included_matchers"] || ["**/*.*"]).map { |matcher| File.join(pool["path"], matcher) }
  end

  # Glob patterns to leave out, rooted at the pool path. Defaults to none.
  def excluded_matchers(pool)
    (pool["excluded_matchers"] || []).map { |matcher| File.join(pool["path"], matcher) }
  end

  # @return [Rake::FileList] files matching the included matchers minus the
  #   excluded ones
  def current_pool_files(pool)
    FileList.new(included_matchers(pool)).exclude(excluded_matchers(pool))
  end

  # @return [Array<String>] paths already recorded as seen for this pool
  def read_seen_pool_files(pool)
    # An empty YAML file loads as a falsy value; treat it as "nothing seen".
    YAML.load_file(seen_pool_file_path(pool)) || []
  end

  # Replaces the pool's seen list with +file_paths+.
  def update_seen_pool_files(pool, file_paths)
    write_yaml(seen_pool_file_path(pool), file_paths)
  end

  # Creates an empty seen list for every pool that does not have one yet.
  def initialize_all_seen_pool_files
    pools.each do |pool|
      update_seen_pool_files(pool, []) unless File.file?(seen_pool_file_path(pool))
    end
  end

  # Resets every pool's seen list to empty. Uses the same file that
  # read_seen_pool_files and update_seen_pool_files operate on.
  def clear_all_seen_pool_files
    pools.each { |pool| update_seen_pool_files(pool, []) }
  end

  # Writes a timestamped exports_<location>_<pool>_<time>.yaml file containing
  # +file_paths+.
  def create_file_export_list(pool, file_paths)
    timestamp = Time.now.strftime("%Y-%m-%d-%H:%M:%S")
    yaml_path = File.join(Dir.pwd, "exports_#{@config["name"]}_#{pool["name"]}_#{timestamp}.yaml")
    write_yaml(yaml_path, file_paths)
  end

  # Serialises +content+ as YAML into +yaml_path+, overwriting the file.
  def write_yaml(yaml_path, content)
    File.open(yaml_path, "w") { |out| YAML.dump(content, out) }
  end

  private

  # Deletes each of this location's export lists for which a
  # "completed-<file name>" marker sits in the same directory.
  def remove_completed_export_lists
    FileList.new(File.join(Dir.pwd, "exports_#{@config["name"]}*.yaml")).each do |export_file_path|
      # The prefix belongs on the file name, not on the absolute path.
      marker_path = File.join(File.dirname(export_file_path), "completed-#{File.basename(export_file_path)}")
      File.delete(export_file_path) if File.file?(marker_path)
    end
  end

  # Absolute path of the pool's seen list in the working directory.
  def seen_pool_file_path(pool)
    File.join(Dir.pwd, "seen_#{config["name"]}_#{pool["name"]}.yaml")
  end
end
data/lib/petasos/node.rb ADDED
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+
6
# One machine taking part in distribution. On construction it mirrors the
# node's manifest and export-list files into a local directory named after the
# node and parses the manifests.
class Petasos::Node
  attr_reader :config, :manifests

  # @param config [Hash] node settings; this class reads "name", "host" and
  #   "path"
  def initialize(config)
    @config = config
    @manifests = []
    run_command("mkdir", "-p", config["name"])
    grab_manifest_and_exports
    parse_manifests
  end

  # @return [String] the "host" config value, used as the remote side of
  #   rsync/scp arguments
  def host
    @config["host"]
  end

  # @return [String] the "path" config value, the node's directory on +host+
  def path
    @config["path"]
  end

  # Fetches the node's manifest* files, and any exports* files not already
  # present locally, into the local node directory.
  def grab_manifest_and_exports
    local_dir = "#{config["name"]}/"
    run_command("rsync", "#{host}:#{path}/manifest*", local_dir)
    run_command("rsync", "--ignore-missing-args", "--ignore-existing", "#{host}:#{path}/exports*", local_dir)
  end

  # Copies seen_<location>_<pool>.yaml from the node into the current working
  # directory.
  def grab_seen_file_for_location(location_name, pool_name)
    run_command("scp", "#{host}:#{path}/seen_#{location_name}_#{pool_name}.yaml", ".")
  end

  # Loads every manifest_* file from the local node directory into +manifests+.
  def parse_manifests
    FileList.new("#{config["name"]}/manifest_*").each do |manifest_file_path|
      manifest = YAML.load_file(manifest_file_path)
      # An empty file loads as a falsy value; storing it would break consumers
      # that index into each manifest.
      @manifests << manifest if manifest
    end
  end

  private

  # Runs an external command without a shell, so config values containing
  # spaces or shell metacharacters are passed through untouched. Warns when
  # the command exits unsuccessfully.
  # @return [String] the command's standard output
  def run_command(*argv)
    output = IO.popen(argv, &:read)
    warn "petasos: command failed: #{argv.join(" ")}" unless Process.last_status.success?
    output
  end
end
data/lib/petasos.rb ADDED
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "rake"
4
+ require "yaml"
5
+
6
# Top-level driver: processes the location config files found in the current
# working directory and, when this directory is set up as a distributor, the
# distribution config files as well.
class Petasos
  class Error < StandardError; end

  # Runs all locations, then distribution if the file
  # petasos_distribution-config.yaml exists in the working directory.
  def run
    process_locations
    process_distribution if File.file?(File.join(Dir.pwd, "petasos_distribution-config.yaml"))
  end

  # Looks for petasos_location-*.yaml files and passes each location they
  # list to a Petasos::Location, which is then run.
  def process_locations
    Dir.glob("petasos_location-*.yaml").sort.each do |location_file|
      # An empty YAML file loads as a falsy value; treat it as "no locations".
      locations = YAML.load_file(location_file) || []
      locations.each do |location|
        Petasos::Location.new(location).run
      end
    end
  end

  # Looks for petasos_distribution-*.yaml files and runs a
  # Petasos::Distributor with the node list from each one.
  def process_distribution
    Dir.glob("petasos_distribution-*.yaml").sort.each do |distribution_file|
      node_config = YAML.load_file(distribution_file)
      # Skip empty files instead of handing a falsy config to the distributor.
      next unless node_config

      Petasos::Distributor.new(node_config).run
    end
  end
end
33
+
34
+ require "petasos/location"
35
+ require "petasos/node"
36
+ require "petasos/distributor"
metadata ADDED
@@ -0,0 +1,49 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: petasos
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Justin Myers
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2023-02-14 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: Petasos identifies new files and distributes them to where they belong
14
+ email:
15
+ - justin@tenmillionyears.org
16
+ executables:
17
+ - petasos
18
+ extensions: []
19
+ extra_rdoc_files: []
20
+ files:
21
+ - bin/petasos
22
+ - lib/petasos.rb
23
+ - lib/petasos/distributor.rb
24
+ - lib/petasos/location.rb
25
+ - lib/petasos/node.rb
26
+ homepage: https://github.com/JustinMyers/petasos
27
+ licenses:
28
+ - MIT
29
+ metadata: {}
30
+ post_install_message:
31
+ rdoc_options: []
32
+ require_paths:
33
+ - lib
34
+ required_ruby_version: !ruby/object:Gem::Requirement
35
+ requirements:
36
+ - - ">="
37
+ - !ruby/object:Gem::Version
38
+ version: '0'
39
+ required_rubygems_version: !ruby/object:Gem::Requirement
40
+ requirements:
41
+ - - ">="
42
+ - !ruby/object:Gem::Version
43
+ version: '0'
44
+ requirements: []
45
+ rubygems_version: 3.1.6
46
+ signing_key:
47
+ specification_version: 4
48
+ summary: Petasos identifies new files and distributes them to where they belong
49
+ test_files: []