petasos 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/bin/petasos +5 -0
- data/lib/petasos/distributor.rb +141 -0
- data/lib/petasos/location.rb +117 -0
- data/lib/petasos/node.rb +39 -0
- data/lib/petasos.rb +36 -0
- metadata +49 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 5b1ffed36c52ec275b881ac07ff08b6dda3d4446ba8c2ab0b74266698044b7e6
|
4
|
+
data.tar.gz: 3b42c94a62222b7f133c4fe0ed5caea15d1623bc49f1c953b07c9c7bf1fb64fe
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 93023bc9d2cad8a5be3bf800683264b7ef20247b3824c2191a6e75dc4c0c0a311b2c595dec10d1377b8e1e3152f1d141f7d5d9a39ea1d02b7c535014dada2adb
|
7
|
+
data.tar.gz: 868e3587fc99dfd6a1cdc4b74d1ac5045d948f20b149bd7bb0294c72dfb2aa8d1afef0d6cd3a03ee1a11c02c4903b2fda2df818e076e336476126f34e7fdb1f6
|
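data/bin/petasos
ADDED
[the 5-line hunk for this file is not present in this extract]
data/lib/petasos/distributor.rb
ADDED
@@ -0,0 +1,141 @@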
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake"
|
4
|
+
require "yaml"
|
5
|
+
require "petasos/node"
|
6
|
+
|
7
|
+
class Petasos
  # Moves files between the nodes named in one distribution config. A run copies
  # every path listed in the fetched exports files to the importers of the
  # matching pool, then backfills importers with files that the canonical
  # exporters have seen but the importers have not.
  class Distributor
    attr_reader :config

    # @param config [Array<Hash>] one hash per node; "name" keys the node and the
    #   whole hash is handed to Petasos::Node.new
    def initialize(config)
      @config = config
    end

    # Runs one full distribution pass.
    #
    # @return [Hash] the pool table built by this run
    def run
      load_nodes
      collect_manifests
      build_pools
      distribute_exports
      process_backfills
    end

    # @param node_name [String]
    # @return [Petasos::Node, nil] the node created by the current run, or nil
    #   when no node has that name
    def find_node(node_name)
      @nodes[node_name]
    end

    private

    # Builds one Petasos::Node per config entry, keyed by node name.
    def load_nodes
      @nodes = {}
      @config.each do |node_config|
        @nodes[node_config["name"]] = Petasos::Node.new(node_config)
      end
    end

    # Collects the parsed manifests of every node, keyed by node name, e.g.
    #
    #   {"petasos-node-a"=>
    #     [{"name"=>"linux-laptop-source",
    #       "imports"=>{},
    #       "exports"=>{"wow-ah"=>{"path"=>"/.../node_a/location_a", "canonical"=>true}}}],
    #    "petasos-node-b"=>
    #     [{"name"=>"linux-laptop-storage",
    #       "imports"=>{"wow-ah"=>{"import_path"=>"/.../node_b/location_a/data", "backfill"=>true}},
    #       "exports"=>{}}]}
    def collect_manifests
      @manifests = {}
      @nodes.each_pair do |node_name, node|
        @manifests[node_name] = node.manifests
      end
    end

    # Indexes the manifests by pool name, e.g.
    #
    #   {"wow-ah"=>
    #     {"import_paths"=>[["petasos-node-b", "/.../node_b/location_a/data"]],
    #      "backfill_import_paths"=>
    #       [["petasos-node-b", "linux-laptop-storage", "/.../node_b/location_a/data"]],
    #      "canonical_exporters"=>[["petasos-node-a", "linux-laptop-source"]]}}
    def build_pools
      @pools = Hash.new do |pools, pool_name|
        pools[pool_name] = {
          "import_paths" => [],
          "backfill_import_paths" => [],
          "canonical_exporters" => [],
        }
      end

      @manifests.each_pair do |node_name, manifest_list|
        manifest_list.each do |manifest|
          manifest["imports"].each_pair do |pool_name, import_hash|
            import_path = import_hash["import_path"]
            @pools[pool_name]["import_paths"] << [node_name, import_path]
            if import_hash["backfill"]
              @pools[pool_name]["backfill_import_paths"] << [node_name, manifest["name"], import_path]
            end
          end

          manifest["exports"].each_pair do |pool_name, export_hash|
            @pools[pool_name]["canonical_exporters"] << [node_name, manifest["name"]] if export_hash["canonical"]
          end
        end
      end
    end

    # Processes every exports file found below the working directory: copies the
    # listed paths to each importer of the pool, then hands the list back to its
    # node under a "completed-" name.
    def distribute_exports
      FileList.new(File.join(Dir.pwd, "**/exports_*")).each do |exports_file_path|
        # The containing directory names the node the list was fetched from.
        from_node = find_node(File.basename(File.dirname(exports_file_path)))
        # A list that does not sit in a known node's directory cannot be
        # attributed to a host; leave it untouched instead of moving it and
        # then failing on a nil node.
        next unless from_node

        # File name layout: exports_<location>_<pool>_<timestamp>.yaml
        _label, _location_name, pool_name, _datetime = File.basename(exports_file_path, ".*").split("_")

        # An empty YAML file loads as a falsy value; treat it as an empty list.
        export_paths = YAML.load_file(exports_file_path) || []
        export_paths.each do |export_path|
          @pools[pool_name]["import_paths"].each do |importer_node_name, import_path|
            to_node = find_node(importer_node_name)
            run_command("scp", "#{from_node.host}:#{export_path}*", "#{to_node.host}:#{import_path}")
          end
        end

        # mark it as completed
        completed_export_file_path = File.join(Dir.pwd, "completed-#{File.basename(exports_file_path)}")
        File.rename(exports_file_path, completed_export_file_path)
        # and then put it back where it came from
        run_command("scp", completed_export_file_path, "#{from_node.host}:#{from_node.path}")
        File.delete(completed_export_file_path)
      end
    end

    # For each pool, fetches the seen-files of its canonical exporters and
    # backfill importers, copies every exporter file whose basename the importer
    # has not seen, and finally removes the fetched seen-files.
    def process_backfills
      @pools.each_pair do |pool_name, pool|
        exporters = pool["canonical_exporters"]
        importers = pool["backfill_import_paths"]

        # grab the seen files on the canonical exporters
        exporters.each do |node_name, location_name|
          find_node(node_name).grab_seen_file_for_location(location_name, pool_name)
        end

        # grab the seen files on the backfill importers
        importers.each do |node_name, location_name, _import_path|
          find_node(node_name).grab_seen_file_for_location(location_name, pool_name)
        end

        exporters.each do |exporter_node_name, exporter_location_name|
          from_node = find_node(exporter_node_name)
          exporter_seen_files = seen_files_by_basename(exporter_location_name, pool_name)

          importers.each do |importer_node_name, importer_location_name, import_path|
            to_node = find_node(importer_node_name)
            importer_seen_files = seen_files_by_basename(importer_location_name, pool_name)

            exporter_seen_files.each_pair do |file_name, file_path|
              next if importer_seen_files[file_name]

              run_command("scp", "#{from_node.host}:#{file_path}", "#{to_node.host}:#{import_path}")
            end
          end
        end

        # clear the seen files locally.
        File.delete(*Dir.glob("seen_*"))
      end
    end

    # Loads a fetched seen-file from the working directory and indexes its paths
    # by basename.
    def seen_files_by_basename(location_name, pool_name)
      seen_paths = YAML.load_file("seen_#{location_name}_#{pool_name}.yaml") || []
      seen_paths.each_with_object({}) do |file_path, by_name|
        by_name[File.basename(file_path)] = file_path
      end
    end

    # Runs an external program with an explicit argument vector, bypassing the
    # shell. The copied paths come from YAML files fetched from other nodes, so
    # they must never be interpreted as shell syntax.
    #
    # @return [String] the command's standard output
    def run_command(*argv)
      IO.popen(argv, &:read)
    end
  end
end
|
data/lib/petasos/location.rb
ADDED
@@ -0,0 +1,117 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake"
|
4
|
+
require "yaml"
|
5
|
+
|
6
|
+
class Petasos
  # Watches the pools of one location. Each run compares the files currently
  # matched by a pool against the pool's seen-file, writes an exports list for
  # new files of exporting pools, and records the new files as seen. All
  # bookkeeping files (seen_*, exports_*, manifest_*) live in the current
  # working directory.
  class Location
    attr_reader :config

    # Creates any missing seen-file and (re)writes the location manifest.
    #
    # @param config [Hash] reads "name" and "pools"; a pool hash is read for
    #   "name", "path", "import_path", "backfill", "export", "canonical",
    #   "included_matchers" and "excluded_matchers"
    def initialize(config)
      @config = config
      initialize_all_seen_pool_files
      update_manifest_file
    end

    # Cleans up finished exports lists, then publishes and records the new files
    # of every pool.
    def run
      remove_completed_exports

      pools.each do |pool|
        seen_pool_files = read_seen_pool_files(pool)
        # identify which of the files currently in the pool are new
        new_files = current_pool_files(pool).to_a - seen_pool_files

        # put a list of the new files where the cluster manager can find it
        # if we are an exporter
        create_file_export_list(pool, new_files) if pool["export"] && !new_files.empty?

        # this is where the "after_seen" hooks would run

        update_seen_pool_files(pool, seen_pool_files + new_files)
      end
    end

    # @return [Array<Hash>] the pool definitions from the config
    def pools
      config["pools"]
    end

    # Writes manifest_<location>.yaml describing, per pool name, where this
    # location imports to and what it exports.
    def update_manifest_file
      pool_imports = {}
      pool_exports = {}

      pools.each do |pool|
        if pool["import_path"]
          pool_imports[pool["name"]] = {
            "import_path" => File.join(pool["path"], pool["import_path"]),
            "backfill" => pool["backfill"] ? true : false,
          }
        end

        next unless pool["export"]

        pool_exports[pool["name"]] = {
          "path" => pool["path"],
          "canonical" => pool["canonical"] ? true : false,
        }
      end

      write_yaml(
        "manifest_#{config["name"]}.yaml",
        "name" => config["name"],
        "imports" => pool_imports,
        "exports" => pool_exports
      )
    end

    # @return [Array<String>] include globs rooted at the pool path; defaults to
    #   every file with an extension
    def included_matchers(pool)
      patterns = pool["included_matchers"] || ["**/*.*"]
      patterns.map { |pattern| File.join(pool["path"], pattern) }
    end

    # @return [Array<String>] exclude globs rooted at the pool path
    def excluded_matchers(pool)
      patterns = pool["excluded_matchers"] || []
      patterns.map { |pattern| File.join(pool["path"], pattern) }
    end

    # @return [Rake::FileList] files matching the include globs minus the excludes
    def current_pool_files(pool)
      FileList.new(included_matchers(pool)).exclude(excluded_matchers(pool))
    end

    # @return [Array<String>] paths recorded in the pool's seen-file; an empty
    #   file is read as an empty list
    def read_seen_pool_files(pool)
      YAML.load_file(seen_file_path(pool)) || []
    end

    # Replaces the pool's seen-file content with file_paths.
    def update_seen_pool_files(pool, file_paths)
      write_yaml(seen_file_path(pool), file_paths)
    end

    # Creates an empty seen-file for every pool that does not have one yet.
    def initialize_all_seen_pool_files
      pools.each do |pool|
        yaml_path = seen_file_path(pool)
        write_yaml(yaml_path, []) unless File.file?(yaml_path)
      end
    end

    # Resets every pool's seen-file to an empty list.
    def clear_all_seen_pool_files
      # Uses the same file as the read/update methods; the previous version
      # called an undefined `path` method and targeted seen_<pool>.yaml.
      pools.each { |pool| write_yaml(seen_file_path(pool), []) }
    end

    # Writes a timestamped exports_<location>_<pool>_<time>.yaml listing file_paths.
    def create_file_export_list(pool, file_paths)
      timestamp = Time.now.strftime("%Y-%m-%d-%H:%M:%S")
      yaml_path = File.join(Dir.pwd, "exports_#{@config["name"]}_#{pool["name"]}_#{timestamp}.yaml")
      write_yaml(yaml_path, file_paths)
    end

    # Serializes content as YAML into yaml_path, replacing any existing file.
    def write_yaml(yaml_path, content)
      File.open(yaml_path, "w") { |out| YAML.dump(content, out) }
    end

    private

    # Absolute path of the seen-file shared by every seen-file operation.
    def seen_file_path(pool)
      File.join(Dir.pwd, "seen_#{config["name"]}_#{pool["name"]}.yaml")
    end

    # Deletes each exports list of this location that has a "completed-" twin
    # next to it.
    def remove_completed_exports
      FileList.new(File.join(Dir.pwd, "exports_#{@config["name"]}*.yaml")).each do |export_file_path|
        # The marker prefix belongs on the file name, not on the front of the
        # absolute path.
        completed_export_file_path =
          File.join(File.dirname(export_file_path), "completed-#{File.basename(export_file_path)}")
        File.delete(export_file_path) if File.file?(completed_export_file_path)
      end
    end
  end
end
|
data/lib/petasos/node.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake"
|
4
|
+
require "yaml"
|
5
|
+
|
6
|
+
class Petasos
  # Local view of one remote node. Construction creates a directory named after
  # the node in the current working directory, fetches the node's manifest and
  # exports files into it, and parses the manifests.
  class Node
    attr_reader :config, :manifests

    # @param config [Hash] reads "name" (local directory), "host" and "path"
    #   (remote location of the node's files)
    def initialize(config)
      @config = config
      @manifests = []
      run_command("mkdir", "-p", config["name"])
      grab_manifest_and_exports
      parse_manifests
    end

    # @return [String] remote host as given in the config
    def host
      @config["host"]
    end

    # @return [String] remote directory as given in the config
    def path
      @config["path"]
    end

    # Fetches manifest* (always) and exports* (only files not already present
    # locally) from the remote directory into the node's local directory.
    def grab_manifest_and_exports
      local_dir = "#{config["name"]}/"
      run_command("rsync", "#{remote_root}/manifest*", local_dir)
      run_command("rsync", "--ignore-missing-args", "--ignore-existing", "#{remote_root}/exports*", local_dir)
    end

    # Copies seen_<location>_<pool>.yaml from the remote directory into the
    # current working directory.
    #
    # @return [String] standard output of the scp command
    def grab_seen_file_for_location(location_name, pool_name)
      run_command("scp", "#{remote_root}/seen_#{location_name}_#{pool_name}.yaml", ".")
    end

    # Appends every parsed manifest_* file of the local directory to #manifests.
    def parse_manifests
      FileList.new("#{config["name"]}/manifest_*").each do |manifest_file_path|
        manifest = YAML.load_file(manifest_file_path)
        # An empty file loads as a falsy value; callers index manifests like
        # hashes, so skip it.
        @manifests << manifest if manifest
      end
    end

    private

    # "<host>:<path>" prefix shared by all remote file references.
    def remote_root
      "#{config["host"]}:#{config["path"]}"
    end

    # Runs an external program with an explicit argument vector, bypassing the
    # shell so names and paths containing spaces or shell metacharacters are
    # passed through verbatim.
    #
    # @return [String] the command's standard output
    def run_command(*argv)
      IO.popen(argv, &:read)
    end
  end
end
|
data/lib/petasos.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "rake"
|
4
|
+
require "yaml"
|
5
|
+
|
6
|
+
# Entry point: runs every location configured in the current working directory
# and, when a distribution config is present, the distribution as well.
class Petasos
  class Error < StandardError; end

  # Processes all locations, then the distribution if
  # petasos_distribution-config.yaml exists in the working directory.
  def run
    process_locations
    # NOTE(review): the gate checks one specific file name while
    # process_distribution globs petasos_distribution-*.yaml — confirm whether
    # other distribution files are meant to require this one.
    process_distribution if File.file?(File.join(Dir.pwd, "petasos_distribution-config.yaml"))
  end

  # Loads each petasos_location-*.yaml file and runs a Petasos::Location for
  # every entry it lists.
  def process_locations
    FileList.new("petasos_location-*.yaml").each do |location_file|
      # An empty YAML file loads as a falsy value; treat it as "no locations".
      locations = YAML.load_file(location_file) || []
      locations.each { |location| Petasos::Location.new(location).run }
    end
  end

  # Loads each petasos_distribution-*.yaml file and runs a Petasos::Distributor
  # with its content.
  def process_distribution
    FileList.new("petasos_distribution-*.yaml").each do |distribution_file|
      node_config = YAML.load_file(distribution_file)
      # Skip empty files rather than failing inside the distributor.
      next unless node_config

      Petasos::Distributor.new(node_config).run
    end
  end
end
|
33
|
+
|
34
|
+
require "petasos/location"
|
35
|
+
require "petasos/node"
|
36
|
+
require "petasos/distributor"
|
metadata
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: petasos
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Justin Myers
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
date: 2023-02-14 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: Petasos identifies new files and distributes them to where they belong
|
14
|
+
email:
|
15
|
+
- justin@tenmillionyears.org
|
16
|
+
executables:
|
17
|
+
- petasos
|
18
|
+
extensions: []
|
19
|
+
extra_rdoc_files: []
|
20
|
+
files:
|
21
|
+
- bin/petasos
|
22
|
+
- lib/petasos.rb
|
23
|
+
- lib/petasos/distributor.rb
|
24
|
+
- lib/petasos/location.rb
|
25
|
+
- lib/petasos/node.rb
|
26
|
+
homepage: https://github.com/JustinMyers/petasos
|
27
|
+
licenses:
|
28
|
+
- MIT
|
29
|
+
metadata: {}
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
require_paths:
|
33
|
+
- lib
|
34
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
35
|
+
requirements:
|
36
|
+
- - ">="
|
37
|
+
- !ruby/object:Gem::Version
|
38
|
+
version: '0'
|
39
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: '0'
|
44
|
+
requirements: []
|
45
|
+
rubygems_version: 3.1.6
|
46
|
+
signing_key:
|
47
|
+
specification_version: 4
|
48
|
+
summary: Petasos identifies new files and distributes them to where they belong
|
49
|
+
test_files: []
|