storazzo 0.7.0 → 0.7.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/README.md +17 -19
- data/VERSION +1 -1
- data/bin/hello-storazzo +2 -0
- data/bin/ricdisk-magic +7 -13
- data/bin/stats-with-md5 +1 -1
- data/bin/storazzo +2 -0
- data/lib/storazzo/common.rb +18 -5
- data/lib/storazzo/gcs/client.rb +85 -0
- data/lib/storazzo/ric_disk.rb +24 -11
- data/lib/storazzo/search_engine.rb +124 -26
- data/lib/storazzo.rb +1 -0
- data/storazzo.gemspec +4 -0
- data/test/gcs/test_client.rb +42 -0
- metadata +6 -2
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: cf4a2d3f1f19159e98d91c9ffa590733482e5024d2fc773ddac89be26b7c3384
|
|
4
|
+
data.tar.gz: 81679032b930b55b6077ee2f4557ad1b00dab5684c702f25b79b8fb8131efd3f
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 9ecccb88f2966188b3a54b53de353338810a7e578ab4ea73de4fa3279d3c6575aa05639e6fbbc70db27a6bb0501c76ab06e6748292e339ae8c0d7da60385d780
|
|
7
|
+
data.tar.gz: 75254ee445f4c73341361e627d75fdc284e0169571c3cff3a3dd6860be45857ee8af6482c512f42614b7d74373250b71a9d64427458a2de4fd4aac3f109275cc
|
data/Gemfile
CHANGED
data/README.md
CHANGED
|
@@ -3,35 +3,33 @@
|
|
|
3
3
|
|
|
4
4
|
🎦 Storazzo 💎 gem - a Gem to automatically parse your FS for mounts (💽 💾 💿 ) and compute MD5 (🤐) of all files therein and then collect in central DB 📚 through 📦 StorazzoApp📦 (TM).
|
|
5
5
|
|
|
6
|
+

|
|
7
|
+
|
|
6
8
|
# INSTALL
|
|
7
9
|
|
|
8
10
|
`gem install storazzo`
|
|
9
11
|
|
|
10
12
|
(Latest version is hosted in https://rubygems.org/gems/storazzo)
|
|
11
13
|
|
|
12
|
-
#
|
|
13
|
-
|
|
14
|
-
I still struggle to enforce the include of LOCAL unchecked code rather than latest required system gem (cmon Ruby!)
|
|
15
|
-
but I found loads of interesting ways to test my code by googling and StoackOverflowing:
|
|
16
|
-
|
|
17
|
-
* `rake test TEST="test/sum_test.rb"`
|
|
18
|
-
* test-gcs-bucket: `ruby -I test test/test_gcs_bucket.rb` (meh - see below)
|
|
19
|
-
* test-media-subfolder: `rake test TEST="test/media/*.rb"`
|
|
20
|
-
|
|
21
|
-
Single test in single file:
|
|
14
|
+
# Development & Testing
|
|
22
15
|
|
|
23
|
-
|
|
24
|
-
* `rake test TEST="test/media/test_local_folder.rb" TESTOPTS="--name=test_1_first_directory_parsing_actually_works"`
|
|
25
|
-
* `ruby -I test test/test_local_folder.rb -n test_first_directory_parsing_actually_works` (note this includes `storazzo` latest gem
|
|
26
|
-
and doesnt benefit from LATEST code so its NOT good for testing: use RAKE for that).
|
|
16
|
+
To run the tools locally without installing the gem, use `just` (recommended) or standard Ruby flags.
|
|
27
17
|
|
|
28
|
-
|
|
29
|
-
|
|
18
|
+
### Using `just` (Recommended)
|
|
19
|
+
This repository includes a `justfile` with common development commands:
|
|
20
|
+
* `just setup` - Install dependencies
|
|
21
|
+
* `just test` - Run all tests
|
|
22
|
+
* `just run scan /path` - Run the local `storazzo` CLI
|
|
23
|
+
* `just hello` - Run the local `hello-storazzo` script
|
|
30
24
|
|
|
31
|
-
|
|
32
|
-
|
|
25
|
+
### Manual Local Execution
|
|
26
|
+
If you don't have `just` installed, you can use `bundle exec` and the `-Ilib` flag:
|
|
27
|
+
* **Run CLI**: `bundle exec ruby -Ilib bin/storazzo scan /path`
|
|
28
|
+
* **Run Tests**: `bundle exec rake test`
|
|
29
|
+
* **Single Test**: `bundle exec ruby -Ilib:test test/media/test_local_folder.rb`
|
|
33
30
|
|
|
34
|
-
|
|
31
|
+
### Why `bundle exec` and `-Ilib`?
|
|
32
|
+
Using `bundle exec` ensures all dependencies from the `Gemfile` are available. The `-Ilib` flag adds the local `lib/` directory to the Ruby load path, ensuring your local changes are used instead of any installed gem version.
|
|
35
33
|
|
|
36
34
|
# Thanks
|
|
37
35
|
|
data/VERSION
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
0.7.0
|
|
1
|
+
0.7.4
|
data/bin/hello-storazzo
CHANGED
data/bin/ricdisk-magic
CHANGED
|
@@ -1,21 +1,15 @@
|
|
|
1
1
|
#!/usr/bin/env ruby
|
|
2
2
|
# frozen_string_literal: true
|
|
3
3
|
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
# require 'storazzo'
|
|
8
|
-
# require_relative '../lib/storazzo'
|
|
4
|
+
require 'bundler/setup'
|
|
5
|
+
$LOAD_PATH.unshift(File.expand_path('../lib', __dir__))
|
|
6
|
+
require 'storazzo'
|
|
9
7
|
require 'fileutils'
|
|
10
8
|
require 'yaml'
|
|
11
9
|
require 'socket'
|
|
12
|
-
require 'optparse'
|
|
10
|
+
require 'optparse'
|
|
13
11
|
|
|
14
|
-
#
|
|
15
|
-
# puts File.expand_path(FileUtils.pwd, "/../")
|
|
16
|
-
# local_gem_path = File.expand_path(FileUtils.pwd, "/../")
|
|
17
|
-
# gem 'storazzo', path: local_gem_path
|
|
18
|
-
require 'storazzo'
|
|
12
|
+
# require 'storazzo'
|
|
19
13
|
# include Storazzo
|
|
20
14
|
# include Storazzo::Colors
|
|
21
15
|
extend Storazzo::Colors
|
|
@@ -32,8 +26,8 @@ include Storazzo::Common # instead
|
|
|
32
26
|
# include 'lib/ric_disk'
|
|
33
27
|
# extend Storazzo::Colors
|
|
34
28
|
|
|
35
|
-
if RUBY_VERSION.split('.')[0]
|
|
36
|
-
puts 'Refusing to launch a script form Ruby 1. Sorry Ric, its 2020 damn it!'
|
|
29
|
+
if RUBY_VERSION.split('.')[0].to_i < 3
|
|
30
|
+
puts 'Refusing to launch a script form Ruby 1 or 2. Sorry Ric, its 2020 damn it!'
|
|
37
31
|
exit 2020
|
|
38
32
|
end
|
|
39
33
|
|
data/bin/stats-with-md5
CHANGED
|
@@ -7,7 +7,7 @@ require 'optparse' # http://ruby.about.com/od/advancedruby/a/optionparser.htm
|
|
|
7
7
|
require 'date' # for DateTime
|
|
8
8
|
require 'tempfile'
|
|
9
9
|
|
|
10
|
-
if RUBY_VERSION.split('.')[0] == 1
|
|
10
|
+
if RUBY_VERSION.split('.')[0].to_i == 1
|
|
11
11
|
puts 'Refusing to launch a script form Ruby 1. Sorry Ric, its 2020 damn it!'
|
|
12
12
|
exit 2020
|
|
13
13
|
end
|
data/bin/storazzo
CHANGED
data/lib/storazzo/common.rb
CHANGED
|
@@ -28,7 +28,12 @@
|
|
|
28
28
|
# ๐ถ ๐ง ๐ฆ ๐ง ๐ฉ ๐จ ๐ง ๐ง ๐ง ๐ด ๐ต ๐ค ๐ฅ ๐ช ๐ซ ๐ฌ ๐ญ ๐ฑ ๐ณ ๐ฒ ๐ง ๐ธ ๐คด ๐
๐คถ disabled
|
|
29
29
|
# ๐ง ๐ฆป ๐ฆฎ ๐ฆฏ ๐ฆบ ๐ฆผ ๐ฆฝ ๐ฆพ ๐ฆฟ ๐คต ๐ฎ ๐ท ๐ ๐ ๐ด ๐ต ๐ฆธ ๐ฆน ๐ง ๐ง ๐ง ๐ง ๐ง ๐ง ๐ง ๐ผ ๐ฟ ๐ป ๐น ๐บ ๐ฝ ๐พ ๐ธ ๐ โ ๐ฑ ๐ง ๐ฆด ๐ ๐ ๐ ๐ ๐ ๐ข ๐
๐ฆท ๐ฆต ๐ฆถ ๐ญ ๐ฌ ๐ญ ๐ฌ ๐จ ๐ฉ ๐ช ๐ซ ๐ฐ ๐ฑ ๐ฎ ๐ฏ ๐ฃ ๐ค ๐ฅ ๐ฆ ๐ง ๐ฆ ๐ง ๐ข ๐ซ ๐ค ๐จ ๐ฅ ๐ช ๐ฒ ๐ฅ ๐ก ๐ฉ ๐ฏ ๐ ๐ฐ ๐ฒ
|
|
30
30
|
require_relative 'colors'
|
|
31
|
-
require 'pry'
|
|
31
|
+
# require 'pry'
|
|
32
|
+
begin
|
|
33
|
+
require 'pry'
|
|
34
|
+
rescue LoadError
|
|
35
|
+
# Pry not available, no worries
|
|
36
|
+
end
|
|
32
37
|
|
|
33
38
|
module Storazzo
|
|
34
39
|
module Common
|
|
@@ -52,8 +57,8 @@ module Storazzo
|
|
|
52
57
|
puts "[W⚠️RN] #{azure(s)}"
|
|
53
58
|
end
|
|
54
59
|
|
|
55
|
-
def err(
|
|
56
|
-
puts "[ERR๐] #{red(
|
|
60
|
+
def err(str)
|
|
61
|
+
puts "[ERR๐] #{red(str)}" # โ
|
|
57
62
|
end
|
|
58
63
|
|
|
59
64
|
def bug(s)
|
|
@@ -68,8 +73,12 @@ module Storazzo
|
|
|
68
73
|
end
|
|
69
74
|
|
|
70
75
|
def ppp(complex_object_to_colorize)
|
|
71
|
-
|
|
72
|
-
|
|
76
|
+
if defined?(Pry::ColorPrinter)
|
|
77
|
+
Pry::ColorPrinter.pp(complex_object_to_colorize)
|
|
78
|
+
else
|
|
79
|
+
require 'pp'
|
|
80
|
+
pp complex_object_to_colorize
|
|
81
|
+
end
|
|
73
82
|
end
|
|
74
83
|
|
|
75
84
|
def fatal(s)
|
|
@@ -84,6 +93,10 @@ module Storazzo
|
|
|
84
93
|
`uname`.chomp == 'Linux'
|
|
85
94
|
end
|
|
86
95
|
|
|
96
|
+
def slugify(string)
|
|
97
|
+
string.to_s.downcase.strip.gsub(/\s+/, '_').gsub(/[^\w-]/, '')
|
|
98
|
+
end
|
|
99
|
+
|
|
87
100
|
private
|
|
88
101
|
|
|
89
102
|
def _debug_true
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'google/cloud/storage'
|
|
4
|
+
require 'storazzo/common'
|
|
5
|
+
|
|
6
|
+
module Storazzo
|
|
7
|
+
module GCS
|
|
8
|
+
class Client
|
|
9
|
+
include Storazzo::Common
|
|
10
|
+
|
|
11
|
+
attr_reader :storage, :project_id, :bucket_name
|
|
12
|
+
|
|
13
|
+
def initialize(project_id = nil, bucket_name = nil)
|
|
14
|
+
@project_id = project_id || autodetect_project_id
|
|
15
|
+
@bucket_name = bucket_name || ENV['GCS_BUCKET'] || "#{@project_id}-storazzo"
|
|
16
|
+
deb "GCS Client initialized for project: #{@project_id}, bucket: #{@bucket_name}"
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def storage
|
|
20
|
+
@storage ||= Google::Cloud::Storage.new(project_id: @project_id)
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
def ensure_bucket_exists
|
|
24
|
+
return true if bucket_exists?(@bucket_name)
|
|
25
|
+
|
|
26
|
+
deb "Bucket #{@bucket_name} not found. Creating it in project #{@project_id}..."
|
|
27
|
+
storage.create_bucket(@bucket_name)
|
|
28
|
+
true
|
|
29
|
+
rescue StandardError => e
|
|
30
|
+
err "Failed to ensure/create bucket #{@bucket_name}: #{e.message}"
|
|
31
|
+
false
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
def autodetect_project_id
|
|
35
|
+
# Priority 1: Environment Variable
|
|
36
|
+
return ENV['GOOGLE_CLOUD_PROJECT'] if ENV['GOOGLE_CLOUD_PROJECT']
|
|
37
|
+
|
|
38
|
+
# Priority 2: Config file
|
|
39
|
+
config = Storazzo::RicDiskConfig.instance
|
|
40
|
+
config.load
|
|
41
|
+
return config.project_id if config.project_id && config.project_id != 'YOUR-PROJECT-ID'
|
|
42
|
+
|
|
43
|
+
# Priority 3: GCloud CLI default
|
|
44
|
+
begin
|
|
45
|
+
`gcloud config get-value project 2>/dev/null`.strip
|
|
46
|
+
rescue StandardError
|
|
47
|
+
nil
|
|
48
|
+
end
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def bucket_exists?(bucket_name)
|
|
52
|
+
bucket_name = bucket_name.gsub('gs://', '').split('/').first
|
|
53
|
+
!storage.bucket(bucket_name).nil?
|
|
54
|
+
rescue StandardError => e
|
|
55
|
+
warn "Error checking bucket #{bucket_name}: #{e.message}"
|
|
56
|
+
false
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
def list_buckets
|
|
60
|
+
storage.buckets.map(&:name)
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def upload_file(local_path, bucket_name, remote_path)
|
|
64
|
+
bucket_name = bucket_name.gsub('gs://', '').split('/').first
|
|
65
|
+
bucket = storage.bucket(bucket_name)
|
|
66
|
+
raise "Bucket #{bucket_name} not found!" unless bucket
|
|
67
|
+
|
|
68
|
+
deb "Uploading #{local_path} to gs://#{bucket_name}/#{remote_path}..."
|
|
69
|
+
bucket.create_file(local_path, remote_path)
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
def download_file(bucket_name, remote_path, local_path)
|
|
73
|
+
bucket_name = bucket_name.gsub('gs://', '').split('/').first
|
|
74
|
+
bucket = storage.bucket(bucket_name)
|
|
75
|
+
raise "Bucket #{bucket_name} not found!" unless bucket
|
|
76
|
+
|
|
77
|
+
file = bucket.file(remote_path)
|
|
78
|
+
raise "Remote file #{remote_path} not found in bucket #{bucket_name}!" unless file
|
|
79
|
+
|
|
80
|
+
deb "Downloading gs://#{bucket_name}/#{remote_path} to #{local_path}..."
|
|
81
|
+
file.download(local_path)
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
end
|
data/lib/storazzo/ric_disk.rb
CHANGED
|
@@ -4,6 +4,7 @@
|
|
|
4
4
|
# it's considered interesting if there's a ".ricdisk/.ricdisk"
|
|
5
5
|
|
|
6
6
|
require 'digest'
|
|
7
|
+
require 'securerandom'
|
|
7
8
|
|
|
8
9
|
module Storazzo
|
|
9
10
|
class RicDisk
|
|
@@ -34,7 +35,7 @@ module Storazzo
|
|
|
34
35
|
# # todo substitute with protobuf..
|
|
35
36
|
attr_accessor :name, :description, :ricdisk_file, :ricdisk_file_full, :local_mountpoint, :wr, :path,
|
|
36
37
|
:ricdisk_file_empty, :size, :active_dirs, :ricdisk_version,
|
|
37
|
-
:unique_hash # new 202207
|
|
38
|
+
:unique_hash, :disk_uuid, :llm_description, :llm_storage_type # new 202207
|
|
38
39
|
|
|
39
40
|
################################
|
|
40
41
|
## INSTANCE methods
|
|
@@ -53,7 +54,6 @@ module Storazzo
|
|
|
53
54
|
# ok back to business, now path is a String :)
|
|
54
55
|
path = ric_disk_object.path
|
|
55
56
|
deb "RicDisk initialize.. path=#{path}"
|
|
56
|
-
deb "RicDisk initialize.. path=#{path}"
|
|
57
57
|
@local_mountpoint = File.expand_path(path)
|
|
58
58
|
@ard = ric_disk_object # AbstractRicDiskObject
|
|
59
59
|
@description = "This is an automated RicDisk description from v.#{RicdiskVersion}. Created on #{Time.now}'"
|
|
@@ -62,21 +62,34 @@ module Storazzo
|
|
|
62
62
|
@ricdisk_file_full = "#{@local_mountpoint}/#{@ricdisk_file}"
|
|
63
63
|
@label = path.split('/').last
|
|
64
64
|
@name = path.split('/').last
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
65
|
+
|
|
66
|
+
load_existing_config
|
|
67
|
+
|
|
68
|
+
@tags ||= %w[ricdisk storazzo]
|
|
68
69
|
@size = RicDisk._compute_size_could_take_long(path)
|
|
69
|
-
@unique_hash = "MD5::#{Digest::MD5.hexdigest(File.expand_path(path))}"
|
|
70
|
+
@unique_hash = "MD5::#{Digest::MD5.hexdigest(File.expand_path(path))}"
|
|
71
|
+
@disk_uuid ||= SecureRandom.uuid if defined?(SecureRandom)
|
|
70
72
|
@computation_hostname = Socket.gethostname
|
|
71
|
-
@created_at
|
|
73
|
+
@created_at ||= Time.now
|
|
72
74
|
|
|
73
75
|
@ricdisk_file_empty = ricdisk_file_empty?
|
|
74
76
|
|
|
75
|
-
# @config = RicDiskConfig.instance.get_config
|
|
76
|
-
# #puts @config if @config
|
|
77
|
-
# find_info_from_mount(path)
|
|
78
77
|
deb "RicDisk initialize. to_s: #{self}"
|
|
79
|
-
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
def load_existing_config
|
|
81
|
+
return unless File.exist?(@ricdisk_file_full) && !File.empty?(@ricdisk_file_full)
|
|
82
|
+
|
|
83
|
+
begin
|
|
84
|
+
config = YAML.safe_load(File.read(@ricdisk_file_full))
|
|
85
|
+
@disk_uuid = config['disk_uuid']
|
|
86
|
+
@llm_description = config['llm_description']
|
|
87
|
+
@llm_storage_type = config['llm_storage_type']
|
|
88
|
+
@tags = config['tags']
|
|
89
|
+
@name = config['name'] || @name
|
|
90
|
+
rescue StandardError => e
|
|
91
|
+
warn "Error loading existing config from #{@ricdisk_file_full}: #{e.message}"
|
|
92
|
+
end
|
|
80
93
|
end
|
|
81
94
|
|
|
82
95
|
def ricdisk_file_empty?
|
|
@@ -3,9 +3,11 @@
|
|
|
3
3
|
require 'sqlite3'
|
|
4
4
|
require 'fileutils'
|
|
5
5
|
require 'google/cloud/storage'
|
|
6
|
+
require 'storazzo/common'
|
|
6
7
|
|
|
7
8
|
module Storazzo
|
|
8
9
|
class SearchEngine
|
|
10
|
+
include Storazzo::Common
|
|
9
11
|
DB_PATH = File.expand_path("~/.storazzo_index.db")
|
|
10
12
|
|
|
11
13
|
def initialize
|
|
@@ -15,50 +17,146 @@ module Storazzo
|
|
|
15
17
|
end
|
|
16
18
|
|
|
17
19
|
def create_tables
|
|
20
|
+
@db.execute <<-SQL
|
|
21
|
+
CREATE TABLE IF NOT EXISTS disks (
|
|
22
|
+
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
23
|
+
name TEXT,
|
|
24
|
+
slug TEXT UNIQUE,
|
|
25
|
+
type TEXT,
|
|
26
|
+
uuid TEXT,
|
|
27
|
+
llm_description TEXT,
|
|
28
|
+
llm_storage_type TEXT,
|
|
29
|
+
last_scanned_at DATETIME
|
|
30
|
+
);
|
|
31
|
+
SQL
|
|
32
|
+
|
|
18
33
|
@db.execute <<-SQL
|
|
19
34
|
CREATE TABLE IF NOT EXISTS files (
|
|
20
35
|
id INTEGER PRIMARY KEY AUTOINCREMENT,
|
|
21
36
|
md5 VARCHAR(32),
|
|
22
37
|
size INTEGER,
|
|
23
38
|
path TEXT,
|
|
24
|
-
|
|
25
|
-
|
|
39
|
+
disk_id INTEGER,
|
|
40
|
+
file_mtime DATETIME,
|
|
41
|
+
content_type TEXT,
|
|
42
|
+
ingested_at DATETIME,
|
|
43
|
+
FOREIGN KEY(disk_id) REFERENCES disks(id),
|
|
44
|
+
UNIQUE(disk_id, path)
|
|
26
45
|
);
|
|
27
46
|
SQL
|
|
28
47
|
end
|
|
29
48
|
|
|
30
|
-
def
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
49
|
+
def sync_all_from_gcs
|
|
50
|
+
client = Storazzo::GCS::Client.new
|
|
51
|
+
config = Storazzo::RicDiskConfig.instance
|
|
52
|
+
config.load
|
|
53
|
+
|
|
54
|
+
buckets = config.get_bucket_paths
|
|
55
|
+
puts "Syncing metadata from #{buckets.size} buckets..."
|
|
56
|
+
|
|
57
|
+
buckets.each do |bucket_url|
|
|
58
|
+
bucket_name = bucket_url.gsub('gs://', '').split('/').first
|
|
59
|
+
# For now, we search in the standard 'backup/ricdisk-magic/' path
|
|
60
|
+
prefix = "backup/ricdisk-magic/"
|
|
61
|
+
|
|
62
|
+
begin
|
|
63
|
+
bucket = client.storage.bucket(bucket_name)
|
|
64
|
+
next unless bucket
|
|
65
|
+
|
|
66
|
+
files = bucket.files(prefix: prefix)
|
|
67
|
+
rds_files = files.select { |f| f.name.end_with?('.rds') }
|
|
68
|
+
|
|
69
|
+
puts "--- Bucket: gs://#{bucket_name} (#{rds_files.size} catalogs found) ---"
|
|
70
|
+
|
|
71
|
+
rds_files.each do |remote_file|
|
|
72
|
+
# 1. Download to local tmp
|
|
73
|
+
local_tmp_path = File.join(Dir.tmpdir, File.basename(remote_file.name))
|
|
74
|
+
puts "Downloading #{remote_file.name}..."
|
|
75
|
+
remote_file.download(local_tmp_path)
|
|
76
|
+
|
|
77
|
+
# 2. Ingest into SQLite
|
|
78
|
+
disk_name = File.basename(remote_file.name, '.rds').gsub('-ricdisk_stats_v11', '')
|
|
79
|
+
ingest_stats_file(local_tmp_path, disk_name)
|
|
80
|
+
|
|
81
|
+
# 3. Cleanup
|
|
82
|
+
FileUtils.rm(local_tmp_path)
|
|
83
|
+
end
|
|
84
|
+
rescue => e
|
|
85
|
+
warn "Error syncing from gs://#{bucket_name}: #{e.message}"
|
|
86
|
+
end
|
|
87
|
+
end
|
|
34
88
|
end
|
|
35
89
|
|
|
36
90
|
def query(string)
|
|
37
|
-
@db.execute
|
|
91
|
+
@db.execute <<-SQL, ["%#{string}%", "%#{string}%", "%#{string}%"]
|
|
92
|
+
SELECT f.*, d.name as disk_name, d.slug as disk_slug
|
|
93
|
+
FROM files f
|
|
94
|
+
JOIN disks d ON f.disk_id = d.id
|
|
95
|
+
WHERE f.path LIKE ? OR d.name LIKE ? OR d.slug LIKE ?
|
|
96
|
+
SQL
|
|
38
97
|
end
|
|
39
98
|
|
|
99
|
+
def find_or_create_disk(disk_name, opts = {})
|
|
100
|
+
slug = slugify(disk_name)
|
|
101
|
+
type = opts[:type] || 'local'
|
|
102
|
+
uuid = opts[:uuid]
|
|
103
|
+
llm_desc = opts[:llm_description]
|
|
104
|
+
llm_storage = opts[:llm_storage_type]
|
|
105
|
+
|
|
106
|
+
@db.execute <<-SQL, [disk_name, slug, type, uuid, llm_desc, llm_storage]
|
|
107
|
+
INSERT OR IGNORE INTO disks (name, slug, type, uuid, llm_description, llm_storage_type)
|
|
108
|
+
VALUES (?, ?, ?, ?, ?, ?)
|
|
109
|
+
SQL
|
|
110
|
+
|
|
111
|
+
# Update existing record if new info is provided
|
|
112
|
+
if uuid || llm_desc || llm_storage
|
|
113
|
+
@db.execute <<-SQL, [uuid, llm_desc, llm_storage, slug]
|
|
114
|
+
UPDATE disks SET uuid = COALESCE(?, uuid),
|
|
115
|
+
llm_description = COALESCE(?, llm_description),
|
|
116
|
+
llm_storage_type = COALESCE(?, llm_storage_type)
|
|
117
|
+
WHERE slug = ?
|
|
118
|
+
SQL
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
@db.get_first_value("SELECT id FROM disks WHERE slug = ?", [slug])
|
|
122
|
+
end
|
|
123
|
+
|
|
40
124
|
def ingest_stats_file(file_path, disk_name)
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
125
|
+
disk_id = find_or_create_disk(disk_name)
|
|
126
|
+
ingested_at = Time.now.iso8601
|
|
127
|
+
|
|
128
|
+
puts "Ingesting #{file_path} (disk_id: #{disk_id})..."
|
|
129
|
+
|
|
130
|
+
@db.transaction do
|
|
131
|
+
File.readlines(file_path).each do |line|
|
|
132
|
+
next if line.start_with?('#') || line.strip.empty?
|
|
133
|
+
|
|
134
|
+
# Example format:
|
|
135
|
+
# [file_v1.2] md5 mode type datetime size [content_type] filename
|
|
136
|
+
parts = line.split(' ')
|
|
137
|
+
|
|
138
|
+
# Locating the `[content_type]` bracket
|
|
139
|
+
content_type_idx = parts.find_index { |p| p.start_with?('[') && p.end_with?(']') && p != parts.first }
|
|
140
|
+
next unless content_type_idx
|
|
141
|
+
|
|
142
|
+
md5 = parts[1]
|
|
143
|
+
file_mtime = parts[4] # Standardized creation/mod time
|
|
144
|
+
size = parts[content_type_idx - 1].to_i
|
|
145
|
+
content_type = parts[content_type_idx].gsub(/[\[\]]/, '')
|
|
146
|
+
path = parts[(content_type_idx + 1)..-1].join(' ')
|
|
147
|
+
|
|
148
|
+
begin
|
|
149
|
+
@db.execute <<-SQL, [md5, size, path, disk_id, file_mtime, content_type, ingested_at]
|
|
150
|
+
INSERT OR REPLACE INTO files (md5, size, path, disk_id, file_mtime, content_type, ingested_at)
|
|
151
|
+
VALUES (?, ?, ?, ?, ?, ?, ?)
|
|
152
|
+
SQL
|
|
153
|
+
rescue SQLite3::Exception => e
|
|
154
|
+
puts "Error inserting #{path}: #{e.message}"
|
|
155
|
+
end
|
|
60
156
|
end
|
|
61
157
|
end
|
|
158
|
+
|
|
159
|
+
@db.execute("UPDATE disks SET last_scanned_at = ? WHERE id = ?", [ingested_at, disk_id])
|
|
62
160
|
end
|
|
63
161
|
end
|
|
64
162
|
end
|
data/lib/storazzo.rb
CHANGED
|
@@ -41,6 +41,7 @@ require 'storazzo/media/abstract_ric_disk'
|
|
|
41
41
|
require 'storazzo/media/gcs_bucket'
|
|
42
42
|
require 'storazzo/media/local_folder'
|
|
43
43
|
require 'storazzo/media/mount_point'
|
|
44
|
+
require 'storazzo/gcs/client'
|
|
44
45
|
require 'storazzo/ric_disk_ugly' # OLD and 90% working
|
|
45
46
|
require 'storazzo/ric_disk_config' # => RicDiskConfif
|
|
46
47
|
require 'storazzo/ric_disk_sample_config' # => NOTHING!!
|
data/storazzo.gemspec
CHANGED
|
@@ -27,6 +27,10 @@ Gem::Specification.new do |s|
|
|
|
27
27
|
# "storazzo",
|
|
28
28
|
# "hello-storazzo",
|
|
29
29
|
# ]
|
|
30
|
+
s.metadata = {
|
|
31
|
+
"github_repo" => "https://github.com/palladius/storazzo"
|
|
32
|
+
}
|
|
33
|
+
|
|
30
34
|
s.homepage = 'https://rubygems.org/gems/storazzo' # maybe https://github.com/palladius/storazzo
|
|
31
35
|
s.license = 'MIT'
|
|
32
36
|
|
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
require 'minitest/autorun'
|
|
2
|
+
require 'mocha/minitest'
|
|
3
|
+
require 'storazzo'
|
|
4
|
+
require 'storazzo/gcs/client'
|
|
5
|
+
|
|
6
|
+
class GcsClientTest < Minitest::Test
|
|
7
|
+
def setup
|
|
8
|
+
ENV['GOOGLE_CLOUD_PROJECT'] = 'test-project'
|
|
9
|
+
ENV['GCS_BUCKET'] = nil
|
|
10
|
+
@mock_storage = mock()
|
|
11
|
+
Google::Cloud::Storage.stubs(:new).returns(@mock_storage)
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
def test_default_bucket_name
|
|
15
|
+
client = Storazzo::GCS::Client.new('test-project')
|
|
16
|
+
assert_equal 'test-project-storazzo', client.bucket_name
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
def test_custom_bucket_name_from_env
|
|
20
|
+
ENV['GCS_BUCKET'] = 'my-custom-bucket'
|
|
21
|
+
client = Storazzo::GCS::Client.new('test-project')
|
|
22
|
+
assert_equal 'my-custom-bucket', client.bucket_name
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
def test_ensure_bucket_exists_when_it_does
|
|
26
|
+
client = Storazzo::GCS::Client.new('test-project')
|
|
27
|
+
mock_bucket = mock()
|
|
28
|
+
@mock_storage.expects(:bucket).with('test-project-storazzo').returns(mock_bucket)
|
|
29
|
+
|
|
30
|
+
assert client.ensure_bucket_exists
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def test_ensure_bucket_exists_when_it_does_not_creates_it
|
|
34
|
+
client = Storazzo::GCS::Client.new('test-project')
|
|
35
|
+
|
|
36
|
+
# First check returns nil, then create_bucket is called
|
|
37
|
+
@mock_storage.expects(:bucket).with('test-project-storazzo').returns(nil)
|
|
38
|
+
@mock_storage.expects(:create_bucket).with('test-project-storazzo').returns(true)
|
|
39
|
+
|
|
40
|
+
assert client.ensure_bucket_exists
|
|
41
|
+
end
|
|
42
|
+
end
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: storazzo
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.7.0
|
|
4
|
+
version: 0.7.4
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Riccardo Carlesso
|
|
@@ -80,6 +80,7 @@ files:
|
|
|
80
80
|
- lib/storazzo/colors.rb
|
|
81
81
|
- lib/storazzo/common.rb
|
|
82
82
|
- lib/storazzo/debug.rb
|
|
83
|
+
- lib/storazzo/gcs/client.rb
|
|
83
84
|
- lib/storazzo/hashify.rb
|
|
84
85
|
- lib/storazzo/main.rb
|
|
85
86
|
- lib/storazzo/media/README.md
|
|
@@ -100,6 +101,7 @@ files:
|
|
|
100
101
|
- test/benchmark/test_hashing_functions-speed.rb
|
|
101
102
|
- test/bin/new-idea.rb
|
|
102
103
|
- test/bin/storazzo.rb
|
|
104
|
+
- test/gcs/test_client.rb
|
|
103
105
|
- test/media/test_abstract_ric_disk.rb
|
|
104
106
|
- test/media/test_gcs_bucket.rb
|
|
105
107
|
- test/media/test_local_folder.rb
|
|
@@ -118,7 +120,8 @@ files:
|
|
|
118
120
|
homepage: https://rubygems.org/gems/storazzo
|
|
119
121
|
licenses:
|
|
120
122
|
- MIT
|
|
121
|
-
metadata:
|
|
123
|
+
metadata:
|
|
124
|
+
github_repo: https://github.com/palladius/storazzo
|
|
122
125
|
rdoc_options: []
|
|
123
126
|
require_paths:
|
|
124
127
|
- lib
|
|
@@ -141,6 +144,7 @@ test_files:
|
|
|
141
144
|
- test/benchmark/test_hashing_functions-speed.rb
|
|
142
145
|
- test/bin/new-idea.rb
|
|
143
146
|
- test/bin/storazzo.rb
|
|
147
|
+
- test/gcs/test_client.rb
|
|
144
148
|
- test/media/test_abstract_ric_disk.rb
|
|
145
149
|
- test/media/test_gcs_bucket.rb
|
|
146
150
|
- test/media/test_local_folder.rb
|