parallel_sftp 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,164 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "open3"
4
+ require "fileutils"
5
+
6
module ParallelSftp
  # Executes lftp downloads with progress tracking.
  #
  # The command to run comes from +lftp_command+ (an object responding to
  # +to_command+, +local_path+ and +remote_path+). Every line of the child's
  # combined stdout/stderr is buffered and offered to a ProgressParser; when an
  # +on_segment_progress+ callback is given, a background thread additionally
  # polls the "<local_path>.lftp-pget-status" file for per-segment progress.
  class Download
    attr_reader :lftp_command, :on_progress, :on_segment_progress, :output_buffer

    # Default interval for polling the status file (in seconds)
    DEFAULT_POLL_INTERVAL = 1

    # How long to wait for the polling thread to finish before killing it (in seconds)
    POLL_SHUTDOWN_TIMEOUT = 2

    # @param lftp_command [#to_command, #local_path, #remote_path] command description
    # @param on_progress [#call, nil] called with ProgressParser#to_h for each output
    #   line the parser recognises
    # @param on_segment_progress [#call, nil] called with the segment progress hash
    #   each time the status file is parsed successfully
    def initialize(lftp_command, on_progress: nil, on_segment_progress: nil)
      @lftp_command = lftp_command
      @on_progress = on_progress
      @on_segment_progress = on_segment_progress
      @output_buffer = []
      @progress_parser = ProgressParser.new
      @segment_parser = SegmentProgressParser.new
      @time_estimator = TimeEstimator.new
      @polling_thread = nil
      @stop_polling = false
      # Guard and signal used to wake the polling thread as soon as it must stop.
      @poll_mutex = Mutex.new
      @poll_signal = ConditionVariable.new
    end

    # Execute the download
    # Returns the local file path on success
    # Raises DownloadError on failure (ZipIntegrityError when a ".zip" target
    # fails the `unzip -t` check)
    def execute
      ParallelSftp.ensure_lftp_available!

      # Ensure local directory exists
      FileUtils.mkdir_p(File.dirname(lftp_command.local_path))

      run_lftp
    end

    private

    # Spawns the command, streams its output through #process_output_line and
    # hands the exit status to #handle_result.
    def run_lftp
      exit_status = nil
      status_file = status_file_path

      begin
        Open3.popen2e(*lftp_command.to_command) do |stdin, stdout_stderr, wait_thr|
          stdin.close

          # Start background polling for segment progress
          start_segment_polling(status_file) if on_segment_progress

          stdout_stderr.each_line do |line|
            @output_buffer << line
            process_output_line(line)
          end

          exit_status = wait_thr.value
        end
      ensure
        # Always stop the poller, even when a callback or the spawn itself
        # raised; otherwise the thread would keep polling after we returned.
        stop_segment_polling
      end

      handle_result(exit_status)
    end

    # Path of the status file polled for segment progress.
    def status_file_path
      "#{lftp_command.local_path}.lftp-pget-status"
    end

    # Starts the background thread that polls +status_file+.
    def start_segment_polling(status_file)
      @stop_polling = false
      @polling_thread = Thread.new do
        poll_segment_progress(status_file)
      end
    end

    # Asks the polling thread to stop, wakes it if it is waiting, and kills it
    # if it has not finished within POLL_SHUTDOWN_TIMEOUT seconds.
    def stop_segment_polling
      @poll_mutex.synchronize do
        @stop_polling = true
        @poll_signal.broadcast
      end

      thread = @polling_thread
      @polling_thread = nil
      return unless thread&.alive?

      thread.join(POLL_SHUTDOWN_TIMEOUT)
      thread.kill if thread.alive?
    end

    # Polling loop run on the background thread: wait one interval, then parse
    # the status file and report progress, until asked to stop.
    def poll_segment_progress(status_file)
      loop do
        wait_for_next_poll
        break if @stop_polling

        begin
          next unless File.exist?(status_file)

          on_segment_progress&.call(build_segment_progress) if @segment_parser.parse(status_file)
        rescue StandardError
          # Silently continue on parse errors - the file may be mid-write.
          # Note that errors raised by the callback are swallowed here as well.
        end
      end
    end

    # Blocks for up to DEFAULT_POLL_INTERVAL seconds, returning early when
    # #stop_segment_polling signals.
    def wait_for_next_poll
      @poll_mutex.synchronize do
        @poll_signal.wait(@poll_mutex, DEFAULT_POLL_INTERVAL) unless @stop_polling
      end
    end

    # Builds the hash passed to +on_segment_progress+: the segment parser's
    # hash plus :speed, :eta (only when total_size is positive), :elapsed and
    # :average_speed from the time estimator.
    def build_segment_progress
      progress = @segment_parser.to_h
      total_downloaded = progress[:total_downloaded]
      total_size = progress[:total_size]

      # Record sample for time estimation
      @time_estimator.record(total_downloaded)

      # Add calculated time estimates
      progress[:speed] = @time_estimator.speed_bytes_per_second
      progress[:eta] = @time_estimator.eta_formatted(total_size, total_downloaded) if total_size && total_size > 0
      progress[:elapsed] = @time_estimator.elapsed_seconds
      progress[:average_speed] = @time_estimator.average_speed

      progress
    end

    # Feeds one output line to the progress parser and notifies +on_progress+
    # when the parser reports an update.
    def process_output_line(line)
      return unless on_progress

      on_progress.call(@progress_parser.to_h) if @progress_parser.parse(line)
    end

    # Returns the local path after verification when the process succeeded,
    # otherwise raises DownloadError carrying the exit code and captured output.
    def handle_result(exit_status)
      if exit_status.success?
        verify_download
        lftp_command.local_path
      else
        raise DownloadError.new(
          "lftp exited with status #{exit_status.exitstatus}",
          remote_path: lftp_command.remote_path,
          exit_status: exit_status.exitstatus,
          output: @output_buffer.join
        )
      end
    end

    # Checks that the target file exists and, for ".zip" targets, that it
    # passes the archive integrity test.
    def verify_download
      unless File.exist?(lftp_command.local_path)
        raise DownloadError.new(
          "Downloaded file not found at expected location",
          remote_path: lftp_command.remote_path,
          output: @output_buffer.join
        )
      end

      # Verify zip integrity if applicable
      verify_zip_integrity if lftp_command.local_path.end_with?(".zip")
    end

    # Runs `unzip -t` on the downloaded file and raises ZipIntegrityError when
    # it exits unsuccessfully.
    def verify_zip_integrity
      path = lftp_command.local_path
      output, status = Open3.capture2e("unzip", "-t", path)
      return if status.success?

      error_lines = output.lines.grep(/error:|bad zipfile|invalid compressed/).first(5).join
      # If none of the known phrases matched, keep the tail of the output so
      # the error still explains why unzip failed.
      error_lines = output.lines.last(5).join if error_lines.empty?

      raise ZipIntegrityError.new(
        "Zip file corrupted (possible segment boundary issue)",
        path: path,
        output: error_lines
      )
    end
  end
end
@@ -0,0 +1,58 @@
1
+ # frozen_string_literal: true
2
+
3
module ParallelSftp
  # Common ancestor of the error classes defined in this module
  class Error < StandardError; end

  # Signals that the lftp executable could not be located
  class LftpNotFoundError < Error
    # @param msg [String] message; defaults to an installation hint
    def initialize(msg = "lftp is not installed or not found in PATH. Install with: brew install lftp (macOS) or apt install lftp (Linux)")
      super(msg)
    end
  end

  # Signals a failed connection to the SFTP server
  class ConnectionError < Error
    attr_reader :host, :exit_status

    # @param msg [String, nil] explicit message; a default naming +host+ is used when nil
    # @param host [String, nil] host the connection was attempted against
    # @param exit_status [Integer, nil] exit status associated with the failure
    def initialize(msg = nil, host: nil, exit_status: nil)
      @host = host
      @exit_status = exit_status
      host_suffix = host ? ": #{host}" : ""
      msg ||= "Failed to connect to SFTP server#{host_suffix}"
      super(msg)
    end
  end

  # Signals a failed file download
  class DownloadError < Error
    attr_reader :remote_path, :exit_status, :output

    # @param msg [String, nil] explicit message; a default naming +remote_path+ is used when nil
    # @param remote_path [String, nil] remote file that was being fetched
    # @param exit_status [Integer, nil] exit status associated with the failure
    # @param output [String, nil] captured process output
    def initialize(msg = nil, remote_path: nil, exit_status: nil, output: nil)
      @remote_path = remote_path
      @exit_status = exit_status
      @output = output
      path_suffix = remote_path ? ": #{remote_path}" : ""
      msg ||= "Failed to download file#{path_suffix}"
      super(msg)
    end
  end

  # Signals that a downloaded file failed an integrity check
  class IntegrityError < Error
    attr_reader :expected_size, :actual_size

    # @param msg [String, nil] explicit message; a default quoting both sizes is used when nil
    # @param expected_size [Integer, nil] size the file should have
    # @param actual_size [Integer, nil] size the file actually has
    def initialize(msg = nil, expected_size: nil, actual_size: nil)
      @expected_size = expected_size
      @actual_size = actual_size
      msg ||= "File integrity check failed. Expected: #{expected_size}, Got: #{actual_size}"
      super(msg)
    end
  end

  # Signals that a zip archive failed its integrity check (segment boundary corruption)
  class ZipIntegrityError < IntegrityError
    attr_reader :path, :output

    # @param msg [String, nil] explicit message; a default naming +path+ is used when nil
    # @param path [String, nil] archive that failed the check
    # @param output [String, nil] diagnostic output describing the failure
    def initialize(msg = nil, path: nil, output: nil)
      @path = path
      @output = output
      path_suffix = path ? ": #{path}" : ""
      msg ||= "Zip integrity check failed#{path_suffix}"
      super(msg)
    end
  end
end
@@ -0,0 +1,64 @@
1
+ # frozen_string_literal: true
2
+
3
module ParallelSftp
  # Builds lftp command scripts for SFTP downloads
  #
  # NOTE(review): the generated script embeds the password, and #to_command
  # passes that script as an argument to `lftp -c`, so the password is part of
  # the process's argument list. Confirm this exposure is acceptable.
  class LftpCommand
    attr_reader :host, :user, :password, :port, :remote_path, :local_path,
                :segments, :timeout, :max_retries, :reconnect_interval, :resume,
                :sftp_connect_program

    # @param options [Hash] connection and transfer settings
    #   Required keys: :host, :user, :password, :remote_path, :local_path
    #   (a missing one raises KeyError). Optional keys :port, :segments,
    #   :timeout, :max_retries, :reconnect_interval and :sftp_connect_program
    #   fall back to ParallelSftp.configuration; :resume defaults to true.
    def initialize(options = {})
      @host = options.fetch(:host)
      @user = options.fetch(:user)
      @password = options.fetch(:password)
      @remote_path = options.fetch(:remote_path)
      @local_path = options.fetch(:local_path)
      @resume = options.fetch(:resume, true)

      # Block form: the global configuration is only consulted for keys the
      # caller did not supply.
      @port = options.fetch(:port) { ParallelSftp.configuration.default_port }
      @segments = options.fetch(:segments) { ParallelSftp.configuration.default_segments }
      @timeout = options.fetch(:timeout) { ParallelSftp.configuration.timeout }
      @max_retries = options.fetch(:max_retries) { ParallelSftp.configuration.max_retries }
      @reconnect_interval = options.fetch(:reconnect_interval) { ParallelSftp.configuration.reconnect_interval }
      @sftp_connect_program = options.fetch(:sftp_connect_program) { ParallelSftp.configuration.sftp_connect_program }
    end

    # Generate the lftp script for download
    #
    # @return [String] newline-terminated script: settings, `open`, `pget`, `quit`
    def to_script
      lines = [
        "set net:timeout #{timeout}",
        "set net:max-retries #{max_retries}",
        "set net:reconnect-interval-base #{reconnect_interval}",
        # NOTE(review): presumably makes lftp accept ssh prompts such as unknown
        # host keys without asking; confirm that trust model is intended.
        "set sftp:auto-confirm yes",
        "set ssl:verify-certificate no"
      ]

      # Add custom SSH connect program if configured (for legacy host key algorithms)
      lines << "set sftp:connect-program #{quote(sftp_connect_program)}" if sftp_connect_program

      lines << "open -p #{port} sftp://#{url_escape(user)}:#{escaped_password}@#{host}"
      lines << "pget -n #{segments}#{resume_flag} #{quote(remote_path)} -o #{quote(local_path)}"
      lines << "quit"

      lines.join("\n") + "\n"
    end

    # Generate the full lftp command with script
    #
    # @return [Array<String>] argv suitable for Open3/Process.spawn
    def to_command
      ["lftp", "-c", to_script]
    end

    private

    # Password percent-encoded for use in the URL's userinfo part.
    def escaped_password
      url_escape(password)
    end

    # Percent-encodes every character outside [A-Za-z0-9_.-]. Encoding is done
    # per byte so multi-byte characters become valid %XX sequences (e.g. a
    # three-byte UTF-8 character yields three escapes, not one oversized one).
    def url_escape(value)
      value.to_s.gsub(/[^a-zA-Z0-9_.-]/) do |char|
        char.bytes.map { |byte| format("%%%02X", byte) }.join
      end
    end

    # Wraps +value+ in double quotes, backslash-escaping embedded double quotes
    # and backslashes so the value cannot terminate the quoted argument early.
    def quote(value)
      escaped = value.to_s.gsub(/["\\]/) { |char| "\\#{char}" }
      "\"#{escaped}\""
    end

    # " -c" (pget's continue flag) when resuming is enabled, otherwise empty.
    def resume_flag
      resume ? " -c" : ""
    end
  end
end
@@ -0,0 +1,82 @@
1
+ # frozen_string_literal: true
2
+
3
module ParallelSftp
  # Parses lftp output to extract progress information
  #
  # State is cumulative: each successful #parse overwrites only the fields
  # found on that line, so #to_h always reflects the latest value seen for
  # every field.
  class ProgressParser
    # Regex patterns for parsing lftp pget output

    # Transfer rate such as "1.5M/s", "512K/s", "100b/s" or "2.0KiB/s".
    # Case-insensitive because the unit may be printed in lower case.
    PROGRESS_PATTERN = /(\d+(?:\.\d+)?)\s*([KMGT]?)(?:i?B)?\/s/i.freeze
    # "<transferred> [of] <total>", each with an optional K/M/G/T unit.
    BYTES_PATTERN = /(\d+(?:\.\d+)?)\s*([KMGT]?)B?\s+(?:of\s+)?(\d+(?:\.\d+)?)\s*([KMGT]?)B?/.freeze
    PERCENT_PATTERN = /(\d+)%/.freeze
    # ETA either as unit-suffixed parts ("45s", "5m30s", "1h25m", "2d3h") or
    # clock style ("12:34", "1:02:03").
    ETA_PATTERN = /eta:?\s*((?:\d+[dhms])+|\d+:\d+(?::\d+)?)/.freeze

    attr_reader :bytes_transferred, :total_bytes, :speed, :percent, :eta

    def initialize
      @bytes_transferred = 0
      @total_bytes = 0
      @speed = 0
      @percent = 0
      @eta = nil
    end

    # Parse a line of lftp output and update progress info
    # Returns true if progress was updated, false otherwise
    #
    # @param line [String, nil] one line of process output
    # @return [Boolean]
    def parse(line)
      return false if line.nil?

      # Process output may contain bytes that are invalid in the string's
      # encoding; regexp matching raises on such strings, so replace them.
      line = line.scrub unless line.valid_encoding?
      return false if line.strip.empty?

      updated = false

      # Extract percentage
      if (match = line.match(PERCENT_PATTERN))
        @percent = match[1].to_i
        updated = true
      end

      # Extract speed
      if (match = line.match(PROGRESS_PATTERN))
        @speed = parse_size(match[1], match[2])
        updated = true
      end

      # Extract bytes transferred and total
      if (match = line.match(BYTES_PATTERN))
        @bytes_transferred = parse_size(match[1], match[2])
        @total_bytes = parse_size(match[3], match[4])
        updated = true
      end

      # Extract ETA
      if (match = line.match(ETA_PATTERN))
        @eta = match[1]
        updated = true
      end

      updated
    end

    # Return progress info as a hash
    #
    # @return [Hash] keys :bytes_transferred, :total_bytes, :speed, :percent, :eta
    def to_h
      {
        bytes_transferred: bytes_transferred,
        total_bytes: total_bytes,
        speed: speed,
        percent: percent,
        eta: eta
      }
    end

    private

    # Converts a numeric string plus unit letter (K/M/G/T, any case, optional
    # trailing "B") into an integer using 1024-based multipliers; the result
    # is truncated.
    def parse_size(value, unit)
      base = value.to_f
      multiplier = case unit.upcase.gsub("B", "")
                   when "K" then 1024
                   when "M" then 1024**2
                   when "G" then 1024**3
                   when "T" then 1024**4
                   else 1
                   end
      (base * multiplier).to_i
    end
  end
end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
module ParallelSftp
  # Parses lftp's .lftp-pget-status file to extract per-segment progress
  #
  # The status file format is:
  #   size=20955686931
  #   0.pos=57442304
  #   0.limit=2619460869
  #   1.pos=2670611717
  #   1.limit=5238921735
  #   ...
  #
  # Where:
  #   - size: Total file size in bytes
  #   - N.pos: Current position (bytes downloaded) for segment N
  #   - N.limit: End position (byte limit) for segment N
  class SegmentProgressParser
    # Represents a single download segment
    Segment = Struct.new(:index, :pos, :limit, :start, keyword_init: true) do
      # Bytes fetched so far within this segment
      def downloaded
        pos - start
      end

      # Total bytes this segment covers
      def segment_size
        limit - start
      end

      # Completion of this segment as a percentage rounded to one decimal
      def percent
        return 0.0 if segment_size.zero?

        ((downloaded.to_f / segment_size) * 100).round(1)
      end
    end

    attr_reader :total_size, :segments

    def initialize
      @total_size = nil
      @segments = []
    end

    # Parse a status file from disk
    #
    # The file is read directly instead of being checked for existence first,
    # so a file that disappears between check and read cannot raise.
    #
    # @param status_file_path [String] Path to the .lftp-pget-status file
    # @return [Boolean] true if file was parsed successfully, false otherwise
    #   (including when the file does not exist)
    def parse(status_file_path)
      parse_content(File.read(status_file_path))
    rescue Errno::ENOENT
      false
    end

    # Parse status file content directly
    #
    # Replaces any previously parsed state. Unrecognised lines are ignored.
    #
    # @param content [String] Content of the status file
    # @return [Boolean] true if a size or at least one complete segment was
    #   found, false if the content held no usable data
    def parse_content(content)
      @segments = []
      @total_size = nil
      segment_data = {}

      content.strip.split("\n").each do |line|
        case line
        when /^size=(-?\d+)/
          @total_size = ::Regexp.last_match(1).to_i
        when /^(\d+)\.pos=(\d+)/
          idx = ::Regexp.last_match(1).to_i
          (segment_data[idx] ||= {})[:pos] = ::Regexp.last_match(2).to_i
        when /^(\d+)\.limit=(\d+)/
          idx = ::Regexp.last_match(1).to_i
          (segment_data[idx] ||= {})[:limit] = ::Regexp.last_match(2).to_i
        end
      end

      build_segments(segment_data)
      !@total_size.nil? || !@segments.empty?
    end

    # Convert to hash representation
    #
    # @return [Hash] Hash with total_size, segments array, total_downloaded, and overall_percent
    def to_h
      {
        total_size: total_size,
        segments: segments.map { |s| segment_to_h(s) },
        total_downloaded: total_downloaded,
        overall_percent: overall_percent
      }
    end

    # Total bytes downloaded across all segments
    #
    # @return [Integer] Total bytes downloaded
    def total_downloaded
      segments.sum(&:downloaded)
    end

    # Overall download percentage
    #
    # @return [Float] Percentage of total_size downloaded, rounded to one
    #   decimal; 0.0 when total_size is unknown or not positive
    def overall_percent
      return 0.0 if total_size.nil? || total_size <= 0

      ((total_downloaded.to_f / total_size) * 100).round(1)
    end

    private

    # Turns the raw per-index data into Segment objects, in index order.
    #
    # The first segment starts at 0; every other segment starts at the
    # previous segment's limit. Segments missing pos or limit, or whose start
    # cannot be determined because the previous limit is missing (as in a
    # partially written file), are skipped rather than built with nil fields.
    def build_segments(segment_data)
      previous_limit = 0

      segment_data.keys.sort.each do |idx|
        data = segment_data[idx]
        start = previous_limit
        previous_limit = data[:limit]
        next unless data[:pos] && data[:limit] && start

        @segments << Segment.new(
          index: idx,
          pos: data[:pos],
          limit: data[:limit],
          start: start
        )
      end
    end

    # Hash form of one segment, including its derived values.
    def segment_to_h(segment)
      {
        index: segment.index,
        pos: segment.pos,
        limit: segment.limit,
        start: segment.start,
        downloaded: segment.downloaded,
        segment_size: segment.segment_size,
        percent: segment.percent
      }
    end
  end
end
@@ -0,0 +1,122 @@
1
+ # frozen_string_literal: true
2
+
3
module ParallelSftp
  # Derives transfer speed and remaining-time estimates from a sliding window
  # of (bytes, time) samples
  class TimeEstimator
    # One observation: cumulative byte count and the moment it was taken
    Sample = Struct.new(:bytes, :time, keyword_init: true)

    attr_reader :window_size

    # Create an estimator with an empty sample window
    #
    # @param window_size [Integer] Maximum number of samples retained for the
    #   current-speed calculation (default: 10)
    def initialize(window_size: 10)
      @window_size = window_size
      @samples = []
      @start_bytes = nil
      @start_time = nil
    end

    # Add a sample to the window, dropping the oldest one when the window
    # would exceed its size. The first sample ever recorded also fixes the
    # start time and start byte count.
    #
    # @param bytes_downloaded [Integer] Cumulative bytes downloaded
    # @param timestamp [Time] Moment of the observation (default: Time.now)
    def record(bytes_downloaded, timestamp = Time.now)
      @start_time = timestamp if @start_time.nil?
      @start_bytes = bytes_downloaded if @start_bytes.nil?

      @samples.push(Sample.new(bytes: bytes_downloaded, time: timestamp))
      @samples.shift if @samples.size > @window_size
    end

    # Speed across the current window: byte difference between the oldest and
    # newest retained samples divided by the time between them
    #
    # @return [Integer, nil] Bytes per second, or nil with fewer than two
    #   samples or a non-positive time span
    def speed_bytes_per_second
      return nil if @samples.size < 2

      oldest = @samples.first
      newest = @samples.last
      span = newest.time - oldest.time
      return nil if span <= 0

      ((newest.bytes - oldest.bytes) / span).round
    end

    # Seconds left at the current window speed
    #
    # @param total_bytes [Integer] Size of the complete file in bytes
    # @param current_bytes [Integer] Bytes downloaded so far
    # @return [Integer, nil] Remaining seconds (0 when nothing remains), or nil
    #   when the speed is unknown or not positive
    def eta_seconds(total_bytes, current_bytes)
      rate = speed_bytes_per_second
      return nil if rate.nil? || rate <= 0

      left = total_bytes - current_bytes
      left <= 0 ? 0 : (left.to_f / rate).round
    end

    # Same estimate as #eta_seconds, rendered as a short duration string
    #
    # @param total_bytes [Integer] Size of the complete file in bytes
    # @param current_bytes [Integer] Bytes downloaded so far
    # @return [String, nil] e.g. "1h25m", "5m30s", "45s"; nil when no estimate exists
    def eta_formatted(total_bytes, current_bytes)
      remaining = eta_seconds(total_bytes, current_bytes)
      remaining.nil? ? nil : format_duration(remaining)
    end

    # Time that has passed between the first recorded sample and now
    #
    # @return [Integer] Whole seconds; 0 before any sample was recorded
    def elapsed_seconds
      return 0 if @start_time.nil?

      (Time.now - @start_time).round
    end

    # Mean speed from the first recorded sample until now, based on the byte
    # count of the newest sample
    #
    # @return [Integer, nil] Bytes per second, or nil without samples or with
    #   a non-positive elapsed time
    def average_speed
      return nil if @start_time.nil? || @samples.empty?

      duration = Time.now - @start_time
      return nil if duration <= 0

      transferred = @samples.last.bytes - @start_bytes
      (transferred / duration).round
    end

    # Forget every sample and the recorded starting point
    def reset!
      @samples = []
      @start_time = nil
      @start_bytes = nil
    end

    private

    # Renders a second count using its two most significant units:
    # "<h>h<m>m" from one hour up, "<m>m<s>s" from one minute up, else "<s>s".
    def format_duration(seconds)
      return "0s" if seconds <= 0

      hours, remainder = seconds.divmod(3600)
      minutes, secs = remainder.divmod(60)

      return "#{hours}h#{minutes}m" if hours > 0
      return "#{minutes}m#{secs}s" if minutes > 0

      "#{secs}s"
    end
  end
end
@@ -0,0 +1,3 @@
1
module ParallelSftp
  # Version string of this release. Frozen explicitly because this file has no
  # frozen_string_literal magic comment, so the literal would otherwise be mutable.
  VERSION = "0.3.0".freeze
end