parallel_sftp 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.envrc +1 -0
- data/.gitignore +18 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +45 -0
- data/CLAUDE.md +178 -0
- data/Gemfile +6 -0
- data/LICENSE +21 -0
- data/README.md +186 -0
- data/Rakefile +6 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/lib/parallel_sftp/client.rb +108 -0
- data/lib/parallel_sftp/configuration.rb +41 -0
- data/lib/parallel_sftp/download.rb +164 -0
- data/lib/parallel_sftp/errors.rb +58 -0
- data/lib/parallel_sftp/lftp_command.rb +64 -0
- data/lib/parallel_sftp/progress_parser.rb +82 -0
- data/lib/parallel_sftp/segment_progress_parser.rb +153 -0
- data/lib/parallel_sftp/time_estimator.rb +122 -0
- data/lib/parallel_sftp/version.rb +3 -0
- data/lib/parallel_sftp.rb +128 -0
- data/parallel_sftp.gemspec +35 -0
- data/shell.nix +29 -0
- metadata +109 -0
checksums.yaml
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
---
|
|
2
|
+
SHA256:
|
|
3
|
+
metadata.gz: '0899099ab70f5bc7db699b0efb103fc2670fa1519b7aff996a095abeef41caed'
|
|
4
|
+
data.tar.gz: e83babc2dd7d491cb3a32f7699a23b9366f243973ca6d6c4ae6b4b4b1075a627
|
|
5
|
+
SHA512:
|
|
6
|
+
metadata.gz: 33c36ce67292ba5fc79873d4db509bbd665e1a21968cad089ea530e0cf053f51f3cf8f17b5521c25a3696ebf9df392067c558176fcefde279667049796d35e35
|
|
7
|
+
data.tar.gz: 5a947dffa20be7cb1969166c4231b0588db67f21e1d070a3e3a8fdd5eb3beea628a95717f36e8107e1b60780311b6d309114692dd1bf5742be8f4b35538d626a
|
data/.envrc
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
use nix
|
data/.gitignore
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
/.bundle/
|
|
2
|
+
/.yardoc
|
|
3
|
+
/_yardoc/
|
|
4
|
+
/coverage/
|
|
5
|
+
/doc/
|
|
6
|
+
/pkg/
|
|
7
|
+
/spec/reports/
|
|
8
|
+
/tmp/
|
|
9
|
+
/.gems/
|
|
10
|
+
|
|
11
|
+
# rspec failure tracking
|
|
12
|
+
.rspec_status
|
|
13
|
+
|
|
14
|
+
# Gem build artifacts
|
|
15
|
+
*.gem
|
|
16
|
+
|
|
17
|
+
# Bundler lockfile (gems should test against fresh dependency resolution)
|
|
18
|
+
Gemfile.lock
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
1
|
+
# Changelog
|
|
2
|
+
|
|
3
|
+
All notable changes to this project will be documented in this file.
|
|
4
|
+
|
|
5
|
+
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
|
|
6
|
+
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
|
|
7
|
+
|
|
8
|
+
## [Unreleased]
|
|
9
|
+
|
|
10
|
+
## [0.3.0] - 2026-02-05
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
- `ZipIntegrityError` class for zip file corruption detection
|
|
14
|
+
- Automatic zip integrity verification using `unzip -t` after download
|
|
15
|
+
- Auto-retry on zip corruption with parallel-first strategy
|
|
16
|
+
- New `retry_on_corruption` option (default: true) to enable/disable auto-retry
|
|
17
|
+
- New `parallel_retries` option (default: 2) - retries with same segment count before reducing
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
- Downloads now verify zip integrity before returning success
|
|
21
|
+
- Corrupted downloads are automatically cleaned up before retry
|
|
22
|
+
|
|
23
|
+
## [0.2.0] - 2026-02-04
|
|
24
|
+
|
|
25
|
+
### Added
|
|
26
|
+
- Per-segment progress tracking via `on_segment_progress` callback
|
|
27
|
+
- `SegmentProgressParser` class for parsing `.lftp-pget-status` files
|
|
28
|
+
- `TimeEstimator` class for calculating download speed and ETA with moving window
|
|
29
|
+
- Calculated time estimates independent of lftp's reported ETA
|
|
30
|
+
- Elapsed time tracking since download start
|
|
31
|
+
- Average speed calculation from download start
|
|
32
|
+
|
|
33
|
+
## [0.1.0] - 2026-02-04
|
|
34
|
+
|
|
35
|
+
### Added
|
|
36
|
+
- Initial release
|
|
37
|
+
- `ParallelSftp.download` one-liner for simple downloads
|
|
38
|
+
- `ParallelSftp::Client` for multiple downloads with shared connection settings
|
|
39
|
+
- Parallel/segmented downloads via lftp's `pget` command
|
|
40
|
+
- Resume support for interrupted downloads
|
|
41
|
+
- Progress callbacks with percent, speed, ETA
|
|
42
|
+
- Global configuration with `ParallelSftp.configure`
|
|
43
|
+
- `optimize_for_large_files!` preset for 20GB+ files
|
|
44
|
+
- Error classes: `LftpNotFoundError`, `ConnectionError`, `DownloadError`, `IntegrityError`
|
|
45
|
+
- lftp availability check with `lftp_available?` and `lftp_version`
|
data/CLAUDE.md
ADDED
|
@@ -0,0 +1,178 @@
|
|
|
1
|
+
# parallel_sftp Gem Guidelines
|
|
2
|
+
|
|
3
|
+
> **Purpose**: Fast parallel SFTP downloads using lftp's segmented transfer (`pget` command)
|
|
4
|
+
|
|
5
|
+
## Project Overview
|
|
6
|
+
|
|
7
|
+
- **Test Framework**: RSpec
|
|
8
|
+
- **Ruby Version**: >= 2.5.0
|
|
9
|
+
- **External Dependency**: lftp (must be installed on system)
|
|
10
|
+
- **License**: MIT
|
|
11
|
+
|
|
12
|
+
## Quick Commands
|
|
13
|
+
|
|
14
|
+
```bash
|
|
15
|
+
# Run all tests
|
|
16
|
+
bundle exec rspec
|
|
17
|
+
|
|
18
|
+
# Run specific test file
|
|
19
|
+
bundle exec rspec spec/parallel_sftp/segment_progress_parser_spec.rb
|
|
20
|
+
|
|
21
|
+
# Run with documentation format
|
|
22
|
+
bundle exec rspec --format documentation
|
|
23
|
+
|
|
24
|
+
# Integration tests (require SFTP server)
|
|
25
|
+
SFTP_TEST_HOST=host SFTP_TEST_USER=user SFTP_TEST_PASSWORD=pass bundle exec rspec spec/integration
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Architecture
|
|
29
|
+
|
|
30
|
+
```
|
|
31
|
+
lib/parallel_sftp/
|
|
32
|
+
├── client.rb # High-level API, creates LftpCommand and Download
|
|
33
|
+
├── configuration.rb # Global settings (segments, timeout, retries)
|
|
34
|
+
├── download.rb # Executes lftp, handles progress callbacks
|
|
35
|
+
├── errors.rb # Custom error classes
|
|
36
|
+
├── lftp_command.rb # Builds lftp script with pget command
|
|
37
|
+
├── progress_parser.rb # Parses lftp stdout for progress info
|
|
38
|
+
├── segment_progress_parser.rb # Parses .lftp-pget-status file for per-segment progress
|
|
39
|
+
├── time_estimator.rb # Calculates speed/ETA with moving window
|
|
40
|
+
└── version.rb # Gem version
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
## Key Patterns
|
|
44
|
+
|
|
45
|
+
### Adding New Options
|
|
46
|
+
|
|
47
|
+
When adding a new option that flows through the API:
|
|
48
|
+
|
|
49
|
+
1. Add to `Configuration` class (with default)
|
|
50
|
+
2. Add to `LftpCommand#initialize` parameters
|
|
51
|
+
3. Add to `Client#download` options handling
|
|
52
|
+
4. Add to `ParallelSftp.download` module method
|
|
53
|
+
5. Update specs for all affected classes
|
|
54
|
+
|
|
55
|
+
### Progress Callbacks
|
|
56
|
+
|
|
57
|
+
Two callback types exist:
|
|
58
|
+
|
|
59
|
+
- **`on_progress`**: Parsed from lftp stdout (percent, speed, eta)
|
|
60
|
+
- **`on_segment_progress`**: Polled from `.lftp-pget-status` file (per-segment detail)
|
|
61
|
+
|
|
62
|
+
### Thread Safety
|
|
63
|
+
|
|
64
|
+
The `Download` class spawns a background thread for segment progress polling. Key patterns:
|
|
65
|
+
|
|
66
|
+
```ruby
|
|
67
|
+
# Start polling
|
|
68
|
+
@stop_polling = false
|
|
69
|
+
@polling_thread = Thread.new { poll_segment_progress(status_file) }
|
|
70
|
+
|
|
71
|
+
# Stop polling (with timeout)
|
|
72
|
+
@stop_polling = true
|
|
73
|
+
@polling_thread.join(2)
|
|
74
|
+
@polling_thread.kill if @polling_thread.alive?
|
|
75
|
+
```
|
|
76
|
+
|
|
77
|
+
### lftp Status File Format
|
|
78
|
+
|
|
79
|
+
lftp creates `{filename}.lftp-pget-status` during pget downloads:
|
|
80
|
+
|
|
81
|
+
```
|
|
82
|
+
size=20955686931
|
|
83
|
+
0.pos=57442304
|
|
84
|
+
0.limit=2619460869
|
|
85
|
+
1.pos=2670611717
|
|
86
|
+
1.limit=5238921735
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
- `size`: Total file size (-2 indicates unknown/error)
|
|
90
|
+
- `N.pos`: Current byte position for segment N
|
|
91
|
+
- `N.limit`: End byte position for segment N
|
|
92
|
+
- Segment start = previous segment's limit (or 0 for segment 0)
|
|
93
|
+
|
|
94
|
+
### Password Escaping
|
|
95
|
+
|
|
96
|
+
Special characters in passwords must be URL-encoded for lftp:
|
|
97
|
+
|
|
98
|
+
```ruby
|
|
99
|
+
# In LftpCommand
|
|
100
|
+
def escaped_password
|
|
101
|
+
CGI.escape(password)
|
|
102
|
+
end
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Legacy SSH Server Support
|
|
106
|
+
|
|
107
|
+
Some servers only offer `ssh-rsa` host keys. Configure via:
|
|
108
|
+
|
|
109
|
+
```ruby
|
|
110
|
+
ParallelSftp.configure do |config|
|
|
111
|
+
config.sftp_connect_program = 'ssh -o HostKeyAlgorithms=+ssh-rsa -o PubkeyAcceptedKeyTypes=+ssh-rsa'
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
# Or pass explicitly (recommended for rake tasks)
|
|
115
|
+
ParallelSftp.download(
|
|
116
|
+
sftp_connect_program: 'ssh -o HostKeyAlgorithms=+ssh-rsa',
|
|
117
|
+
...
|
|
118
|
+
)
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## Testing Patterns
|
|
122
|
+
|
|
123
|
+
### Mocking lftp Availability
|
|
124
|
+
|
|
125
|
+
```ruby
|
|
126
|
+
before do
|
|
127
|
+
allow(ParallelSftp).to receive(:lftp_available?).and_return(true)
|
|
128
|
+
end
|
|
129
|
+
```
|
|
130
|
+
|
|
131
|
+
### Mocking Download Execution
|
|
132
|
+
|
|
133
|
+
```ruby
|
|
134
|
+
download_mock = instance_double(ParallelSftp::Download, execute: local_path)
|
|
135
|
+
allow(ParallelSftp::Download).to receive(:new).and_return(download_mock)
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
### Testing Progress Callbacks
|
|
139
|
+
|
|
140
|
+
```ruby
|
|
141
|
+
it "passes progress callback to Download" do
|
|
142
|
+
progress_callback = ->(info) { puts info }
|
|
143
|
+
|
|
144
|
+
expect(ParallelSftp::Download).to receive(:new)
|
|
145
|
+
.with(anything, on_progress: progress_callback, on_segment_progress: nil)
|
|
146
|
+
.and_return(download_mock)
|
|
147
|
+
|
|
148
|
+
client.download(remote_path, local_path, on_progress: progress_callback)
|
|
149
|
+
end
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Using Tempfile for Status File Tests
|
|
153
|
+
|
|
154
|
+
```ruby
|
|
155
|
+
Tempfile.create("status") do |f|
|
|
156
|
+
f.write("size=1000\n0.pos=500\n0.limit=1000\n")
|
|
157
|
+
f.rewind
|
|
158
|
+
expect(parser.parse(f.path)).to be true
|
|
159
|
+
end
|
|
160
|
+
```
|
|
161
|
+
|
|
162
|
+
## Error Handling
|
|
163
|
+
|
|
164
|
+
Custom errors inherit from `ParallelSftp::Error`:
|
|
165
|
+
|
|
166
|
+
- `LftpNotFoundError` - lftp not installed
|
|
167
|
+
- `ConnectionError` - SFTP connection failed (includes host, exit_status)
|
|
168
|
+
- `DownloadError` - Transfer failed (includes remote_path, exit_status, output)
|
|
169
|
+
- `IntegrityError` - File size mismatch (includes expected_size, actual_size)
|
|
170
|
+
|
|
171
|
+
## Code Style
|
|
172
|
+
|
|
173
|
+
- Frozen string literals in all files
|
|
174
|
+
- RSpec `described_class` for subject
|
|
175
|
+
- Keyword arguments for complex methods
|
|
176
|
+
- Guard clauses for early returns
|
|
177
|
+
- Struct for simple data objects (e.g., `Segment`, `Sample`)
|
|
178
|
+
|
data/Gemfile
ADDED
data/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 Nestor G Pestelos Jr
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
|
@@ -0,0 +1,186 @@
|
|
|
1
|
+
# ParallelSftp
|
|
2
|
+
|
|
3
|
+
Fast parallel SFTP downloads using lftp's segmented transfer. This gem wraps lftp to enable multi-connection parallel downloads for large files.
|
|
4
|
+
|
|
5
|
+
## Features
|
|
6
|
+
|
|
7
|
+
- Parallel/segmented downloads using multiple connections
|
|
8
|
+
- Resume interrupted downloads
|
|
9
|
+
- Progress callbacks for monitoring
|
|
10
|
+
- Configurable retry and timeout settings
|
|
11
|
+
- Optimized presets for large files (20GB+)
|
|
12
|
+
|
|
13
|
+
## Requirements
|
|
14
|
+
|
|
15
|
+
- **lftp** must be installed on the system
|
|
16
|
+
|
|
17
|
+
### Installing lftp
|
|
18
|
+
|
|
19
|
+
**macOS:**
|
|
20
|
+
```bash
|
|
21
|
+
brew install lftp
|
|
22
|
+
```
|
|
23
|
+
|
|
24
|
+
**Ubuntu/Debian:**
|
|
25
|
+
```bash
|
|
26
|
+
apt install lftp
|
|
27
|
+
```
|
|
28
|
+
|
|
29
|
+
**Heroku:**
|
|
30
|
+
```bash
|
|
31
|
+
heroku buildpacks:add --index 1 https://github.com/heroku/heroku-buildpack-apt
|
|
32
|
+
echo "lftp" > Aptfile
|
|
33
|
+
git add Aptfile && git commit -m "Add lftp via apt buildpack"
|
|
34
|
+
git push heroku master
|
|
35
|
+
```
|
|
36
|
+
|
|
37
|
+
## Installation
|
|
38
|
+
|
|
39
|
+
Add this line to your application's Gemfile:
|
|
40
|
+
|
|
41
|
+
```ruby
|
|
42
|
+
gem 'parallel_sftp'
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
And then execute:
|
|
46
|
+
|
|
47
|
+
```bash
|
|
48
|
+
bundle install
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## Usage
|
|
52
|
+
|
|
53
|
+
### Simple one-liner
|
|
54
|
+
|
|
55
|
+
```ruby
|
|
56
|
+
require 'parallel_sftp'
|
|
57
|
+
|
|
58
|
+
ParallelSftp.download(
|
|
59
|
+
host: 'sftp.example.com',
|
|
60
|
+
user: 'username',
|
|
61
|
+
password: 'secret',
|
|
62
|
+
remote_path: '/path/to/large_file.zip',
|
|
63
|
+
local_path: '/tmp/large_file.zip'
|
|
64
|
+
)
|
|
65
|
+
```
|
|
66
|
+
|
|
67
|
+
### Using the Client
|
|
68
|
+
|
|
69
|
+
```ruby
|
|
70
|
+
client = ParallelSftp::Client.new(
|
|
71
|
+
host: 'sftp.example.com',
|
|
72
|
+
user: 'username',
|
|
73
|
+
password: 'secret',
|
|
74
|
+
port: 22
|
|
75
|
+
)
|
|
76
|
+
|
|
77
|
+
# Basic download
|
|
78
|
+
client.download('/remote/file.zip', '/local/file.zip')
|
|
79
|
+
|
|
80
|
+
# With options
|
|
81
|
+
client.download('/remote/file.zip', '/local/file.zip',
|
|
82
|
+
segments: 8, # parallel connections (default: 4)
|
|
83
|
+
resume: true, # continue interrupted downloads (default: true)
|
|
84
|
+
timeout: 60, # connection timeout seconds (default: 30)
|
|
85
|
+
max_retries: 15, # retry attempts (default: 10)
|
|
86
|
+
on_progress: ->(info) { puts "#{info[:percent]}%" }
|
|
87
|
+
)
|
|
88
|
+
```
|
|
89
|
+
|
|
90
|
+
### Progress Callback
|
|
91
|
+
|
|
92
|
+
The progress callback receives a hash with:
|
|
93
|
+
|
|
94
|
+
```ruby
|
|
95
|
+
{
|
|
96
|
+
percent: 45, # percentage complete
|
|
97
|
+
bytes_transferred: 1073741824, # bytes downloaded
|
|
98
|
+
total_bytes: 21474836480, # total file size
|
|
99
|
+
speed: 10485760, # bytes per second
|
|
100
|
+
eta: "30m" # estimated time remaining
|
|
101
|
+
}
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Global Configuration
|
|
105
|
+
|
|
106
|
+
```ruby
|
|
107
|
+
ParallelSftp.configure do |config|
|
|
108
|
+
config.default_segments = 8 # parallel connections
|
|
109
|
+
config.timeout = 60 # connection timeout
|
|
110
|
+
config.max_retries = 15 # retry attempts
|
|
111
|
+
config.reconnect_interval = 10 # seconds between retries
|
|
112
|
+
config.default_port = 22 # SFTP port
|
|
113
|
+
end
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
### Large File Optimization
|
|
117
|
+
|
|
118
|
+
For files 20GB+, use the optimized settings:
|
|
119
|
+
|
|
120
|
+
```ruby
|
|
121
|
+
ParallelSftp.configuration.optimize_for_large_files!
|
|
122
|
+
# Sets: segments=8, timeout=60, max_retries=15, reconnect_interval=10
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Tuned Settings
|
|
126
|
+
|
|
127
|
+
| Setting | Default | Large File (20GB+) | Purpose |
|
|
128
|
+
|---------|---------|-------------------|---------|
|
|
129
|
+
| `segments` | 4 | 8 | More parallel connections |
|
|
130
|
+
| `timeout` | 30 | 60 | Longer timeout for slow starts |
|
|
131
|
+
| `max_retries` | 10 | 15 | More retries for flaky connections |
|
|
132
|
+
| `reconnect_interval` | 5 | 10 | Wait longer between retries |
|
|
133
|
+
|
|
134
|
+
## Error Handling
|
|
135
|
+
|
|
136
|
+
```ruby
|
|
137
|
+
begin
|
|
138
|
+
ParallelSftp.download(...)
|
|
139
|
+
rescue ParallelSftp::LftpNotFoundError
|
|
140
|
+
# lftp is not installed
|
|
141
|
+
rescue ParallelSftp::ConnectionError => e
|
|
142
|
+
# SFTP connection failed
|
|
143
|
+
puts e.host
|
|
144
|
+
puts e.exit_status
|
|
145
|
+
rescue ParallelSftp::DownloadError => e
|
|
146
|
+
# Transfer failed
|
|
147
|
+
puts e.remote_path
|
|
148
|
+
puts e.exit_status
|
|
149
|
+
puts e.output
|
|
150
|
+
rescue ParallelSftp::IntegrityError => e
|
|
151
|
+
# File size mismatch
|
|
152
|
+
puts e.expected_size
|
|
153
|
+
puts e.actual_size
|
|
154
|
+
end
|
|
155
|
+
```
|
|
156
|
+
|
|
157
|
+
## Checking lftp Availability
|
|
158
|
+
|
|
159
|
+
```ruby
|
|
160
|
+
if ParallelSftp.lftp_available?
|
|
161
|
+
puts "lftp version: #{ParallelSftp.lftp_version}"
|
|
162
|
+
else
|
|
163
|
+
puts "lftp is not installed"
|
|
164
|
+
end
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
## Development
|
|
168
|
+
|
|
169
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt.
|
|
170
|
+
|
|
171
|
+
### Running Integration Tests
|
|
172
|
+
|
|
173
|
+
Integration tests require a running SFTP server. Set these environment variables:
|
|
174
|
+
|
|
175
|
+
```bash
|
|
176
|
+
export SFTP_TEST_HOST=your-sftp-server.com
|
|
177
|
+
export SFTP_TEST_USER=username
|
|
178
|
+
export SFTP_TEST_PASSWORD=password
|
|
179
|
+
export SFTP_TEST_FILE=/path/to/test/file.txt
|
|
180
|
+
|
|
181
|
+
rspec spec/integration
|
|
182
|
+
```
|
|
183
|
+
|
|
184
|
+
## License
|
|
185
|
+
|
|
186
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
data/Rakefile
ADDED
data/bin/console
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
|
|
3
|
+
require "bundler/setup"
|
|
4
|
+
require "parallel_sftp"
|
|
5
|
+
|
|
6
|
+
# You can add fixtures and/or initialization code here to make experimenting
|
|
7
|
+
# with your gem easier. You can also use a different console, if you like.
|
|
8
|
+
|
|
9
|
+
# (If you use this, don't forget to add pry to your Gemfile!)
|
|
10
|
+
# require "pry"
|
|
11
|
+
# Pry.start
|
|
12
|
+
|
|
13
|
+
require "irb"
|
|
14
|
+
IRB.start(__FILE__)
|
data/bin/setup
ADDED
data/lib/parallel_sftp/client.rb
ADDED
|
@@ -0,0 +1,108 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "fileutils"
|
|
4
|
+
|
|
5
|
+
module ParallelSftp
  # SFTP client for parallel downloads.
  #
  # Stores the connection settings once and builds a fresh LftpCommand /
  # Download pair for every transfer attempt.
  class Client
    attr_reader :host, :user, :password, :port

    # Default number of times to retry with same segment count before reducing
    DEFAULT_PARALLEL_RETRIES = 2

    # @param options [Hash] Connection settings
    # @option options :host Remote host (required)
    # @option options :user Login user (required)
    # @option options :password Login password (required)
    # @option options :port Remote port (default: ParallelSftp.configuration.default_port)
    # @raise [KeyError] if :host, :user or :password is missing
    def initialize(options = {})
      @host = options.fetch(:host)
      @user = options.fetch(:user)
      @password = options.fetch(:password)
      @port = options.fetch(:port, ParallelSftp.configuration.default_port)
    end

    # Download a file from the remote server
    #
    # @param remote_path [String] Path to the file on the remote server
    # @param local_path [String] Local path to save the file
    # @param options [Hash] Download options
    # @option options [Integer] :segments Number of parallel connections (default: 4)
    # @option options [Boolean] :resume Continue interrupted downloads (default: true).
    #   Applies to the first attempt only; retries after a zip corruption always
    #   start from scratch.
    # @option options [Integer] :timeout Connection timeout in seconds (default: 30)
    # @option options [Integer] :max_retries Maximum retry attempts (default: 10)
    # @option options [Integer] :reconnect_interval Seconds between retries (default: 5)
    # @option options [Proc] :on_progress Progress callback receiving hash with :percent, :speed, etc.
    # @option options [Proc] :on_segment_progress Per-segment progress callback receiving hash with
    #   :total_size, :segments, :total_downloaded, :overall_percent, :speed, :eta, :elapsed
    # @option options [Boolean] :retry_on_corruption Auto-retry on zip corruption (default: true)
    # @option options [Integer] :parallel_retries Times to retry with same segments before reducing (default: 2)
    #
    # @return [String] Local path to the downloaded file
    # @raise [DownloadError] if download fails
    # @raise [ZipIntegrityError] if zip corruption persists after all retries
    def download(remote_path, local_path, options = {})
      segments = options.fetch(:segments, ParallelSftp.configuration.default_segments)
      retry_on_corruption = options.fetch(:retry_on_corruption, true)
      parallel_retries = options.fetch(:parallel_retries, DEFAULT_PARALLEL_RETRIES)

      return execute_download(remote_path, local_path, options.merge(segments: segments)) unless retry_on_corruption

      download_with_retry(remote_path, local_path, options, segments, parallel_retries)
    end

    private

    # Runs the download, retrying whenever an attempt raises ZipIntegrityError.
    #
    # Strategy ("parallel first"): each segment count is attempted
    # +parallel_retries+ times; after that the segment count is halved and the
    # counter is reset. Once a single segment has also exhausted its attempts,
    # the last ZipIntegrityError is re-raised.
    #
    # The first attempt honours the caller's :resume option. Every retry runs
    # with resume disabled, because the corrupted file and its pget status
    # file have just been removed and there is nothing valid to continue from.
    def download_with_retry(remote_path, local_path, options, segments, parallel_retries)
      current_segments = segments
      retries_at_current_segments = 0
      attempt_options = options

      loop do
        begin
          return execute_download(remote_path, local_path, attempt_options.merge(segments: current_segments))
        rescue ZipIntegrityError
          cleanup_corrupted_download(local_path)
          attempt_options = options.merge(resume: false)

          retries_at_current_segments += 1

          if retries_at_current_segments < parallel_retries
            warn "[parallel_sftp] Zip corruption detected, retrying with #{current_segments} segments " \
                 "(attempt #{retries_at_current_segments + 1}/#{parallel_retries})..."
          elsif current_segments > 1
            # Reduce segments and reset retry counter. Halve with integer
            # division, but never drop below 1 and always strictly decrease.
            current_segments = (current_segments / 2).clamp(1, current_segments - 1)
            retries_at_current_segments = 0
            warn "[parallel_sftp] Zip corruption persists, reducing to #{current_segments} segments..."
          else
            # segments = 1 and still failing, give up
            raise
          end
        end
      end
    end

    # Removes the corrupted download and lftp's pget status file next to it.
    # Missing files are ignored (rm_f).
    def cleanup_corrupted_download(local_path)
      FileUtils.rm_f(local_path)
      FileUtils.rm_f("#{local_path}.lftp-pget-status")
    end

    # Performs a single download attempt.
    #
    # Options not supplied by the caller fall back to the global configuration
    # (segments, timeout, max_retries, reconnect_interval); :resume falls back
    # to true. Returns whatever Download#execute returns.
    def execute_download(remote_path, local_path, options)
      lftp_command = LftpCommand.new(
        host: host,
        user: user,
        password: password,
        port: port,
        remote_path: remote_path,
        local_path: local_path,
        segments: options.fetch(:segments, ParallelSftp.configuration.default_segments),
        resume: options.fetch(:resume, true),
        timeout: options.fetch(:timeout, ParallelSftp.configuration.timeout),
        max_retries: options.fetch(:max_retries, ParallelSftp.configuration.max_retries),
        reconnect_interval: options.fetch(:reconnect_interval, ParallelSftp.configuration.reconnect_interval),
        sftp_connect_program: options[:sftp_connect_program]
      )

      download = Download.new(
        lftp_command,
        on_progress: options[:on_progress],
        on_segment_progress: options[:on_segment_progress]
      )
      download.execute
    end
  end
end
|
data/lib/parallel_sftp/configuration.rb
ADDED
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ParallelSftp
  # Global settings used as fallbacks when a download does not specify its
  # own values.
  class Configuration
    # Number of parallel connections for segmented download
    attr_accessor :default_segments

    # Connection timeout in seconds
    attr_accessor :timeout

    # Maximum retry attempts
    attr_accessor :max_retries

    # Seconds to wait between reconnection attempts
    attr_accessor :reconnect_interval

    # Default SFTP port
    attr_accessor :default_port

    # Custom SSH connect program for lftp (e.g., for legacy host key algorithms)
    # Example: "ssh -o HostKeyAlgorithms=+ssh-rsa -o PubkeyAcceptedKeyTypes=+ssh-rsa"
    attr_accessor :sftp_connect_program

    # Sets the stock defaults: 4 segments, 30s timeout, 10 retries,
    # 5s reconnect interval, port 22, and no custom connect program.
    def initialize
      @default_segments = 4
      @timeout = 30
      @max_retries = 10
      @reconnect_interval = 5
      @default_port = 22
      @sftp_connect_program = nil
    end

    # Apply large file optimizations (20GB+)
    #
    # Raises segments to 8, timeout to 60, max_retries to 15 and
    # reconnect_interval to 10. The port and connect program are left untouched.
    def optimize_for_large_files!
      @default_segments = 8
      @timeout = 60
      @max_retries = 15
      @reconnect_interval = 10
    end
  end
end
|