bmc-tools 0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/bin-other/autobackup +70 -0
  3. data/bin-other/autobackup.inc +157 -0
  4. data/bin-other/bob/ExtractPagesFromPDF +0 -0
  5. data/bin-other/bob/backupsync +46 -0
  6. data/bin-other/bob/cachesync +58 -0
  7. data/bin-other/bob/deb +55 -0
  8. data/bin-other/bob/debmirror +2551 -0
  9. data/bin-other/bob/debmirror.marlin +2551 -0
  10. data/bin-other/bob/exif_rotate.sh +34 -0
  11. data/bin-other/bob/exif_rotate_dates.sh +35 -0
  12. data/bin-other/bob/git-big-objects +85 -0
  13. data/bin-other/bob/git-commit-details +22 -0
  14. data/bin-other/bob/git-commit-sizes +16 -0
  15. data/bin-other/bob/git-show-biggest.sh +33 -0
  16. data/bin-other/bob/git_remove_history.sh +24 -0
  17. data/bin-other/bob/git_staged_status.sh +29 -0
  18. data/bin-other/bob/identify_extra_raws +137 -0
  19. data/bin-other/bob/wallpaper_restore.sh +1 -0
  20. data/bin-other/bob/watch_olsr.sh +1 -0
  21. data/bin-other/bob/watch_rbpm_node_status +1 -0
  22. data/bin-other/bob/watermark_bmphoto_large.sh +32 -0
  23. data/bin-other/bob/watermark_bmphoto_small.sh +32 -0
  24. data/bin-other/bubbles/deb +42 -0
  25. data/bin-other/bubbles/firewall.sh +134 -0
  26. data/bin-other/bubbles/kernel-mirror.sh +15 -0
  27. data/bin-other/deb +42 -0
  28. data/bin-other/exif_dates.sh +35 -0
  29. data/bin-other/git-large-files +62 -0
  30. data/bin-other/git_add_upto.sh +54 -0
  31. data/bin-other/git_find_big.sh +33 -0
  32. data/bin-other/git_staged_status.sh +29 -0
  33. data/bin-other/image_resize +43 -0
  34. data/bin-other/kernel-mirror.sh +15 -0
  35. data/bin-other/marlin/deb +42 -0
  36. data/bin-other/marlin/firewall.sh +134 -0
  37. data/bin-other/mysql2svn.sh +36 -0
  38. data/bin-other/syno-cleanup.sh +31 -0
  39. data/bin/dockerize +35 -23
  40. data/bin/image_exif +30 -0
  41. data/bin/image_process +156 -0
  42. data/bin/image_process_wname +138 -0
  43. data/bin/tgv_to_pdf +206 -0
  44. data/bmc-tools.gemspec +1 -1
  45. data/lib/cli.rb +8 -0
  46. data/lib/constants.rb +1 -0
  47. data/lib/docker.rb +15 -0
  48. data/lib/git.rb +21 -0
  49. data/lib/runner.rb +19 -0
  50. metadata +52 -2
@@ -0,0 +1,2551 @@
1
+ #!/usr/bin/perl -w
2
+
3
+ # TODO: I'd like to be able to tell it to get some extra files, by name.
4
+ # I'm thinking Contents files. It would be really nice if it could pull
5
+ # a whole directory -- think project/trace, or disks-i386...
6
+ # TODO: It would probably be cleaner and easier to learn if it took
7
+ # apt-style lines to tell where to mirror from and what portions to use.
8
+
9
+ =head1 NAME
10
+
11
+ debmirror - Debian partial mirror script, with ftp, http, hftp or
12
+ rsync and package pool support
13
+
14
+ =head1 SYNOPSIS
15
+
16
+ debmirror [options] <mirrordir>
17
+
18
+ =head1 DESCRIPTION
19
+
20
+ This program downloads and maintains a partial local Debian mirror. It can
21
+ mirror any combination of architectures, distributions, and sections. Files
22
+ are transferred by ftp, and package pools are fully supported. It also does
23
+ locking and updates trace files.
24
+
25
+ To support package pools, this program mirrors in three steps.
26
+
27
+ =over 4
28
+
29
+ =item 1. download Packages and Sources files
30
+
31
+ First it downloads all Packages and Sources files for the subset of Debian it
32
+ was instructed to get.
33
+
34
+ =item 2. clean up unknown files
35
+
36
+ Any files and directories on the local mirror that are not in the list are
37
+ removed.
38
+
39
+ =item 3. download everything else
40
+
41
+ The Packages and Sources files are scanned, to build up a list of all the
42
+ files they refer to. A few other miscellaneous files are added to the list.
43
+ Then the program makes sure that each file in the list is present on the
44
+ local mirror and is up-to-date, using file size (and optionally md5sum) checks.
45
+ Any necessary files are downloaded.
46
+
47
+ =back
48
+
49
+ =cut
50
+
51
+ sub usage {
52
+ warn join(" ", @_)."\n" if @_;
53
+ warn <<EOF;
54
+ Usage: $0 [--progress] [--verbose] [--debug] [--dry-run] [--help]
55
+ [--host=remotehost] [--root=directory]
56
+ [--method=ftp|hftp|http|rsync] [--passive]
57
+ [--user=remoteusername] [--passwd=remoteuserpassword]
58
+ [--proxy=http://user:pass\@url:port/]
59
+ [--dist=foo[,bar,..] ...] [--omit-suite-symlinks]
60
+ [--section=foo[,bar,..] ...] [--arch=foo[,bar,..] ...]
61
+ [--adddir=directory] [--rsync-extra=foo[,bar,..] ...]
62
+ [--di-dist=foo[,bar,..] ...] [--di-arch=foo[,bar,..] ...]
63
+ [--source|--nosource] [--i18n] [--getcontents] [--md5sums]
64
+ [--ignore-missing-release] [--ignore-release-gpg]
65
+ [--ignore=regex] [--exclude=regex] [--include=regex]
66
+ [--exclude-deb-section=regex] [--limit-priority=regex]
67
+ [--timeout=seconds] [--max-batch=number]
68
+ [--rsync-batch=number] [--rsync-options=options]
69
+ [--postcleanup|--cleanup|--nocleanup] [--skippackages]
70
+ [--diff=use|mirror|none] [--state-cache-days=number]
71
+ [--ignore-small-errors] [--allow-dist-rename]
72
+ <mirrordir>
73
+
74
+ For details, see man page.
75
+ EOF
76
+ exit(1);
77
+ }
78
+
79
+ =head1 OPTIONS
80
+
81
+ =over 4
82
+
83
+ =item <mirrordir>
84
+
85
+ This required parameter specifies where the local mirror directory is. If the
86
+ directory does not exist, it will be created. Be careful; telling this
87
+ program that your home directory is the mirrordir is guaranteed to replace
88
+ your home directory with a Debian mirror!
89
+
90
+ =item --progress -p
91
+
92
+ Displays progress bars as files are downloaded.
93
+
94
+ =item --verbose -v
95
+
96
+ Displays progress between file downloads.
97
+
98
+ =item --debug
99
+
100
+ Enables verbose debug output, including ftp protocol dump.
101
+
102
+ =item --dry-run
103
+
104
+ Simulate a mirror run. This will still download the meta files to the
105
+ F<./.temp> working directory, but won't replace the old meta files, won't
106
+ download debs and source files and only simulates cleanup.
107
+
108
+ =item --help
109
+
110
+ Display a usage summary.
111
+
112
+ =item --host=remotehost -h
113
+
114
+ Specify the remote host to mirror from. Defaults to 'ftp.debian.org',
115
+ you are strongly encouraged to find a closer mirror.
116
+
117
+ =item --root=directory -r directory
118
+
119
+ Specifies the directory on the remote host that is the root of the Debian
120
+ archive. Defaults to "debian", which will work for most mirrors. The root
121
+ directory has a F<./dists> subdirectory.
122
+
123
+ =item --method=ftp|hftp|http|rsync -e
124
+
125
+ Specify the method to download files. Currently, supported methods are
126
+ ftp, hftp (ftp over http proxy), http or rsync.
127
+
128
+ Note: starting with version 1.1 it is no longer needed to add a ':' prefix
129
+ for the root directory.
130
+
131
+ =item --passive
132
+
133
+ Download in passive mode.
134
+
135
+ =item --user=remoteusername -u
136
+
137
+ Specify the remote user name to use to log in to the remote host. Helpful when
138
+ dealing with brain damaged proxy servers. Defaults to anonymous.
139
+
140
+ =item --passwd=remoteuserpassword
141
+
142
+ Specify the remote user password to use to log into the remote ftp host.
143
+ It is used with --user and defaults to anonymous@.
144
+
145
+ =item --proxy=http://user:pass@url:port/
146
+
147
+ Specifies the http proxy (like Squid) to use for http and hftp method.
148
+
149
+ =item --dist=foo[,bar,..] -d foo
150
+
151
+ Specify the distribution (etch, lenny, squeeze, sid) of Debian to
152
+ mirror. This switch may be used multiple times, and multiple
153
+ distributions may be specified at once, separated by commas. Using the
154
+ links (stable, testing, unstable) does not have the expected results
155
+ but you may add those links manually. Defaults to mirroring sid.
156
+
157
+ =item --omit-suite-symlinks
158
+
159
+ With this option set, debmirror will not create the 'S<suite -E<gt> codename>'
160
+ symlink. This is needed for example when mirroring archived Debian
161
+ releases as they will all have either 'stable' or 'oldstable' as
162
+ suite in their F<Release> files.
163
+
164
+ =item --section=foo[,bar,..] -s foo
165
+
166
+ Specify the section of Debian to mirror. Defaults to
167
+ main,contrib,non-free,main/debian-installer.
168
+
169
+ =item --arch=foo[,bar,..] -a foo
170
+
171
+ Specify the architectures to mirror. The default is --arch=i386.
172
+ Specifying --arch=none will mirror no archs.
173
+
174
+ =item --adddir directory
175
+
176
+ Also download Packages and Sources files from the specified directory
177
+ on the remote host (the directory is relative to the root of the
178
+ Debian archive). This feature is now obsolete and may be removed in
179
+ a future release.
180
+
181
+ =item --rsync-extra=foo[,bar,..]
182
+
183
+ Allows mirroring of files from a number of directories that are not part
184
+ of the package archive itself. Debmirror will B<always> use rsync for the
185
+ transfer of these files, irrespective of what transfer method is specified
186
+ in the --method option.
187
+
188
+ This option can therefore not be used if your remote mirror does not support
189
+ rsync, or if the mirror needs a different --root option for rsync than for
190
+ the main transfer method specified with --method. Excluding individual files
191
+ in the directories is not supported.
192
+
193
+ The following values are supported.
194
+
195
+ =over 2
196
+
197
+ =item doc
198
+
199
+ Download all files and subdirectories in F<./doc> directory, and all README
200
+ files in the root directory of the archive.
201
+
202
+ =item indices
203
+
204
+ Download all files and subdirectories in F<./indices> directory. Note that
205
+ this directory can contain some rather large files; don't include this
206
+ type unless you know you need these files.
207
+
208
+ =item tools
209
+
210
+ Download all files and subdirectories in F<./tools> directory.
211
+
212
+ =item trace
213
+
214
+ Download the remote mirror's trace files for the archive (F<./project/trace/*>).
215
+
216
+ =back
217
+
218
+ If specified, the update of trace files will be done at the beginning of
219
+ the mirror run; the other types are done near the end.
220
+
221
+ This switch may be used multiple times, and multiple values may be specified
222
+ at once, separated by commas; unknown values are ignored.
223
+
224
+ =item --di-dist=dists | foo[,bar,..]
225
+
226
+ Mirror "current" Debian Installer images for the specified dists.
227
+ See further the section L<Mirroring Debian Installer images> below.
228
+
229
+ =item --di-arch=arches | foo[,bar,..]
230
+
231
+ Mirror "current" Debian Installer images for the specified architectures.
232
+ See further the section L<Mirroring Debian Installer images> below.
233
+
234
+ =item --source
235
+
236
+ Include source in the mirror (default).
237
+
238
+ =item --nosource
239
+
240
+ Do not include source.
241
+
242
+ =item --i18n
243
+
244
+ Additionally download F<Translation-E<lt>langE<gt>.bz2> files, which contain
245
+ translations of package descriptions. Selection of specific translations is
246
+ possible using the --include and --exclude options.
247
+
248
+ =item --getcontents
249
+
250
+ Additionally download F<Contents.E<lt>archE<gt>.gz> files. Note that these
251
+ files can be relatively big and can change frequently, especially for the
252
+ testing and unstable suites. Use of the available diff files is strongly
253
+ recommended (see the --diff option).
254
+
255
+ =item --md5sums -m
256
+
257
+ Use md5sums to determine if files on the local mirror that are the correct
258
+ size actually have the correct content. Not enabled by default, because
259
+ it is too paranoid, and too slow.
260
+
261
+ When the state cache is used, debmirror will only check md5sums during runs
262
+ where the cache has expired or been invalidated, so it is worth considering
263
+ to use these two options together.
264
+
265
+ =item --ignore-missing-release
266
+
267
+ Don't fail if the F<Release> file is missing.
268
+
269
+ =item --ignore-release-gpg
270
+
271
+ Don't fail if the F<Release.gpg> file is missing.
272
+
273
+ =item --ignore=regex
274
+
275
+ Never delete any files whose filenames match the regex. May be used multiple times.
276
+
277
+ =item --exclude=regex
278
+
279
+ Never download any files whose filenames match the regex. May be used multiple times.
280
+
281
+ =item --include=regex
282
+
283
+ Don't exclude any files whose filenames match the regex. May be used multiple times.
284
+
285
+ =item --exclude-deb-section=regex
286
+
287
+ Never download any files whose Debian Section (games, doc, oldlibs,
288
+ science, ...) match the regex. May be used multiple times.
289
+
290
+ =item --limit-priority=regex
291
+
292
+ Limit download to files whose Debian Priority (required, extra,
293
+ optional, ...) match the regex. May be used multiple times.
294
+
295
+ =item --timeout=seconds -t
296
+
297
+ Specifies the timeout to use for network operations (either FTP or rsync).
298
+ Set this to a higher value if you experience failed downloads. Defaults
299
+ to 300 seconds.
300
+
301
+ =item --max-batch=number
302
+
303
+ Download at most max-batch number of files (and ignore rest).
304
+
305
+ =item --rsync-batch=number
306
+
307
+ Download at most number of files with each rsync call and then loop.
308
+
309
+ =item --rsync-options=options
310
+
311
+ Specify alternative rsync options to be used. Default options are
312
+ "-aIL --partial". Care must be taken when specifying alternative
313
+ options not to disrupt operations, it's best to only add to those
314
+ options.
315
+
316
+ The most likely option to add is "--bwlimit=x" to avoid saturating the
317
+ bandwidth of your link.
318
+
319
+ =item --postcleanup
320
+
321
+ Clean up the local mirror but only after mirroring is complete and
322
+ only if there was no error. This is the default.
323
+
324
+ =item --cleanup
325
+
326
+ Do clean up any unknown files and directories on the local mirror (see
327
+ step 2 above).
328
+
329
+ =item --nocleanup
330
+
331
+ Do not clean up the local mirror after mirroring is complete.
332
+
333
+ =item --skippackages
334
+
335
+ Don't re-download Packages and Sources files. Useful if you know they are
336
+ up-to-date.
337
+
338
+ =item --diff=use|mirror|none
339
+
340
+ If --diff=use is specified and the F<Release> file contains entries for
341
+ diff files, then debmirror will attempt to use them to update Packages,
342
+ Sources and Contents files (which can significantly reduce the download
343
+ size for meta files), but will not include them in the mirror. This is
344
+ the default behavior and avoids having time consuming diff files for a
345
+ fast local mirror.
346
+
347
+ Specifying --diff=mirror does the same as 'use', but will also include
348
+ the downloaded diff files in the local mirror. Specify --diff=none to
349
+ completely ignore diff files.
350
+
351
+ =item --state-cache-days=number
352
+
353
+ Save the state of the mirror in a cache file between runs. The cache will
354
+ expire after the specified number of days, at which time a full check and
355
+ cleanup of the mirror will be done. While the cache is valid, debmirror
356
+ will trust that the mirror is consistent with this cache.
357
+
358
+ The cache is only used for files that have a unique name, i.e. binary
359
+ packages and source files. If a mirror update fails for any reason, the
360
+ cache will be invalidated and the next run will include a full check.
361
+
362
+ Main advantage of using the state cache is that it avoids a large amount
363
+ of disk access while checking which files need to be fetched. It may also
364
+ reduce the time required for mirror updates.
365
+
366
+ =item --ignore-small-errors
367
+
368
+ Normally debmirror will report an error if any deb files or sources
369
+ fail to download and refuse to update the meta data to an inconsistent
370
+ mirror. Normally this is a good thing as it indicates something went
371
+ wrong during download and should be retried. But sometimes the
372
+ upstream mirror actually is broken. Specifying --ignore-small-errors
373
+ causes debmirror to ignore missing or broken deb and source files but
374
+ still be pedantic about checking meta files.
375
+
376
+ =item --allow-dist-rename
377
+
378
+ The directory name for a dist should be equal to its Codename and not to
379
+ a Suite. If the local mirror currently has directories named after Suites,
380
+ debmirror can rename them automatically.
381
+ An existing symlink S<codename -E<gt> suite> will be removed, but debmirror
382
+ will automatically create a new symlink S<suite -E<gt> codename> (immediately
383
+ after moving meta files in place). This conversion should only be needed once.
384
+
385
+ =back
386
+
387
+ =head1 USING DEBMIRROR
388
+
389
+ =head2 Using regular expressions in options
390
+
391
+ Various options accept regular expressions that can be used to tune what
392
+ is included in the mirror. They can be any regular expression valid in
393
+ I<perl>, which also means that extended syntax is standard. Make sure to
394
+ anchor regular expressions appropriately: this is not done by debmirror.
395
+
396
+ The --include and --exclude options can be combined. This combination
397
+ for example will, if the --i18n option is used, exclude all F<Translation>
398
+ files, except for the ones for Portuguese (pt) and Brazilian (pt_BR):
399
+
400
+ --exclude='/Translation-.*\.bz2$' --include='/Translation-pt.*\.bz2$'
401
+
402
+ =head2 Mirroring Debian Installer images
403
+
404
+ Debmirror will only mirror the "current" images that are on the remote
405
+ mirror. At least one of the options --di-dist or --di-arch must be
406
+ passed to enable mirroring of the images.
407
+
408
+ The special values "dists" resp. "arches" can be used to tell debmirror
409
+ to use the same dists and architectures for D-I images as for the archive,
410
+ but it is also possible to specify different values. If either option is
411
+ not set, it will default to the same values as for the archive.
412
+
413
+ If you wish to create custom CD images using for example I<debian-cd>,
414
+ you will probably also want to add the option "--rsync-extra=doc,tools".
415
+
416
+ B<Limitations>
417
+
418
+ There are no progress updates displayed for D-I images.
419
+
420
+ =head2 Archive size
421
+
422
+ The tables in the file F</usr/share/doc/debmirror/archive_size> give an
423
+ indication of the space needed to mirror the Debian archive. They are
424
+ particularly useful if you wish to set up a partial mirror.
425
+ Only the size of source and binary packages is included. You should allow
426
+ for around 1-4 GB of meta data (in F<./dists/E<lt>distE<gt>>) per suite
427
+ (depending on your settings). Plus whatever space is needed for extra
428
+ directories (e.g. F<tools>, F<doc>) you wish to mirror.
429
+
430
+ The tables also show how much additional space is required if you add
431
+ a release on top of its predecessor. Note that the additional space
432
+ needed for testing and (to a lesser extent) unstable varies during the
433
+ development cycle of a release. The additional space needed for testing
434
+ is zero immediately after a stable release and grows from that time
435
+ onwards.
436
+
437
+ B<Note>
438
+ Debmirror keeps an extra copy of all meta data. This is necessary to
439
+ guarantee that the local mirror stays consistent while debmirror is
440
+ running.
441
+
442
+ =head1 EXAMPLES
443
+
444
+ Simply make a mirror in F</srv/mirror/debian>, using all defaults (or the
445
+ settings defined in F<debmirror.conf>):
446
+
447
+ debmirror /srv/mirror/debian
448
+
449
+ Make a mirror of i386 and sparc binaries, main only, and include both unstable
450
+ and testing versions of Debian; download from 'ftp.nl.debian.org':
451
+
452
+ debmirror -a i386,sparc -d sid -d etch -s main --nosource \
453
+ -h ftp.nl.debian.org --progress $HOME/mirror/debian
454
+
455
+ Make a mirror using rsync (rsync server is 'ftp.debian.org::debian'),
456
+ excluding the section 'debug' and the package 'foo-doc':
457
+
458
+ debmirror -e rsync $HOME/mirror/debian --exclude='/foo-doc_' \
459
+ --exclude-deb-section='^debug$'
460
+
461
+ =head1 FILES
462
+
463
+ /etc/debmirror.conf
464
+ ~/.debmirror.conf
465
+
466
+ Debmirror will look for the presence of these files and load them
467
+ in the indicated order if they exist.
468
+ See the example in /usr/share/doc/debmirror/examples for syntax.
469
+
470
+ ~/.gnupg/trustedkeys.gpg
471
+
472
+ Debmirror uses gpgv to verify Release and Release.gpg using the
473
+ default keyring ~/.gnupg/trustedkeys.gpg. This can be changed by
474
+ exporting GNUPGHOME resulting in $GNUPGHOME/trustedkeys.gpg being
475
+ used.
476
+
477
+ To add the right key to this keyring you can import it from the
478
+ debian keyring (in case of the debian archive) using:
479
+
480
+ gpg --keyring /usr/share/keyrings/debian-archive-keyring.gpg --export \
481
+ | gpg --no-default-keyring --keyring trustedkeys.gpg --import
482
+
483
+ or download the key from a keyserver:
484
+
485
+ gpg --no-default-keyring --keyring trustedkeys.gpg \
486
+ --keyserver keyring.debian.org --recv-keys <key ID>
487
+
488
+ The <key ID> can be found in the gpgv error message in debmirror:
489
+ gpgv: Signature made Tue Jan 23 09:07:53 2007 CET using DSA key ID 2D230C5F
490
+
491
+ =cut
492
+
493
+ use strict;
494
+ use Cwd;
495
+ use Storable qw(nstore retrieve);
496
+ use Net::FTP;
497
+ use Getopt::Long;
498
+ use File::Temp qw/ tempfile /;
499
+ use LockFile::Simple;
500
+ use Compress::Zlib;
501
+ use Digest::MD5;
502
+ use Digest::SHA1;
503
+ use LWP::UserAgent;
504
+
505
+ # Yeah, I use too many global variables in this program.
506
+ our ($debug, $progress, $verbose, $passive, $skippackages, $getcontents, $i18n);
507
+ our ($ua, $proxy);
508
+ our (@dists, @sections, @arches, @extra_dirs, @ignores, @excludes, @includes);
509
+ our (@excludes_deb_section, @limit_priority);
510
+ our (@di_dists, @di_arches, @rsync_extra);
511
+ our $state_cache_days = 0;
512
+ our $check_md5sums = 0;
513
+ our $check_downloads = 0;
514
+ our $cleanup=0;
515
+ our $post_cleanup=1;
516
+ our $no_cleanup=0;
517
+ our $do_source=1;
518
+ our $host="ftp.debian.org";
519
+ our $user="anonymous";
520
+ our $passwd="anonymous@";
521
+ our $remoteroot="debian";
522
+ our $download_method="ftp";
523
+ our $timeout=300;
524
+ our $max_batch=0;
525
+ our $rsync_batch=200;
526
+ our $num_errors=0;
527
+ our $bytes_to_get=0;
528
+ our $bytes_gotten=0;
529
+ our $bytes_meta=0;
530
+ our $doing_meta=1;
531
+ our $ignore_release=0;
532
+ our $ignore_release_gpg=0;
533
+ our $start_time = time;
534
+ our $dry_run=0;
535
+ our $dry_run_var=0;
536
+ our $rsync_options="-aIL --partial";
537
+ our $ignore_small_errors=0;
538
+ our $diff_mode="use";
539
+ our $omit_suite_symlinks=0;
540
+ our $allow_dist_rename=0;
541
+ my @errlog;
542
+ my $HOME;
543
+ ($HOME = $ENV{'HOME'}) or die "HOME not defined in environment!\n";
544
+
545
+ # Load in config files
546
+ require "/etc/debmirror.conf" if -r "/etc/debmirror.conf";
547
+ require "$HOME/.debmirror.conf" if -r "$HOME/.debmirror.conf";
548
+
549
+ # This hash contains the releases to mirror. If both codename and suite can be
550
+ # determined from the Release file, the codename is used in the key. If not,
551
+ # it can also be a suite (or whatever was requested by the user).
552
+ # The hash has three subtypes:
553
+ # - suite: if both codename and suite could be determined from the Release file,
554
+ # the codename is the key and the value is the name of the suite - used to
555
+ # update the suite -> codename symlinks;
556
+ # - mirror: set to 1 if the package archive should be mirrored for the dist;
557
+ # - d-i: set to 1 if D-I images should be mirrored for the dist.
558
+ # For the last two subtypes the key can also include a subdir.
559
+ my %distset=();
560
+
561
+ # This hash holds all the files we know about. Values are:
562
+ # - -1: file was not on mirror and download attempt failed
563
+ # - 0: file was not on mirror and either needs downloading or was
564
+ # downloaded this run
565
+ # - 1: file is on mirror and wanted according to meta data
566
+ # - 2: file is on mirror and listed in state cache, but not (yet)
567
+ # verified as wanted according to meta data
568
+ # Values -1 and 2 can occur in the state cache; see $files_cache_version
569
+ # below! Filenames should be relative to $mirrordir.
570
+ my %files;
571
+
572
+ # Hash to record size and md5sums of meta files and package files (from the
573
+ # Release file and Source/Packages files).
574
+ my %file_lists;
575
+
576
+ # Hash to record which Translation files needs download. Contains size and sha1
577
+ # info. Files also get registered in %files.
578
+ my %i18n_get;
579
+
580
+ # Separate hash for files belonging to Debian Installer images.
581
+ # This data is not cached.
582
+ my %di_files;
583
+
584
+ ## State cache meta-data
585
+ my $use_cache = 0;
586
+ my $state_cache_exptime;
587
+ # Next variable *must* be changed if the structure of the %files hash is
588
+ # changed in a way that makes old state-cache files incompatible.
589
+ my $files_cache_version = "1.0";
590
+
591
+ my $help;
592
+ GetOptions('debug' => \$debug,
593
+ 'progress|p' => \$progress,
594
+ 'verbose|v' => \$verbose,
595
+ 'source!' => \$do_source,
596
+ 'md5sums|m' => \$check_md5sums,
597
+ 'nomd5sums' => \$check_downloads,
598
+ 'passive!' => \$passive,
599
+ 'host|h=s' => \$host,
600
+ 'user|u=s' => \$user,
601
+ 'passwd=s' => \$passwd,
602
+ 'root|r=s' => \$remoteroot,
603
+ 'dist|d=s' => \@dists,
604
+ 'section|s=s' => \@sections,
605
+ 'arch|a=s' => \@arches,
606
+ 'adddir=s' => \@extra_dirs,
607
+ 'di-dist=s' => \@di_dists,
608
+ 'di-arch=s' => \@di_arches,
609
+ 'rsync-extra=s' => \@rsync_extra,
610
+ 'cleanup' => \$cleanup,
611
+ 'postcleanup' => \$post_cleanup,
612
+ 'nocleanup' => \$no_cleanup,
613
+ 'ignore=s' => \@ignores,
614
+ 'exclude=s' => \@excludes,
615
+ 'exclude-deb-section=s' => \@excludes_deb_section,
616
+ 'limit-priority=s' => \@limit_priority,
617
+ 'include=s' => \@includes,
618
+ 'skippackages' => \$skippackages,
619
+ 'i18n' => \$i18n,
620
+ 'getcontents' => \$getcontents,
621
+ 'method|e=s' => \$download_method,
622
+ 'timeout|t=s' => \$timeout,
623
+ 'max-batch=s' => \$max_batch,
624
+ 'rsync-batch=s' => \$rsync_batch,
625
+ 'state-cache-days=s' => \$state_cache_days,
626
+ 'ignore-missing-release' => \$ignore_release,
627
+ 'ignore-release-gpg' => \$ignore_release_gpg,
628
+ 'dry-run' => \$dry_run_var,
629
+ 'proxy=s' => \$proxy,
630
+ 'rsync-options=s' => \$rsync_options,
631
+ 'ignore-small-errors' => \$ignore_small_errors,
632
+ 'diff=s' => \$diff_mode,
633
+ 'omit-suite-symlinks' => \$omit_suite_symlinks,
634
+ 'allow-dist-rename' => \$allow_dist_rename,
635
+ 'help' => \$help,
636
+ ) or usage;
637
+ usage if $help;
638
+
639
+ # This parameter is so important that it is the only required parameter.
640
+ my $mirrordir=shift or usage("mirrordir not specified");
641
+
642
+ # Check for patch binary if needed
643
+ if (!($diff_mode eq "none")) {
644
+ if (system("patch --version 2>/dev/null >/dev/null")) {
645
+ say("Patch binary missing, falling back to --diff=none");
646
+ push (@errlog,"Patch binary missing, falling back to --diff=none\n");
647
+ $diff_mode = "none";
648
+ }
649
+ if (system("ed --version 2>/dev/null >/dev/null")) {
650
+ say("Ed binary missing, falling back to --diff=none");
651
+ push (@errlog,"Ed binary missing, falling back to --diff=none\n");
652
+ $diff_mode = "none";
653
+ }
654
+ }
655
+
656
+ # Backwards compatibility: remote root dir no longer needs prefix
657
+ $remoteroot =~ s%^[:/]%%;
658
+
659
+ # Post-process arrays. Allow commas to separate values the user entered.
660
+ # If the user entered nothing, provide defaults.
661
+ @dists=split(/,/,join(',',@dists));
662
+ @dists=qw(sid) unless @dists;
663
+ @sections=split(/,/,join(',',@sections));
664
+ @sections=qw(main contrib non-free main/debian-installer) unless @sections;
665
+ @arches=split(/,/,join(',',@arches));
666
+ @arches=qw(i386) unless @arches;
667
+ @arches=() if (join(',',@arches) eq "none");
668
+ @di_dists=split(/,/,join(',',@di_dists));
669
+ @di_arches=split(/,/,join(',',@di_arches));
670
+ @rsync_extra=split(/,/,join(',',@rsync_extra));
671
+ if (@di_dists) {
672
+ @di_dists = @dists if ($di_dists[0] eq "dists");
673
+ @di_arches = @arches if (!@di_arches || $di_arches[0] eq "arches");
674
+ } elsif (@di_arches) {
675
+ @di_dists = @dists if (!@di_dists);
676
+ @di_arches = @arches if ($di_arches[0] eq "arches");
677
+ }
678
+ $cleanup=0 if ($no_cleanup);
679
+ $post_cleanup=0 if ($no_cleanup);
680
+ $post_cleanup=0 if ($cleanup);
681
+
682
+ # Display configuration.
683
+ $|=1 if $debug;
684
+ if ($passwd eq "anonymous@") {
685
+ if ($download_method eq "http") {
686
+ say("Mirroring to $mirrordir from $download_method://$host/$remoteroot/");
687
+ } else {
688
+ say("Mirroring to $mirrordir from $download_method://$user\@$host/$remoteroot/");
689
+ }
690
+ } else {
691
+ say("Mirroring to $mirrordir from $download_method://$user:XXX\@$host/$remoteroot/");
692
+ }
693
+ say("Arches: ".join(",", @arches));
694
+ say("Dists: ".join(",", @dists));
695
+ say("Sections: ".join(",", @sections));
696
+ say("Including source.") if $do_source;
697
+ say("D-I arches: ".join(",", @di_arches)) if @di_arches;
698
+ say("D-I dists: ".join(",", @di_dists)) if @di_dists;
699
+ say("Pdiff mode: $diff_mode");
700
+ say("Checking md5sums.") if $check_md5sums;
701
+ say("Passive mode on.") if $passive;
702
+ say("Proxy: $proxy") if $proxy;
703
+ say("Download at most $max_batch files.") if ($max_batch > 0);
704
+ say("Download at most $rsync_batch files per rsync call.") if ($download_method eq "rsync");
705
+ if ($post_cleanup) {
706
+ say("Will clean up AFTER mirroring.");
707
+ } else {
708
+ say("Will NOT clean up.") unless $cleanup;
709
+ }
710
+ say("Dry run.") if $dry_run_var;
711
+
712
+ my $md5;
713
+ $md5=Digest::MD5->new;
714
+
715
+ # Set up mirror directory and resolve $mirrordir to a full path for
716
+ # locking and rsync
717
+ make_dir($mirrordir) if (! -d $mirrordir);
718
+ die "You need write permissions on $mirrordir" if (! -w $mirrordir);
719
+ chdir($mirrordir) or die "chdir $mirrordir: $!";
720
+ $mirrordir = cwd();
721
+
722
+ # Handle the lock file. This is the same method used by official
723
+ # Debian push mirrors.
724
+ my $hostname=`hostname -f 2>/dev/null || hostname`;
725
+ chomp $hostname;
726
+ my $lockfile="Archive-Update-in-Progress-$hostname";
727
+ say("Attempting to get lock, this might take 2 minutes before it fails.");
728
+ my $lockmgr = LockFile::Simple->make(-format => "%f/$lockfile", -max => 12,
729
+ -delay => 10, -nfs => 1, -autoclean => 1,
730
+ -warn => 1, -stale => 1, -hold => 0);
731
+ my $lock = $lockmgr->lock("$mirrordir")
732
+ or die "$lockfile exists or you lack proper permissions; aborting";
733
+ $SIG{INT}=sub { $lock->release; exit 1 };
734
+ $SIG{TERM}=sub { $lock->release; exit 1 };
735
+
736
+ # Create tempdir if missing
737
+ my $tempdir=".temp";
738
+ make_dir($tempdir) if (! -d $tempdir);
739
+ die "You need write permissions on $tempdir" if (! -w $tempdir);
740
+
741
+ # Load the state cache.
742
+ load_state_cache() if $state_cache_days;
743
+
744
+ # Register the trace and lock files.
745
+ my $tracefile="project/trace/$hostname";
746
+ $files{$tracefile}=1;
747
+ $files{$lockfile}=1;
748
+
749
+ # Start up ftp.
750
+ my $ftp;
751
+ my %opts = (Debug => $debug, Passive => $passive, Timeout => $timeout);
752
+
753
+ my $rsynctempfile;
754
+ END { unlink $rsynctempfile if $rsynctempfile }
755
+
756
# (Re)initialise the transport for the configured $download_method:
# hftp/http create an LWP user agent, ftp opens a Net::FTP session,
# rsync needs no persistent connection. Dies on any setup failure.
sub init_connection {
    # Assign to $_ as the original did (callers may rely on it being set).
    $_ = $download_method;

    if ($_ eq "hftp") {
        # FTP-over-HTTP only works through a proxy.
        $ua = new LWP::UserAgent;
        if ($proxy) {
            $ua->proxy('ftp', $proxy);
        } elsif ($ENV{ftp_proxy}) {
            $ua->proxy('ftp', $ENV{ftp_proxy});
        } else {
            die("hftp method needs a proxy.");
        }
        return;
    }

    if ($_ eq "http") {
        $ua = new LWP::UserAgent(keep_alive => 1);
        $ua->proxy('http', $ENV{http_proxy}) if ($ENV{http_proxy});
        $ua->proxy('http', $proxy) if ($proxy);
        return;
    }

    if ($_ eq "ftp") {
        $ftp=Net::FTP->new($host, %opts) or die "$@\n";
        $ftp->login($user, $passwd) or die "login failed"; # anonymous
        $ftp->binary or die "could not set binary mode";
        $ftp->cwd("/$remoteroot") or die "cwd to /$remoteroot failed";
        $ftp->hash(\*STDOUT,102400) if $progress;
        return;
    }

    # rsync needs no setup at all.
    return if ($_ eq "rsync");

    usage("unknown download method: $_");
}
795
init_connection;

# determine remote root for rsync transfers
my $rsyncremote = "$host\:\:$remoteroot/";
if (! ($user eq 'anonymous')) {
    $rsyncremote = "$user\@$rsyncremote";
}

# Update the remote trace files; also update ignores for @rsync_extra.
rsync_extra(1, @rsync_extra);

say("Get Release files.");
# Get Release files without caching for http
$ua->default_header( "Cache-Control" => "max-age=0" ) if ($ua);
foreach my $dist (@dists) {
    my $tdir="$tempdir/.tmp/dists/$dist";
    my $have_release = get_release($tdir, $dist);
    # Skip the dist entirely unless we got a Release file or the user
    # asked to ignore missing Release files.
    next unless ($have_release || $ignore_release);
    my ($codename, $suite, $dist_sdir) = name_release("mirror", $tdir, $dist);

    if ($have_release) {
        # Move the verified Release files from the staging area into
        # the temp tree and register them as wanted files.
        make_dir ("dists/$codename$dist_sdir");
        make_dir ("$tempdir/dists/$codename$dist_sdir");
        rename("$tdir/Release", "$tempdir/dists/$codename$dist_sdir/Release")
            or die "Error while moving $tdir/Release: $!\n";
        rename("$tdir/Release.gpg", "$tempdir/dists/$codename$dist_sdir/Release.gpg")
            or die "Error while moving $tdir/Release.gpg: $!\n";
        $files{"dists/$codename$dist_sdir/Release"}=1;
        $files{$tempdir."/"."dists/$codename$dist_sdir/Release"}=1;
        $files{"dists/$codename$dist_sdir/Release.gpg"}=1;
        $files{$tempdir."/"."dists/$codename$dist_sdir/Release.gpg"}=1;
    }
}

# Check that @di_dists contains valid codenames
di_check_dists() if @di_dists;

# Read the MD5Sum section of each downloaded Release file; each entry
# gives the md5 and size of one index file in the dist.
foreach my $dist (keys %distset) {
    next unless exists $distset{$dist}{mirror};
    # Parse the Release
    if (open RELEASE, "<$tempdir/dists/$dist/Release") {
        while (<RELEASE>) {
            last if /^MD5Sum:/;
        }
        $_ = <RELEASE>;
        # Checksum lines are indented by one space.
        while (defined $_ && $_ =~ /^ /) {
            my ($md5sum, $size, $filename) =
                (/ ([a-z0-9]+) +(\d+) +(.*)$/);
            $file_lists{"$tempdir/dists/$dist/$filename"}{md5} = $md5sum;
            $file_lists{"$tempdir/dists/$dist/$filename"}{size} = $size;
            $_ = <RELEASE>;
        }
        close RELEASE;
    }
}

if ($num_errors != 0 && $ignore_release) {
    say("Ignoring failed Release files.");
    push (@errlog,"Ignoring failed Release files\n");
    $num_errors = 0;
}

if ($num_errors != 0) {
    print "Errors:\n ".join(" ",@errlog) if (@errlog);
    die "Failed to download some Release or Release.gpg files!\n";
}

# Enable caching again for http
init_connection if ($ua);

# Calculate expected downloads for meta files
# As we don't actually download most of the meta files (due to getting
# only one compression variant or using diffs), we keep a separate count
# of the actual downloaded amount of data in $bytes_meta.

# The root Release files have already been downloaded
$bytes_to_get = $bytes_meta;
$bytes_gotten = $bytes_meta;
874
# Add the expected size of meta file $name (path relative to the mirror
# root) to the download total, when the Release data listed that file.
sub add_bytes {
    my ($name) = @_;
    my $entry = $file_lists{"$tempdir/$name"};
    $bytes_to_get += $entry->{size} if defined $entry;
}
878
# Count the expected meta-file bytes for every mirrored
# dist/section/arch combination. All compression variants are counted
# here because we do not yet know which variant will be fetched.
foreach my $dist (keys %distset) {
    next unless exists $distset{$dist}{mirror};
    foreach my $section (@sections) {
        foreach my $arch (@arches) {
            add_bytes("dists/$dist/$section/binary-$arch/Packages");
            add_bytes("dists/$dist/$section/binary-$arch/Packages.gz");
            add_bytes("dists/$dist/$section/binary-$arch/Packages.bz2");
            add_bytes("dists/$dist/$section/binary-$arch/Release");
            add_bytes("dists/$dist/$section/binary-$arch/Packages.diff/Index") unless ($diff_mode eq "none");
        }
        # d-i does not have separate source sections
        if ($do_source && $section !~ /debian-installer/) {
            add_bytes("dists/$dist/$section/source/Sources");
            add_bytes("dists/$dist/$section/source/Sources.gz");
            add_bytes("dists/$dist/$section/source/Sources.bz2");
            add_bytes("dists/$dist/$section/source/Release");
            add_bytes("dists/$dist/$section/source/Sources.diff/Index") unless ($diff_mode eq "none");
        }
        add_bytes("dists/$dist/$section/i18n/Index") if $i18n;
    }
}
# The same accounting for any extra flat directories.
foreach (@extra_dirs) {
    add_bytes("$_/Packages");
    add_bytes("$_/Packages.gz");
    add_bytes("$_/Packages.bz2");
    add_bytes("$_/Release");
    add_bytes("$_/Packages.diff/Index");
    if ($do_source) {
        add_bytes("$_/Sources");
        add_bytes("$_/Sources.gz");
        add_bytes("$_/Sources.bz2");
        add_bytes("$_/Sources.diff/Index");
    }
}

# Get and parse MD5SUMS files for D-I images.
di_add_files() if @di_dists;
916
say("Get Packages and Sources files and other miscellany.");
# Get Packages and Sources files and other miscellany.
my (@package_files, @source_files);
foreach my $dist (keys %distset) {
    next unless exists $distset{$dist}{mirror};
    foreach my $section (@sections) {
        # Skip dist/section combinations that never carry a
        # debian-installer section.
        # no d-i in woody
        next if ($section =~ /debian-installer/ && $dist eq "woody");
        next if ($section =~ /debian-installer/ && $dist eq "experimental");
        next if ($section =~ /debian-installer/ && $dist =~ /.*-proposed-updates/);
        next if ($section =~ /debian-installer/ && $dist =~ /.*breezy-updates/ );
        next if ($section =~ /debian-installer/ && $dist eq "breezy-security" );
        foreach my $arch (@arches) {
            get_index("dists/$dist/$section/binary-$arch", "Packages");
        }
        # d-i does not have separate source sections
        if ($do_source && $section !~ /debian-installer/) {
            get_index("dists/$dist/$section/source", "Sources");
        }
        get_i18n_index("dists/$dist/$section/i18n") if $i18n;
    }
}
foreach (@extra_dirs) {
    get_packages($_, "Packages");
    get_sources($_, "Sources") if ($do_source);
}

# Set download size for meta files to actual values
$doing_meta=0;
$bytes_to_get=$bytes_meta;
$bytes_gotten=$bytes_meta;

# Sanity check. I once nuked a mirror because of this..
if (@arches && ! @package_files) {
    print "Errors:\n ".join(" ",@errlog) if (@errlog);
    die "Failed to download any Packages files!\n";
}
if ($do_source && ! @source_files) {
    print "Errors:\n ".join(" ",@errlog) if (@errlog);
    die "Failed to download any Sources files!\n";
}

if ($num_errors != 0) {
    print "Errors:\n ".join(" ",@errlog) if (@errlog);
    die "Failed to download some Package, Sources or Release files!\n";
}
962
+
963
+ # Really set dry-run option now if it was given. This delay is needed
964
+ # for the ftp method.
965
+ $dry_run = $dry_run_var;
966
+
967
+ # Determine size of Contents and Translation files to get.
968
+ if ($getcontents) {
969
+ # Updates of Contents files using diffs are done here; only full downloads
970
+ # are delayed.
971
+ say("Update Contents files.") if ($diff_mode ne "none");
972
+ foreach my $dist (keys %distset) {
973
+ next unless exists $distset{$dist}{mirror};
974
+ foreach my $arch (@arches) {
975
+ next if $dist=~/experimental/;
976
+ next if $dist=~/.*-proposed-updates/;
977
+ next if $arch=~/source/;
978
+ if ($diff_mode ne "none") {
979
+ if (!update_contents("dists/$dist", "Contents-$arch")) {
980
+ add_bytes("dists/$dist/Contents-$arch.gz");
981
+ }
982
+ } elsif (!check_lists ("$tempdir/dists/$dist/Contents-$arch.gz")) {
983
+ add_bytes("dists/$dist/Contents-$arch.gz");
984
+ }
985
+ }
986
+ }
987
+ }
988
+ if ($i18n) {
989
+ foreach my $dist (keys %distset) {
990
+ next unless exists $distset{$dist}{mirror};
991
+ foreach my $section (@sections) {
992
+ parse_i18n_index("dists/$dist/$section/i18n");
993
+ }
994
+ }
995
+ }
996
+
997
+ # close ftp connection to avoid timeouts, will reopen later
998
+ if ($download_method eq 'ftp') { $ftp->quit; }
999
+
1000
say("Parse Packages and Sources files and add to the file list everything therein.");
{
    local $/="\n\n"; # Set input separator to read entire package

    my ($filename, $size, $md5sum, $directory, $exclude, $include,
        $architecture, $exclude_deb_section, $limit_priority, $deb_section,
        $deb_priority);
    my $empty_mirror = 1;

    # Arch-independent packages are always wanted.
    my %arches = map { $_ => 1 } (@arches, "all");

    # Build alternation regexes from the user's filter lists.
    $include = "(".join("|", @includes).")" if @includes;
    $exclude = "(".join("|", @excludes).")" if @excludes;
    $exclude_deb_section =
        "(".join("|", @excludes_deb_section).")" if @excludes_deb_section;
    $limit_priority = "(".join("|", @limit_priority).")" if @limit_priority;
    foreach my $file (@package_files) {
        next if (!-f $file);
        open(FILE, "<", $file) or die "$file: $!";
        # One control paragraph per iteration, thanks to $/ above.
        for (;;) {
            my $buf;
            unless (defined( $buf = <FILE> )) {
                last if eof;
                die "$file: $!" if $!;
            }
            $_ = $buf;
            ($filename)=m/^Filename:\s+(.*)/im;
            $filename=~s:/+:/:; # remove redundant slashes in paths
            ($deb_section)=m/^Section:\s+(.*)/im;
            ($deb_priority)=m/^Priority:\s+(.*)/im;
            ($architecture)=m/^Architecture:\s+(.*)/im;
            next if (!$arches{$architecture});
            # An explicit include match overrides all exclude rules.
            if(!(defined($include) && ($filename=~/$include/o))) {
                next if (defined($exclude) && $filename=~/$exclude/o);
                next if (defined($exclude_deb_section) && defined($deb_section)
                         && $deb_section=~/$exclude_deb_section/o);
                next if (defined($limit_priority) && defined($deb_priority)
                         && ! ($deb_priority=~/$limit_priority/o));
            }
            # File was listed in state cache, or file occurs multiple times
            if (exists $files{$filename}) {
                if ($files{$filename} >= 0) {
                    $files{$filename} = 1 if $files{$filename} == 2;
                    $empty_mirror = 0;
                    next;
                } else { # download failed previous run, retry
                    $files{$filename} = 0;
                }
            }
            ($size)=m/^Size:\s+(\d+)/im;
            ($md5sum)=m/^MD5sum:\s+([A-Za-z0-9]+)/im;
            if (check_file($filename, $size, $md5sum)) {
                $files{$filename} = 1;
            } else {
                # Mark for download and remember the expected checksum.
                $files{$filename} = 0;
                $file_lists{$filename}{md5} = $md5sum;
                $file_lists{$filename}{size} = $size;
                $bytes_to_get += $size;
            }
            $empty_mirror = 0;
        }
        close(FILE);
    }
    foreach my $file (@source_files) {
        next if (!-f $file);
        open(FILE, "<", $file) or die "$file: $!";
        for (;;) {
            my $buf = "";
            unless (defined( $buf = <FILE> )) {
                last if eof;
                die "$file: $!" if $!;
            }
            $_ = $buf;
            ($directory) = m/^Directory:\s+(.*)/im;
            ($deb_section)=m/^Section:\s+(.*)/im;
            ($deb_priority)=m/^Priority:\s+(.*)/im;
            next if (defined($exclude_deb_section) && defined($deb_section)
                     && $deb_section=~/$exclude_deb_section/o);
            next if (defined($limit_priority) && defined($deb_priority)
                     && ! ($deb_priority=~/$limit_priority/o));
            # Each Files: line gives md5sum, size and basename.
            while (m/^ ([A-Za-z0-9]{32} .*)/mg) {
                ($md5sum, $size, $filename)=split(' ', $1, 3);
                $filename="$directory/$filename";
                $filename=~s:/+:/:; # remove redundant slashes in paths
                if(!(defined($include) && $filename=~/$include/o)) {
                    next if (defined($exclude) && $filename=~/$exclude/o);
                }
                # File was listed in state cache, or file occurs multiple times
                if (exists $files{$filename}) {
                    if ($files{$filename} >= 0) {
                        $files{$filename} = 1 if $files{$filename} == 2;
                        $empty_mirror = 0;
                        next;
                    } else { # download failed previous run, retry
                        $files{$filename} = 0;
                    }
                }
                if (check_file($filename, $size, $md5sum)) {
                    $files{$filename} = 1;
                } else {
                    $files{$filename} = 0;
                    $file_lists{$filename}{md5} = $md5sum;
                    $file_lists{$filename}{size} = $size;
                    $bytes_to_get += $size;
                }
            }
            $empty_mirror = 0;
        }
        close(FILE);
    }

    # Sanity check to avoid completely nuking a mirror.
    if ($empty_mirror) {
        print "Errors:\n ".join(" ",@errlog) if (@errlog);
        die "No packages after parsing Packages and Sources files!\n";
    }
}
1117
+
1118
# Pre-mirror cleanup
cleanup_unknown_files() if ($cleanup && ! $post_cleanup);

say("Download all files that we need to get (".print_dl_size($bytes_to_get - $bytes_gotten).").");
# Reopen the connection that was closed earlier to avoid timeouts.
init_connection;

# Download Contents and Translation files.
get_contents_files() if ($getcontents);
get_i18n_files() if ($i18n);

# Download all package files that we need to get.
say("Get package files.");
1130
# Dispatch on $download_method; each branch walks %files and fetches
# every entry whose value is 0 (wanted but not yet present locally).
DOWNLOAD: {
    $_ = $download_method;

    # hftp (ftp using http mirror) method
    /^hftp$/ && do {
        # LWP stuff
        my $dirname;
        my $i=0;
        foreach my $file (sort keys %files) {
            if (!$files{$file}) {
                if (($dirname) = $file =~ m:(.*)/:) {
                    make_dir($dirname);
                }
                hftp_get($file);
                # --max-batch limits how many files one run may fetch.
                if ($max_batch > 0 && ++$i >= $max_batch) {
                    push (@errlog,"Batch limit exceeded, mirror run was partial\n");
                    $num_errors++;
                    last;
                }
            }
        }
        last DOWNLOAD;
    };

    # http method
    /^http$/ && do {
        # LWP stuff
        my $dirname;
        my $i=0;
        foreach my $file (sort keys %files) {
            if (!$files{$file}) {
                if (($dirname) = $file =~ m:(.*)/:) {
                    make_dir($dirname);
                }
                http_get($file);
                if ($max_batch > 0 && ++$i >= $max_batch) {
                    push (@errlog,"Batch limit exceeded, mirror run was partial\n");
                    $num_errors++;
                    last;
                }
            }
        }
        last DOWNLOAD;
    };

    # ftp method
    /^ftp$/ && do {
        my $dirname;
        my $i=0;
        foreach my $file (sort keys %files) {
            if (!$files{$file}) {
                if (($dirname) = $file =~ m:(.*)/:) {
                    make_dir($dirname);
                }
                ftp_get($file);
                if ($max_batch > 0 && ++$i >= $max_batch) {
                    push (@errlog,"Batch limit exceeded, mirror run was partial\n");
                    $num_errors++;
                    last;
                }
            }
        }
        last DOWNLOAD;
    };

    # rsync method: wanted files are grouped into batches of
    # $rsync_batch and handed to rsync via a temporary include file.
    /^rsync$/ && do {
        my $opt=$rsync_options;
        my $fh;
        my @result;
        my $i=0;
        my $j=0;
        $opt = "$opt --progress" if $progress;
        $opt = "$opt -v" if $verbose;
        $opt = "$opt -v" if $debug;
        $opt = "$opt -n" if $dry_run;
        foreach my $file (sort keys %files) {
            if (!$files{$file}) {
                my $dirname;
                my @dir;
                # Every parent directory must also be included, or
                # rsync's --exclude='*' would prune the path.
                ($dirname) = $file =~ m:(.*/):;
                @dir= split(/\//, $dirname);
                for (0..$#dir) {
                    push (@result, "" . join('/', @dir[0..$_]) . "/");
                }
                push (@result, "$file");
                if (++$j >= $rsync_batch) {
                    $j = 0;
                    ($fh, $rsynctempfile) = tempfile();
                    if (@result) {
                        # Sort and de-duplicate the include list.
                        @result = sort(@result);
                        my $prev = "not equal to $result[0]";
                        @result = grep($_ ne $prev && ($prev = $_, 1), @result);
                        for (@result) {
                            print $fh "$_\n";
                        }
                    }
                    system ("rsync --timeout=$timeout $opt $rsyncremote --include-from=$rsynctempfile --exclude='*' $mirrordir");
                    close $fh;
                    unlink $rsynctempfile;
                    # Verify what rsync actually delivered.
                    foreach my $dest (@result) {
                        if (-f $dest) {
                            if (!check_lists($dest)) {
                                say("$dest failed md5sum check");
                                $num_errors++;
                            }
                        } elsif (!-d $dest) {
                            say("$dest missing");
                            $num_errors++;
                        }
                    }
                    @result = ();
                }
                if ($max_batch > 0 && ++$i >= $max_batch) {
                    print "Batch limit exceeded, mirror run will be partial\n";
                    push (@errlog,"Batch limit exceeded, mirror run was partial\n");
                    $num_errors++;
                    last;
                }
            }
        }
        # Flush the final partial batch.
        ($fh, $rsynctempfile) = tempfile();
        if (@result) {
            @result = sort(@result);
            my $prev = "not equal to $result[0]";
            @result = grep($_ ne $prev && ($prev = $_, 1), @result);
            for (@result) {
                print $fh "$_\n";
            }
            system ("rsync --timeout=$timeout $opt $rsyncremote --include-from=$rsynctempfile --exclude='*' $mirrordir");
            close $fh;
            foreach my $dest (@result) {
                if (-f $dest) {
                    if (!check_lists($dest)) {
                        say("$dest failed md5sum check");
                        $num_errors++;
                    }
                } elsif (!-d $dest) {
                    say("$dest missing");
                    $num_errors++;
                }
            }
        }
        last DOWNLOAD;
    };
}
1276
+
1277
if (! @di_dists) {
    download_finished();
}

say("Everything OK. Moving meta files.");
# Meta files were staged in $tempdir; hard-link (or copy) them into
# the real mirror tree now that the downloads succeeded.
chdir($tempdir) or die "unable to chdir($tempdir): $!\n";
my $res=0;
foreach my $file (`find . -type f`) {
    chomp $file;
    $file=~s:^\./::;
    # this skips diff files if unwanted
    next if (!exists $files{$file});
    print("Moving $file\n") if ($debug);
    if (! $dry_run) {
        # Unlink first to break hard links before replacing the file.
        $res &= unlink($mirrordir."/".$file) if ($mirrordir."/".$file);
        "$file" =~ m,(^.*)/,;
        make_dir("$mirrordir/$1");
        if (!link($file, $mirrordir."/".$file)) {
            # Fall back to a copy when hard-linking fails (e.g. across
            # filesystems).
            $res &= system("cp $file $mirrordir/$file");
        }
    }
}
chdir($mirrordir) or die "chdir $mirrordir: $!";

# Get optional directories using rsync.
rsync_extra(0, @rsync_extra);

# Download D-I images.
if (@di_dists) {
    di_get_files();
    download_finished();
}

# Update suite->codename symlinks
if (! $omit_suite_symlinks && ! $dry_run) {
    my %suites;
    opendir (DIR, 'dists') or die "Can't open dists/: $!\n";
    foreach my $file (grep (!/^\.\.?$/, readdir (DIR))) {
        if (-l "dists/$file") {
            my $cur = readlink("dists/$file") or die "Error reading symlink dists/$file: $!";
            if (exists $distset{$cur}{suite} &&
                ($file eq $distset{$cur}{suite} || $file eq "stable-$distset{$cur}{suite}")) {
                $suites{$file} = "ok";
            } else {
                # Symlink no longer matches a known suite; drop it.
                unlink("dists/$file") or die "Failed to remove symlink dists/$file: $!";
            }
        }
    }
    closedir (DIR);

    foreach my $dist (keys %distset) {
        next if (! exists $distset{$dist}{suite});
        next if (!-d "dists/$dist");
        my $suite = $distset{$dist}{suite};
        if (! exists $suites{$suite}) {
            symlink("$dist", "dists/$suite") or die "Failed to create symlink dists/$suite: $!";
        }
        if ($suite eq "proposed-updates"&& !exists $suites{"stable-$suite"}) {
            symlink("$dist", "dists/stable-$suite") or die "Failed to create symlink dists/stable-$suite: $!";
        }
    }
}

# Write out trace file.
if (! $dry_run) {
    make_dir("project/trace");
    open OUT, ">$tracefile" or die "$tracefile: $!";
    print OUT `date -u`;
    close OUT;
}

# Post mirror cleanup
cleanup_unknown_files() if ($post_cleanup);

# mirror cleanup for directories
if (! $use_cache && ($cleanup || $post_cleanup)) {
    # Remove all empty directories. Not done as part of main cleanup
    # to prevent race problems with pool download code, which
    # makes directories.. Sort so they are removable in bottom-up
    # order.
    chdir($mirrordir) or die "chdir $mirrordir: $!";
    system("find . -depth -type d ! -name . ! -name .. -print0 | xargs -0 rmdir 2>/dev/null") if (! $dry_run);
}

if ($res != 0) {
    die("Failed to move some meta files.");
}

# Save the state cache.
save_state_cache() if $state_cache_days && !$dry_run;

say("All done.");
$lock->release;
print "Errors:\n ".join(" ",@errlog) if (@errlog);
if ($num_errors != 0) {
    print "Failed to download files ($num_errors errors)!\n";
    exit 1 if (!$ignore_small_errors);
}

exit;
1377
+
1378
# Format a byte count for humans: MiB above roughly 10 MB, kiB above
# roughly 10 kB, plain bytes otherwise. Results are truncated integers.
sub print_dl_size {
    my ($size) = @_;
    my $unit = "B";
    if ($size >= 10*1000*1024) {
        $size = int($size/1024/1024);
        $unit = "MiB";
    } elsif ($size >= 10*1000) {
        $size = int($size/1024);
        $unit = "kiB";
    }
    return "$size $unit";
}
1392
+
1393
# Record $size bytes as downloaded. While meta files are being fetched
# ($doing_meta true) the meta-file byte counter is kept in sync too.
sub add_bytes_gotten {
    my ($size) = @_;
    $bytes_gotten += $size;
    $bytes_meta += $size if $doing_meta;
}
1400
+
1401
# Pass this function a filename, a file size (bytes), and a md5sum (hex).
# Size is always checked; checking the md5sum is optional. However, if
# a value of -1 is passed for size, a check of the md5sum is forced.
# It will return true if the tests show the file matches.
sub check_file {
    my ($filename, $size, $md5sum)=@_;
    # -s _ reuses the stat buffer filled by the -f test.
    if (-f $filename and ($size == -s _ || $size == -1)) {
        if ($check_md5sums || $size == -1) {
            # Lexical handle, explicitly closed: the original opened a
            # global bareword HANDLE with 2-arg open and never closed
            # it, leaking a descriptor for every file checked.
            open my $fh, '<', $filename or
                die "$filename: $!";
            $md5->addfile($fh);
            close $fh;
            # hexdigest also resets $md5 ready for the next call.
            my $digest = $md5->hexdigest;
            return ($md5sum eq $digest);
        }
        else {
            # Assume it is ok, w/o md5 check.
            return 1;
        }
    }
    return 0;
}
1422
+
1423
# Always checks both file size and sha1 as the files get updated (this is
# similar to what is done in check_lists, which forces check_md5sums).
# Returns true only when both the size and the SHA1 digest match.
sub check_i18n {
    my ($filename, $size, $sha1)=@_;
    my $digest = Digest::SHA1->new;
    my $ret = 0;

    # -s _ reuses the stat buffer filled by the -f test.
    if (-f "$filename" and ($size == -s _)) {
        # Lexical handle, explicitly closed: the original opened a
        # global bareword HANDLE and never closed it (fd leak).
        open my $fh, '<', $filename or die "$filename: $!";
        $digest->addfile($fh);
        close $fh;
        $ret = ($sha1 eq $digest->hexdigest);
    }
    return $ret;
}
1437
+
1438
# Check uncompressed diff content against sha1sum from Index file.
# The .gz file is decompressed to a scratch copy which is removed
# again before returning. Returns true when size and SHA1 both match.
sub check_diff {
    my ($filename, $size, $sha1) = @_;
    my $digest = Digest::SHA1->new;
    my $ret = 0;

    if (-f "$filename.gz") {
        system_redirect_io("gzip -d", "$filename.gz", "$filename");
        if ($size == -s $filename) {
            # Lexical handle, explicitly closed: the original opened a
            # global bareword HANDLE and never closed it (fd leak).
            open my $fh, '<', $filename or die "$filename: $!";
            $digest->addfile($fh);
            close $fh;
            $ret = ($sha1 eq $digest->hexdigest);
        }
        unlink ($filename);
    }
    return $ret;
}
1455
+
1456
# Check file against md5sum and size from the Release file.
# It will return true if the md5sum matches.
sub check_lists {
    my ($file) = @_;
    # Force the md5 comparison for index files, restoring the user's
    # --md5sums setting afterwards.
    my $saved = $check_md5sums;
    $check_md5sums = 1;
    # Files with no Release entry pass by default.
    my $ok = 1;
    if (exists $file_lists{$file}) {
        $ok = check_file($file, $file_lists{$file}{size}, $file_lists{$file}{md5});
    }
    $check_md5sums = $saved;
    return $ok;
}
1469
+
1470
# Download a single file (path relative to the mirror root) into $tdir
# (default $tempdir) with the configured method, then verify it against
# the Release checksums via check_lists. Failed http/ftp downloads are
# removed so a later run retries them; rsync keeps the partial file.
# Returns true on success. Always chdirs back to $mirrordir.
sub remote_get {
    my $file=shift;
    my $tdir=shift;
    my $res;
    return 1 if ($skippackages);
    $tdir=$tempdir unless $tdir;
    chdir($tdir) or die "unable to chdir($tdir): $!\n";

    # No "last METHOD" in the branches: the method patterns are
    # mutually exclusive, so at most one do-block runs.
    METHOD: {
        $_ = $download_method;

        /^hftp$/ && do {
            $res=hftp_get($file);
            $res=$res && check_lists($file);
            if (!$res) {
                say("$file failed md5sum check, removing");
                unlink($file) if (-f $file);
            }
        };

        /^http$/ && do {
            $res=http_get($file);
            $res=$res && check_lists($file);
            if (!$res) {
                say("$file failed md5sum check, removing");
                unlink($file) if (-f $file);
            }
        };

        /^ftp$/ && do {
            $res=ftp_get($file);
            $res=$res && check_lists($file);
            if (!$res) {
                say("$file failed md5sum check, removing");
                unlink($file) if (-f $file);
            }
        };

        /^rsync$/ && do {
            $res=rsync_get($file);
            $res=$res && check_lists($file);
            if (!$res) {
                say("$file failed md5sum check");
                # FIXME: make sure the size doesn't match so it gets retried
            }
        };
    }

    chdir($mirrordir) or die "unable to chdir($mirrordir): $!\n";
    return $res;
}
1521
+
1522
# Get a file via hftp, first displaying its filename if progress is on.
# Returns true on success (and always during a dry run).
sub hftp_get {
    my $oldautoflush = $|;
    $| = 1;   # unbuffer progress output
    my $file=shift;
    my $url="ftp://${host}/${remoteroot}/${file}";
    my $ret=1;

    print "$url => " if ($debug);
    if ($progress || $verbose) {
        print "Getting: $file... ";
    }
    if (! $dry_run) {
        # Break hard links before LWP writes the new content.
        unlink($file) if (-f $file);
        # $ret temporarily holds the HTTP::Response object here.
        $ret = $ua->mirror($url, $file);
        print $ret->status_line . "\n" if ($debug);
        if ($ret->is_error) {
            # -1 marks the download as failed so it is retried later.
            $files{$file} = -1;
            warn "$file failed " . $ret->status_line . "\n" if ($progress or $verbose);
            push (@errlog,"Download of $file failed: ".$ret->status_line."\n");
            $num_errors++;
        } elsif ($progress || $verbose) {
            print "ok\n";
        }
        # Collapse the response object back into a success boolean.
        $ret = not ( $ret->is_error );
    } elsif ($progress || $verbose) {
        print "ok\n";
    }
    $| = $oldautoflush;
    return $ret;
}
1553
+
1554
# Get a file via http, first displaying its filename if progress is on.
# Also accounts the number of bytes actually received. Returns true on
# success (and always during a dry run).
sub http_get {
    my $oldautoflush = $|;
    $| = 1;   # unbuffer progress output
    my $file=shift;
    my $percent = 0;
    my $url="http://${host}/${remoteroot}/${file}";
    my $ret=1;
    # Overall progress in percent; guarded against division by zero.
    $percent = sprintf("%3.0f",(($bytes_gotten/$bytes_to_get)*100)) unless($bytes_to_get == 0);

    print "$url => " if ($debug);
    if ($progress || $verbose) {
        print "[$percent%] Getting: $file... ";
    }
    if (! $dry_run) {
        # Break hard links before LWP writes the new content.
        unlink($file) if (-f $file);
        # $ret temporarily holds the HTTP::Response object here.
        $ret = $ua->mirror($url, $file);
        print $ret->status_line . "\n" if ($debug);
        if ($ret->is_error) {
            # -1 marks the download as failed so it is retried later.
            $files{$file} = -1;
            warn "$file failed " . $ret->status_line . "\n" if ($progress or $verbose);
            push (@errlog,"Download of $file failed: ".$ret->status_line."\n");
            $num_errors++;
        } elsif ($progress || $verbose) {
            print "ok\n";
        }
        # Collapse the response object back into a success boolean.
        $ret = not ( $ret->is_error );
    } elsif ($progress || $verbose) {
        print "ok\n";
    }
    # Account for actual bytes gotten
    my @stat = stat $file;
    add_bytes_gotten($stat[7]) if (@stat);

    $| = $oldautoflush;
    return $ret;
}
1591
+
1592
# Get a file via ftp, first displaying its filename if progress is on.
# I should just be able to subclass Net::Ftp and override the get method,
# but it's late.
# Keeps the local copy when both size and mtime match the remote file;
# otherwise re-downloads and stamps the remote mtime onto the local
# copy so the keep-check works on the next run. Returns true on
# success, keep, or dry run.
sub ftp_get {
    my $oldautoflush = $|;
    $| = 1;   # unbuffer progress output
    my $file=shift;
    my $percent = 0;
    # (The original declared an unused "my $mtime;" here that was
    # shadowed by both inner declarations below; removed.)
    $percent = sprintf("%3.0f",(($bytes_gotten/$bytes_to_get)*100)) unless($bytes_to_get == 0);

    my @stat = stat $file;
    if (@stat) { # already have the file?
        my $size = $ftp->size($file);
        my $mtime = $ftp->mdtm($file);
        if ($mtime && $size
            && $size == $stat[7]
            && $mtime == $stat[9]) { # size and time match
            print "[$percent%] Keeping: $file\n" if ($progress || $verbose);
            add_bytes_gotten($size);
            return 1;
        }
    }
    if ($progress) {
        print "[$percent%] Getting: $file\t #";
    } elsif ($verbose) {
        print "[$percent%] Getting: $file";
    }
    my $ret=1;
    if (! $dry_run) {
        # Break hard links before writing the new content.
        unlink($file) if (-f $file);
        $ret = $ftp->get($file, $file);
        if ($ret) {
            # Preserve the remote timestamp for the keep-check above.
            my $mtime=$ftp->mdtm($file);
            utime($mtime, $mtime, $file) if defined $mtime;
        } else {
            # -1 marks the download as failed so it is retried later.
            $files{$file} = -1;
            warn " failed:".$ftp->message if ($progress or $verbose);
            push (@errlog,"Download of $file failed: ".$ftp->message."\n");
            $num_errors++;
        }
    }
    my $size=$ftp->size($file);
    add_bytes_gotten($size) if $size;
    $| = $oldautoflush;
    print "\n" if (($verbose and not $progress) or ($dry_run and $progress));
    return $ret;
}
1640
+
1641
# Fetch a single file with rsync into the current directory, adding an
# --include rule for each parent directory so --exclude='*' does not
# prune the path. Returns 1 on success; on failure records the error
# and returns 0.
sub rsync_get {
    my $file=shift;
    my $opt=$rsync_options;
    (my $dirname) = $file =~ m:(.*/):;
    my @dir= split(/\//, $dirname);
    for (0..$#dir) {
        $opt = "$opt --include=" . join('/', @dir[0..$_]) . "/";
    }
    $opt = "$opt --progress" if $progress;
    $opt = "$opt -v" if $debug;
    system ("rsync --timeout=$timeout $opt $rsyncremote --include=$file --exclude='*' .");
    # Success requires both a zero exit status and the file on disk.
    if ($? == 0 && -f $file) {
        return 1;
    } else {
        # -1 marks the download as failed so it is retried later.
        $files{$file} = -1;
        push (@errlog,"Download of $file failed\n");
        $num_errors++;
        return 0;
    }
}
1661
+
1662
# Handle the optional "extra" rsync targets (trace, doc, tools,
# indices). With $early true this runs before the main mirror pass: it
# fetches the remote trace files and registers cleanup ignore patterns
# so an early cleanup will not delete extra files. With $early false
# it builds the include rules for the real doc/tools/indices transfer.
sub rsync_extra {
    my ($early, @extras) = @_;
    my @includes;

    # Cleanup-ignore patterns registered during the early pass.
    my %early_ignores = (
        trace   => ["^project/trace/"],
        doc     => ["^doc/", "^README*"],
        tools   => ["^tools/"],
        indices => ["^indices/"],
    );
    # rsync include rules used during the late pass.
    my %late_includes = (
        doc     => ["/doc/***", "/README*"],
        tools   => ["/tools/***"],
        indices => ["/indices/***"],
    );

    for my $type (@extras) {
        if ($early) {
            if ($type eq "trace") {
                # Fetch everyone's trace file except our own.
                push(@includes, "- /project/trace/$hostname");
                push(@includes, "/project/trace/*");
                say("Update remote trace files (using rsync).");
            }
            push(@ignores, @{ $early_ignores{$type} }) if $early_ignores{$type};
        } elsif ($late_includes{$type}) {
            push(@includes, @{ $late_includes{$type} });
        }
    }
    return if (! @includes);
    if (! $early) {
        @extras = grep(!/^trace$/, @extras); # drop 'trace' from list
        say("Update extra files (using rsync): @extras.");
    }
    rsync_extra_get(@includes);
}
1701
+
1702
# Run a single rsync transfer for the include rules built by
# rsync_extra(). Parent directories of each include are added so rsync
# does not prune them; lines starting with "- " are exclusion rules
# and passed through unchanged. Uses --delete so the extra trees stay
# an exact copy of the remote.
sub rsync_extra_get {
    my @includes = @_;
    my $fh;
    my @result;

    my $opt=$rsync_options;
    $opt = "$opt --progress" if $progress;
    $opt = "$opt -v" if $verbose;
    $opt = "$opt -v" if $debug;
    $opt = "$opt -n" if $dry_run;

    ($fh, $rsynctempfile) = tempfile();
    foreach my $line (@includes) {
        if ($line !~ /^- /) {
            my $dirname;
            my @dir;
            ($dirname) = ($line =~ m:(.*/):);
            @dir= split(/\//, $dirname);
            for (1..$#dir) {
                push (@result, "" . join('/', @dir[0..$_]) . "/");
            }
        }
        push (@result, "$line");
    }
    for (@result) {
        print $fh "$_\n";
    }
    system ("rsync --timeout=$timeout $opt $rsyncremote --delete --include-from=$rsynctempfile --exclude='*' $mirrordir");
    close $fh;
    unlink $rsynctempfile;
}
1733
+
1734
# run system() with stdin and stdout redirected to files
# unlinks stdout target file first to break hard links
sub system_redirect_io {
    my ($command, $fromfile, $tofile) = @_;

    # Remove an existing target first so a hard-linked copy elsewhere
    # in the mirror tree is not overwritten in place.
    if (-f $tofile) {
        unlink($tofile) or die "unlink($tofile) failed: $!";
    }
    system("$command <$fromfile >$tofile");
}
1744
+
1745
# Split a dist such as "stable/updates" into its raw dist name
# ("stable") and the optional subdirectory ("/updates", or "" when
# there is none). Returns the pair (raw, subdir).
sub split_dist {
  my ($dist) = @_;
  my ($raw, $sdir) = $dist =~ m{^([^/]+)(/.*)?$};
  $sdir //= "";
  return ($raw, $sdir);
}
1752
+
1753
# Download dists/$dist/Release and Release.gpg into $tdir and verify the
# signature with gpgv. Returns 1 on success, 0 when either file could
# not be fetched. Signature problems are logged via say()/@errlog and
# counted in $num_errors -- unless --ignore-release-gpg is set, in which
# case gpg-related errors are discarded again at the end.
sub get_release {
  my ($tdir, $dist) = @_;

  make_dir ("$tdir");
  return 0 unless remote_get("dists/$dist/Release", "$tempdir/.tmp");
  # Snapshot the error count so errors added below can be rolled back
  # when $ignore_release_gpg is in effect.
  my $t = $num_errors;
  return 0 unless remote_get("dists/$dist/Release.gpg", "$tempdir/.tmp");
  # Check for gpg
  if (!$ignore_release_gpg) {
    # A non-zero exit here means gpgv itself is unusable or missing.
    if (system("gpgv --version >/dev/null 2>/dev/null")) {
      say("gpgv failed: --ignore-release-gpg or gpgv binary missing?");
      push (@errlog,"gpgv failed: --ignore-release-gpg or gpgv binary missing?\n");
      $num_errors++;
    }
    # Verify Release signature
    if (-f "$tdir/Release.gpg" || -f "$tdir/Release") {
      my $gpgv_res="failed";
      # Scan gpgv's machine-readable status output for a VALIDSIG line.
      open GPGV, "gpgv 2>/dev/null --status-fd 1 $tdir/Release.gpg $tdir/Release|";
      while (<GPGV>) {
        $gpgv_res="valid" if /^\[GNUPG:\] VALIDSIG/;
      }
      close GPGV;
      # Re-run gpgv purely to show its output to the user on failure or
      # in debug/verbose modes (the result above is what counts).
      if ($gpgv_res eq "failed" || $debug) {
        system("gpgv --status-fd 1 $tdir/Release.gpg $tdir/Release");
      }
      if ($verbose && !$debug) {
        system("gpgv --status-fd 1 $tdir/Release.gpg $tdir/Release >/dev/null");
      }
      if ($gpgv_res eq "failed") {
        say("Release signature does not verify.");
        push (@errlog,"Release signature does not verify\n");
        $num_errors++;
      }
    } else {
      say("Release signature does not verify, file missing.");
      push (@errlog,"Release signature does not verify\n");
      $num_errors++;
    }
  }
  # Roll back gpg-related errors when they are to be ignored.
  $num_errors=$t if ($ignore_release_gpg);
  return 1
}
1795
+
1796
# Determine the canonical (codename) name of $dist from the Release file
# downloaded into $tdir, register the dist in %distset under $type
# ("mirror" or "d-i"), and rename suite-named mirror directories to the
# codename. Returns (codename, suite, subdirectory).
# NOTE(review): on a broken Release file this uses "next", which unwinds
# into the *caller's* dist loop (historic debmirror behavior, kept as-is).
sub name_release {
  my ($type, $tdir, $dist) = @_;
  my ($origin, $codename, $suite);
  # BUGFIX: initialize $buf so the concatenation below does not trigger
  # an "uninitialized value" warning on the first line read.
  my $buf = "";

  if (-f "$tdir/Release") {
    if (open RELEASE, "<$tdir/Release") {
      while (<RELEASE>) {
        last if /^MD5Sum:/;   # header fields only; stop at the checksum list
        $buf = $buf . $_;
      }
      close RELEASE;
    }

    $_ = $buf;
    ($origin) = m/^Origin:\s+(.*)/im;
    ($codename) = m/^Codename:\s+(.*)/im;
    ($suite) = m/^Suite:\s+(.*)/im;
  } elsif ($ignore_release) {
    $origin = "none";
  }

  # Allow for example "<codename|suite>/updates"; split into the
  # raw dist (codename or suite) and the subdirectory.
  my ($dist_raw, $dist_sdir) = split_dist($dist);

  # BUGFIX: Origin/Suite may be absent from the Release file; guard the
  # comparisons with // so undef does not produce warnings.
  if (($origin // "") eq "none") {
    $codename = $dist_raw;
  } elsif (($origin // "") eq "Ubuntu") {
    if ($suite) {
      say("Ubuntu Release file: using Suite ($suite).");
      $codename = $suite;
    } else {
      say("Invalid Ubuntu Release file.");
      push (@errlog,"Invalid Ubuntu Release file.\n");
      $num_errors++;
      next;
    }
  } elsif ($codename) {
    if ($dist_raw ne $codename && $dist_raw ne ($suite // "")) {
      say("Broken Release file: neither Codename nor Suite matches $dist.");
      push (@errlog,"Broken Release file: neither Codename nor Suite matches $dist\n");
      $num_errors++;
      next;
    }
  } elsif ($suite) {
    say("Release file does not contain Codename; using Suite ($suite).");
    $codename = $suite;
  } else {
    say("Release file contains neither Codename nor Suite; using $dist.");
    $codename = $dist_raw;
  }
  # For experimental the suite is the same as the codename
  $suite = "" if (! $suite || $suite eq $codename);

  die("Duplicate dist $codename$dist_sdir.\n")
    if exists $distset{"$codename$dist_sdir"}{$type};
  $distset{"$codename$dist_sdir"}{$type} = 1;
  die("Conflicting suites '$suite' and '$distset{$codename}{suite}' for $codename.\n")
    if (exists $distset{"$codename"}{suite} && ($suite ne $distset{$codename}{suite}));
  $distset{$codename}{suite} = "$suite" if ($suite);

  # This should be a one-time conversion only
  if ($suite) {
    if (-d "$tempdir/dists/$suite" && !-l "$tempdir/dists/$suite") {
      rename_distdir("$tempdir/dists", $codename, $suite);
    }
    if (-d "dists/$suite" && !-l "dists/$suite") {
      rename_distdir("dists", $codename, $suite);
    }
  }

  return ($codename, $suite, $dist_sdir);
}
1869
+
1870
+ # Get Index file in the passed subdirectory.
1871
# Fetch the index $file ("Packages" or "Sources") for $subdir: first try
# updating via pdiffs when a .diff/Index is advertised, then fetch the
# .gz / uncompressed / .bz2 variants listed in the Release file as
# needed, regenerating the sibling compressed forms locally. Every file
# handled is registered in %files so cleanup keeps it, and the index
# path is queued in @package_files / @source_files for later parsing.
sub get_index {
  my $subdir=shift;
  my $file=shift;
  make_dir($subdir);
  make_dir("$tempdir/$subdir");

  # pdiff handling: only when diffs are enabled and the archive
  # advertises a .diff/Index for this file.
  if (!($diff_mode eq "none") && exists $file_lists{"$tempdir/$subdir/$file.diff/Index"}) {
    if (!check_lists ("$tempdir/$subdir/$file.diff/Index")) {
      make_dir("$tempdir/$subdir/$file.diff");
      say("$subdir/$file.diff/Index needs fetch");
      if (!remote_get("$subdir/$file.diff/Index")) {
        push (@errlog,"$subdir/$file.diff/Index failed md5sum check, removing\n");
      } else {
        # Apply the diffs, then regenerate the compressed variants from
        # the patched uncompressed index.
        fetch_and_apply_diffs(0, $subdir, $file);
        if (check_lists ("$tempdir/$subdir/$file")) {
          system_redirect_io("gzip -9 -n", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.gz");
          system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
        }
      }
    } else {
      # Index already up to date; still count its size as downloaded.
      $bytes_gotten += $file_lists{"$tempdir/$subdir/$file.diff/Index"}{size};
      fetch_and_apply_diffs(0, $subdir, "$file");
      if (check_lists ("$tempdir/$subdir/$file")) {
        system_redirect_io("gzip -9 -n", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.gz");
        system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
      }
    }
    $files{"$subdir/$file.diff/Index"}=1 if ($diff_mode eq "mirror");
    $files{"$tempdir/$subdir/$file.diff/Index"}=1;
  }

  # The .gz variant, when listed in the Release file.
  if (exists $file_lists{"$tempdir/$subdir/$file.gz"}{size}) {
    if (!check_lists ("$tempdir/$subdir/$file.gz")) {
      say("$subdir/$file.gz needs fetch");
      if (remote_get("$subdir/$file.gz")) {
        # Derive the uncompressed and bzip2 forms from the fetched .gz.
        system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file");
        system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
      } else {
        push (@errlog,"$subdir/$file.gz failed md5sum check\n");
        $num_errors++;
      }
    } else {
      $bytes_gotten += $file_lists{"$tempdir/$subdir/$file.gz"}{size};
    }
  } elsif ($ignore_release) {
    # No Release entry, but the user asked to proceed anyway.
    say("Ignoring missing Release file for $subdir/$file.gz");
    push (@errlog,"Ignoring missing Release file for $subdir/$file.gz\n");
    say("$subdir/$file.gz needs fetch");
    if (remote_get("$subdir/$file.gz")) {
      system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file");
    }
  } else {
    if (-f "$subdir/$file.gz") {
      # A local file with no Release checksum cannot be trusted.
      say("$subdir/$file.gz exists locally but not in Release");
      die "Won't mirror without $subdir/$file.gz signature in Release";
    } else {
      say("$subdir/$file.gz does not exist locally or in Release, skipping.") if ($debug);
    }
  }
  # The uncompressed variant, when listed in the Release file.
  if (exists $file_lists{"$tempdir/$subdir/$file"}) {
    if (!check_lists ("$tempdir/$subdir/$file")) {
      say("$subdir/$file needs fetch");
      if (remote_get("$subdir/$file")) {
        system_redirect_io("bzip2", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.bz2");
      } else {
        push (@errlog,"$subdir/$file failed md5sum check\n");
        $num_errors++;
      }
    } else {
      $bytes_gotten += $file_lists{"$tempdir/$subdir/$file"}{size};
    }
  }
  # The .bz2 variant, when listed in the Release file.
  if (exists $file_lists{"$tempdir/$subdir/$file.bz2"}) {
    if (!check_lists ("$tempdir/$subdir/$file.bz2")) {
      say("$subdir/$file.bz2 needs fetch");
      if (!remote_get("$subdir/$file.bz2")) {
        push (@errlog,"$subdir/$file.bz2 failed md5sum check, removing\n");
      }
    } else {
      $bytes_gotten += $file_lists{"$tempdir/$subdir/$file.bz2"}{size};
    }
  }
  # Per-component Release file.
  if (exists $file_lists{"$tempdir/$subdir/Release"}) {
    if (!check_lists ("$tempdir/$subdir/Release")) {
      say("$subdir/Release needs fetch");
      if (!remote_get("$subdir/Release")) {
        push (@errlog,"$subdir/Release failed md5sum check, removing\n");
      }
    } else {
      $bytes_gotten += $file_lists{"$tempdir/$subdir/Release"}{size};
    }
  }
  # Queue the index for the later package/source parsing pass.
  if ($file eq "Packages") {
    push @package_files, "$tempdir/$subdir/$file";
  } else {
    if ($file eq "Sources") {
      push @source_files, "$tempdir/$subdir/$file";
    } else {
      die "get_index called with unknown type $file\n";
    }
  }
  # Register everything we may have created so cleanup keeps it.
  $files{"$subdir/$file.gz"}=1;
  $files{"$subdir/$file.bz2"}=1;
  # Uncompressed files are no longer kept on the mirrors
  $files{"$subdir/$file"}=1 unless exists $file_lists{"$tempdir/$subdir/$file.gz"};
  $files{"$subdir/Release"}=1;
  $files{"$tempdir/$subdir/$file.gz"}=1;
  $files{"$tempdir/$subdir/$file.bz2"}=1;
  $files{"$tempdir/$subdir/$file"}=1;
  $files{"$tempdir/$subdir/Release"}=1;
}
1982
+
1983
# Bring the Contents file $subdir/$file.gz up to date via pdiffs when a
# .diff/Index is advertised. Returns true when the resulting .gz passes
# its checksum check (so the caller knows whether a full re-fetch is
# still needed).
sub update_contents {
  my ($subdir, $file) = @_;

  my $file_ok = check_lists("$tempdir/$subdir/$file.gz");

  # Get the Index file for the diffs
  if (exists $file_lists{"$tempdir/$subdir/$file.diff/Index"}) {
    if (!check_lists ("$tempdir/$subdir/$file.diff/Index")) {
      make_dir("$tempdir/$subdir/$file.diff");
      say("$subdir/$file.diff/Index needs fetch");
      if (!remote_get("$subdir/$file.diff/Index")) {
        push (@errlog,"$subdir/$file.diff/Index failed md5sum check, removing\n");
        return $file_ok;
      }
      #FIXME: before download
      # (size is only added to the total after the fetch, not before)
      if (-f "$tempdir/$subdir/$file.diff/Index") {
        $bytes_to_get += -s "$tempdir/$subdir/$file.diff/Index";
      }
    }
    $files{"$subdir/$file.diff/Index"}=1 if ($diff_mode eq "mirror");
    $files{"$tempdir/$subdir/$file.diff/Index"}=1;
  } else {
    # No pdiffs available; nothing to update here.
    return $file_ok;
  }

  # Nothing to patch when there is no local copy, or it is already good.
  if (! -f "$tempdir/$subdir/$file.gz" || $file_ok) {
    # fetch diffs only
    fetch_and_apply_diffs(1, $subdir, $file);
    return $file_ok;
  }

  # Uncompress the Contents file
  system_redirect_io("gzip -d", "$tempdir/$subdir/$file.gz", "$tempdir/$subdir/$file");
  # Update it
  fetch_and_apply_diffs(0, $subdir, $file);
  # And compress it again
  if (-f "$tempdir/$subdir/$file") {
    system_redirect_io("gzip -9 -n", "$tempdir/$subdir/$file", "$tempdir/$subdir/$file.gz");
    unlink "$tempdir/$subdir/$file";
  }

  return check_lists("$tempdir/$subdir/$file.gz");
}
2026
+
2027
# Fetch Contents-<arch>.gz for every mirrored dist/arch combination
# whose local copy fails its checksum check, and register the files
# (mirror tree and $tempdir copy) in %files so cleanup keeps them.
sub get_contents_files {
  my $announced = 0;
  for my $dist (keys %distset) {
    next unless exists $distset{$dist}{mirror};
    # These dists publish no Contents files; "source" is not a real arch.
    next if $dist =~ /experimental/ || $dist =~ /-proposed-updates/;
    for my $arch (grep { $_ !~ /source/ } @arches) {
      my $contents = "dists/$dist/Contents-$arch.gz";
      unless (check_lists("$tempdir/$contents")) {
        say("Get Contents files.") unless $announced++;
        remote_get($contents);
      }
      $files{$contents} = 1;
      $files{"$tempdir/$contents"} = 1;
    }
  }
}
2047
+
2048
# Fetch the i18n Index file for $subdir (when listed in the Release
# file) and register it in %files. Download accounting is updated when
# the existing copy is already valid.
sub get_i18n_index {
  my $subdir=shift;
  if (exists $file_lists{"$tempdir/$subdir/Index"}) {
    make_dir($subdir);
    make_dir("$tempdir/$subdir");

    if (!check_lists ("$tempdir/$subdir/Index")) {
      # BUGFIX: the message previously said "$subdir/Release needs
      # fetch" although it is the Index file being fetched.
      say("$subdir/Index needs fetch");
      if (!remote_get("$subdir/Index")) {
        push (@errlog,"$subdir/Index failed md5sum check, removing\n");
      }
    } else {
      $bytes_gotten += $file_lists{"$tempdir/$subdir/Index"}{size};
    }
    $files{"$subdir/Index"}=1;
    $files{"$tempdir/$subdir/Index"}=1;
  }
}
2066
+
2067
# Parse the i18n Index file under $subdir: read its SHA1 section and,
# for every listed Translation file passing the include/exclude filters,
# register it in %files and queue it in %i18n_get when the local copy
# fails its size/sha1 check.
sub parse_i18n_index {
  my $subdir = shift;
  # BUGFIX: "my $x = ... if COND" has undefined behavior in Perl
  # (perlsyn); build the filter patterns with explicit ternaries.
  # The unused outer my($sha1,$size,$filename) declaration was dropped.
  my $exclude = @excludes ? "(".join("|", @excludes).")" : undef;
  my $include = @includes ? "(".join("|", @includes).")" : undef;

  # Parse the Index file
  if (open INDEX, "<$tempdir/$subdir/Index") {
    # Skip the header up to the SHA1 checksum section.
    while (<INDEX>) {
      last if /^SHA1:/;
    }
    while (<INDEX>) {
      next unless /^ /;   # checksum lines are indented

      my ($sha1, $size, $filename) = (/ ([a-z0-9]+) +(\d+) +(.*)$/);
      # An include match overrides any exclude match.
      if(!(defined($include) && ($subdir."/".$filename)=~/$include/o)) {
        next if (defined($exclude) && ($subdir."/".$filename)=~/$exclude/o);
      }

      $files{"$subdir/$filename"}=1;
      $files{$tempdir."/"."$subdir/$filename"}=1;
      if (! check_i18n("$tempdir/$subdir/$filename", $size, $sha1)) {
        $bytes_to_get += $size;
        $i18n_get{"$subdir/$filename"}{sha1} = $sha1;
        $i18n_get{"$subdir/$filename"}{size} = $size;
      }
    }
    close INDEX;
  }
}
2097
+
2098
# Download every queued Translation file whose local copy still fails
# its size/sha1 verification.
sub get_i18n_files {
  say("Get Translation files.");
  for my $file (sort keys %i18n_get) {
    my $want = $i18n_get{$file};
    next if check_i18n("$tempdir/$file", $want->{size}, $want->{sha1});
    remote_get($file);
  }
}
2106
+
2107
# Parse $subdir/$type.diff/Index, download any pdiff files that are
# missing or fail their sha1 check, and -- unless $fetch_only -- apply
# them with ed-style patches to walk the local uncompressed $type file
# forward to the current version. Removes the file when the final
# checksum does not match, so the caller falls back to a full fetch.
sub fetch_and_apply_diffs {
  my ($fetch_only, $subdir, $type) = @_;
  local (*INDEX, *FILE);
  my (%history_sha1, %history_size, %diff_sha1, %diff_size);
  my ($current_sha1, $current_size, $sha1, $size, $file, $digest, $ret);
  my $t = $num_errors;

  # Parse DiffIndex file
  open(INDEX, "$tempdir/$subdir/$type.diff/Index") or die "$tempdir/$subdir/$type.diff/Index: $!";
  $_ = <INDEX>;
  while (defined($_)) {
    if (m/^SHA1-Current:/m) {
      ($current_sha1, $current_size) = m/^SHA1-Current:\s+([A-Za-z0-9]+)\s+(\d+)/m;
      $_ = <INDEX>;
    }
    elsif (m/^SHA1-History:/m) {
      while (defined($_ = <INDEX>)) {
        last if (!m/^\s/m);
        ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
        $history_sha1{$file} = $sha1;
        $history_size{$file} = $size;
      }
    }
    elsif (m/^SHA1-Patches:/m) {
      while (defined($_ = <INDEX>)) {
        last if (!m/^\s/m);
        ($sha1, $size, $file) = m/^\s+([A-Za-z0-9]+)\s+(\d+)\s+(.*)/m;
        $diff_sha1{$file} = $sha1;
        $diff_size{$file} = $size;
      }
    }
    else {
      # BUGFIX: skip unrecognized lines/sections. Previously $_ was not
      # advanced here, so an unknown header line made this loop spin
      # forever.
      $_ = <INDEX>;
    }
  }
  close(INDEX);

  # Download diff files as necessary
  $ret = 1;
  foreach $file (sort keys %diff_sha1) {
    if (!check_diff("$tempdir/$subdir/$type.diff/$file", $diff_size{$file}, $diff_sha1{$file})) {
      say("$subdir/$type.diff/$file.gz needs fetch");
      remote_get("$subdir/$type.diff/$file.gz");
      #FIXME: before download
      # (size accounting happens after the fetch, not before)
      if (-f "$tempdir/$subdir/$type.diff/$file.gz") {
        $bytes_to_get += -s "$tempdir/$subdir/$type.diff/$file.gz";
      }
      if (!check_diff("$tempdir/$subdir/$type.diff/$file", $diff_size{$file}, $diff_sha1{$file})) {
        say("$subdir/$type.diff/$file.gz failed sha1sum check, removing");
        push (@errlog,"$subdir/$type.diff/$file.gz failed sha1sum check, removing\n");
        unlink "$tempdir/$subdir/$type.diff/$file.gz";
        $ret = 0;
      }
    }
    $files{"$subdir/$type.diff/$file.gz"}=1 if ($diff_mode eq "mirror");
    $files{"$tempdir/$subdir/$type.diff/$file.gz"}=1;
  }
  $num_errors = $t if ($ignore_small_errors);
  return if ($fetch_only || ! $ret);

  # Apply diff files: hash the current local file, then repeatedly find
  # the history entry matching that hash and apply its patch.
  open(FILE, "$tempdir/$subdir/$type") or return;
  $digest = Digest::SHA1->new;
  $digest->addfile(*FILE);
  $sha1 = $digest->hexdigest;
  $size = -s "$tempdir/$subdir/$type";
  foreach $file (sort keys %history_sha1) {
    next unless ($sha1 eq $history_sha1{$file} && $size eq $history_size{$file});
    if (system("gzip -d < \"$tempdir/$subdir/$type.diff/$file.gz\" | patch --ed \"$tempdir/$subdir/$type\"")) {
      say("Patch $file failed, will fetch $subdir/$type file");
      unlink "$tempdir/$subdir/$type";
      return;
    }
    # Re-hash after each applied patch so the next matching entry (if
    # any) continues from the new state.
    open(FILE, "$tempdir/$subdir/$type") or return;
    $digest = Digest::SHA1->new;
    $digest->addfile(*FILE);
    $sha1 = $digest->hexdigest;
    $size = -s "$tempdir/$subdir/$type";
    say("$subdir/$type patched with $subdir/$type.diff/$file.gz");
  }
  if (!($sha1 eq $current_sha1 && $size eq $current_size)) {
    say("$subdir/$type failed sha1sum check, removing");
    push (@errlog,"$subdir/$type failed sha1sum check, removing\n");
    unlink "$tempdir/$subdir/$type";
  }
}
2190
+
2191
+ # Make a directory including all needed parents.
2192
{
  # Cache of directories already created or verified; shared by all
  # make_dir calls for the lifetime of the process.
  my %seen;

  # Create $dir and any missing parent directories (like mkdir -p).
  sub make_dir {
    my $dir=shift;

    my @parts=split('/', $dir);
    my $current='';
    foreach my $part (@parts) {
      $current.="$part/";
      if (! $seen{$current}) {
        if (! -d $current) {
          # BUGFIX: include the path in the error message, and tolerate
          # a directory that appeared between the -d test and mkdir.
          mkdir ($current, 0755) or -d $current
            or die "mkdir $current failed: $!";
          debug("Created directory: $current");
        }
        $seen{$current}=1;
      }
    }
  }
}
2212
+
2213
+ # Mirror cleanup for unknown files that cannot be found in Packages files.
2214
+ # This subroutine is called on pre- and post-cleanup and takes no arguments.
2215
+ # It uses some global variables like $files, $mirrordir, @ignores
2216
# Delete mirror files not referenced by the current run. With the state
# cache, only files explicitly marked stale (%files value 2) are
# removed; otherwise the whole tree is scanned with find and anything
# unknown (and not matched by @ignores) is deleted. D-I image trees are
# handled separately by di_cleanup().
sub cleanup_unknown_files {
  print("Cleanup mirror") if ($verbose or $progress);
  if ($use_cache) {
    say(": using cache.");
    foreach my $file (sort keys %files) {
      # D-I images are cleaned up by di_cleanup() instead.
      next if (@di_dists && $file =~ m:installer-\w+/current/images/:);
      # Value 2 marks a file carried over from the cache but not seen
      # this run -- i.e. stale.
      if ($files{$file} == 2 && -f $file) {
        say("deleting $file") if ($verbose);
        if (! $dry_run) {
          unlink $file or die "unlink $file: $!";
        }
      }
    }
  } else {
    say($state_cache_days ? ": full." : ".");
    chdir($mirrordir) or die "chdir $mirrordir: $!";
    my $ignore;
    $ignore = "(".join("|", @ignores).")" if @ignores;
    # Remove all files in the mirror that we don't know about
    foreach my $file (`find . -type f`) {
      chomp $file;
      $file=~s:^\./::;
      next if (@di_dists && $file =~ m:installer-\w+/current/images/:);
      unless ((exists $files{$file} && $files{$file} != 2) or
              (defined($ignore) && $file=~/$ignore/o)) {
        say("deleting $file") if ($verbose);
        if (! $dry_run) {
          unlink $file or die "unlink $file: $!";
        }
      }
    }
  }
  # Clean up obsolete files of D-I images.
  di_cleanup() if @di_dists;
}
2251
+
2252
+ # FIXME: does not work
2253
# Return the size of $file on the remote HTTP mirror via a HEAD request,
# or undef when the request fails.
# Fixes the old "FIXME: does not work": LWP::UserAgent->head() returns
# an HTTP::Response object -- the list-return style used before belongs
# to LWP::Simple::head() -- so the size must be taken from the
# response's Content-Length header.
# NOTE(review): assumes $ua is an LWP::UserAgent instance -- confirm.
sub get_http_size {
  my $file = shift;
  my $url = "http://${host}/${remoteroot}/${file}";
  my $res = $ua->head($url);
  my $size = ($res && $res->is_success) ? $res->content_length : undef;
  say("$url -- " . ($size // "unknown"));
  return $size;
}
2261
+
2262
# Validate each requested D-I dist: when it is already being mirrored,
# just flag it for D-I; when it matches a mirrored dist's suite, flag
# that dist's codename entry; otherwise fetch its Release file to learn
# the codename and register it as a D-I-only dist.
sub di_check_dists {
  say("Checking validity of D-I dists.");
  DI_DIST:
  for my $di_dist (@di_dists) {
    if (exists $distset{$di_dist}) {
      # Valid dist and also mirroring the archive itself
      $distset{$di_dist}{"d-i"} = 1;
    } else {
      foreach my $dist (keys %distset) {
        my ($dist_raw, $dist_sdir) = split_dist($dist);
        if ($di_dist eq $distset{$dist_raw}{suite}) {
          # Suite specified, use codename instead
          $distset{"$dist_raw$dist_sdir"}{"d-i"} = 1;
          next DI_DIST;
        }
      }
      # Only mirroring D-I images, not the archive itself
      my $tdir="$tempdir/.tmp/dists/$di_dist";
      next unless (get_release($tdir, $di_dist) || $ignore_release);
      # name_release registers the dist in %distset under "d-i".
      name_release("d-i", $tdir, $di_dist);
      # The temporary Release files are no longer needed.
      unlink "$tdir/Release";
      unlink "$tdir/Release.gpg";
    }
  }
}
2287
+
2288
# Read the D-I MD5SUMS file for every dist/arch being mirrored and
# record each installer image in %di_files with its md5sum and a status
# flag saying whether the copy already on the mirror is up to date.
sub di_add_files {
  my $tdir = "$tempdir/d-i";
  # BUGFIX: "my $x = ... if COND" has undefined behavior in Perl
  # (perlsyn); build the filter patterns with explicit ternaries.
  my $exclude = @excludes ? "(".join("|", @excludes).")" : undef;
  my $include = @includes ? "(".join("|", @includes).")" : undef;

  foreach my $dist (keys %distset) {
    next unless exists $distset{$dist}{"d-i"};
    foreach my $arch (@di_arches) {
      next if $arch =~ /kfreebsd-/;

      my $image_dir = "dists/$dist/main/installer-$arch/current/images";
      make_dir ("$tdir/$image_dir");
      if (!remote_get("$image_dir/MD5SUMS", $tdir)) {
        say("Failed to download $image_dir/MD5SUMS; skipping.");
        return;
      }
      if (-f "$tdir/$image_dir/MD5SUMS") {
        $bytes_to_get += -s _; # As we did not have the size earlier
      }

      # Slurp the whole MD5SUMS file; local $/ already gives undef for
      # the scope of this loop body (the redundant "undef $/" was
      # removed).
      local $/;
      open(FILE, "<", "$tdir/$image_dir/MD5SUMS") or die "$tdir/$image_dir/MD5SUMS: $!";
      $_ = <FILE>;
      while (m/^([A-Za-z0-9]{32} .*)/mg) {
        my ($md5sum, $filename) = split(' ', $1, 3);
        $filename =~ s:^\./::;
        # An include match overrides any exclude match.
        if(!(defined($include) && ($image_dir."/".$filename)=~/$include/o)) {
          next if (defined($exclude) && ($image_dir."/".$filename)=~/$exclude/o);
        }

        $di_files{$image_dir}{$filename}{md5sum} = $md5sum;

        # Check against the version currently on the mirror
        if (check_file("$image_dir/$filename", -1, $md5sum)) {
          $di_files{$image_dir}{$filename}{status} = 1;
        } else {
          $di_files{$image_dir}{$filename}{status} = 0;
        }
      }
      close(FILE);
    }
  }
}
2333
+
2334
+ # ToDo: for rsync maybe it would make sense to sync the images directly
2335
+ # into place, the whole $image_dir at a time.
2336
# Download all out-of-date D-I images into a staging area under
# $tempdir/d-i, then -- only when a whole image directory downloaded and
# verified completely -- hard-link the images (and MD5SUMS) into place
# on the mirror, so the mirror never holds a partially-updated set.
sub di_get_files {
  say("Getting Debian Installer images.");
  my $tdir = "$tempdir/d-i";

  foreach my $image_dir (sort keys %di_files) {
    my $lres = 1;   # stays 1 only if every file of this dir verified
    foreach my $file (sort keys %{ $di_files{$image_dir} }) {
      next unless $di_files{$image_dir}{$file}{status} == 0;
      # Fetch images into a temporary location
      $file =~ m:(^.*)/:;
      make_dir ("$tdir/$image_dir/$1") if $1;
      if (!remote_get("$image_dir/$file", $tdir) ||
          !check_file("$tdir/$image_dir/$file", -1, $di_files{$image_dir}{$file}{md5sum})) {
        $lres = 0;
        # In a dry run keep going so the size accounting stays complete.
        last if (! $dry_run);
      }
      if (-f "$tdir/$image_dir/$file") {
        $bytes_to_get += -s _; # As we did not have the size in add_di_files()
      }
    }

    # Move images in place on mirror
    if ($lres && ! $dry_run) {
      foreach my $file (sort keys %{ $di_files{$image_dir} }) {
        next unless $di_files{$image_dir}{$file}{status} == 0;
        $file =~ m:(^.*)/:;
        make_dir ("$image_dir/$1") if $1;
        if (-f "$image_dir/$file") {
          unlink "$image_dir/$file";
        }
        # NOTE(review): link() results are not checked here -- a failed
        # hard link (e.g. cross-device) passes silently; confirm.
        link("$tdir/$image_dir/$file", "$image_dir/$file");
      }
      # Move the MD5SUMS file in place on mirror
      link("$tdir/$image_dir/MD5SUMS", "$image_dir/MD5SUMS");
    } elsif (! $dry_run) {
      say("Failed to download some files in $image_dir; not updating images.");
    }
  }
}
2375
+
2376
# Remove obsolete D-I image files, both from the image directories on
# the mirror (anything not listed in %di_files) and from the temporary
# staging area under $tempdir/d-i.
sub di_cleanup {
  # Clean up obsolete files
  foreach my $image_dir (`find dists/ -type d -name images`) {
    next unless $image_dir =~ m:/installer-\w+/current/images$:;
    chomp $image_dir;
    chdir("$image_dir") or die "unable to chdir($image_dir): $!\n";
    foreach my $file (`find . -type f`) {
      chomp $file;
      $file=~s:^\./::;
      if (! exists $di_files{$image_dir} || ! exists $di_files{$image_dir}{$file}) {
        # Keep MD5SUMS for directories we are still mirroring.
        next if (exists $di_files{$image_dir} && $file eq "MD5SUMS");
        say("deleting $image_dir/$file") if ($verbose);
        if (! $dry_run) {
          unlink "$file" or die "unlink $image_dir/$file: $!\n";
        }
      }
    }
    # BUGFIX: the error message previously named $tempdir although this
    # chdir goes back to $mirrordir.
    chdir("$mirrordir") or die "unable to chdir($mirrordir): $!\n";
  }
  # Clean up temporary D-I files (silently)
  if (-d "$tempdir/d-i") {
    chdir("$tempdir/d-i") or die "unable to chdir($tempdir/d-i): $!\n";
    foreach my $file (`find . -type f`) {
      chomp $file;
      $file=~s:^\./::;
      unlink "$file" or die "unlink $tempdir/d-i/$file: $!\n";
    }
    chdir("$mirrordir") or die "unable to chdir($mirrordir): $!\n";
  }
}
2406
+
2407
# Close the FTP session when one is open and report overall download
# statistics. For rsync (or when nothing was counted) only the elapsed
# time is reported, since byte accounting is unavailable.
sub download_finished {
  $ftp->quit if $download_method eq 'ftp';

  my $elapsed = time - $start_time;
  if ($download_method eq 'rsync' || $bytes_gotten == 0) {
    say("Download completed in ${elapsed}s.");
    return;
  }
  my $bytes_per_sec = $elapsed == 0 ? 0 : sprintf("%3.0f", $bytes_gotten / $elapsed);
  my $kib_per_sec = int($bytes_per_sec / 1024 * 100) / 100;
  say("Downloaded ".print_dl_size($bytes_gotten)." in ${elapsed}s at $kib_per_sec kiB/s.");
}
2419
+
2420
# One-time conversion: rename a suite-named dist directory ("$dir/$suite")
# to its codename ("$dir/$codename"). Refuses to act unless the user
# passed --allow-dist-rename, and aborts when the target already exists
# as a real directory.
sub rename_distdir {
  my ($dir, $codename, $suite) = @_;
  say("The directory for a dist should be its codename, not a suite.");
  if (!$allow_dist_rename) {
    die("Use --allow-dist-rename to have debmirror do the conversion automatically.\n");
  }
  say("Starting conversion - renaming '$dir/$suite' to '$dir/$codename':");
  if (-l "$dir/$codename") {
    say(" removing symlink '$dir/$codename'; a new symlink for the suite will be created later");
    unlink "$dir/$codename";
  }
  if (-d "$dir/$codename") {
    die("Directory '$dir/$codename' already exists; aborting conversion.\n");
  }
  # BUGFIX: the rename result was previously ignored, so a failed rename
  # was still reported as a successful conversion.
  rename("$dir/$suite", "$dir/$codename")
    or die("rename '$dir/$suite' to '$dir/$codename' failed: $!\n");
  say(" conversion completed successfully");
}
2437
+
2438
# Persist %files as the state cache for the next run: entries still
# marked stale (2) are dropped, all remaining entries are marked 2 so
# the next run can tell which it re-confirms. Cache version and an
# expiration timestamp are stored alongside, then the hash is written
# with Storable::nstore.
sub save_state_cache {
  my $cache_file = "$tempdir/debmirror_state.cache";
  say("Saving debmirror state cache.");
  foreach my $file (keys %files) {
    if ($files{$file} == 2) {
      # Stale entry from the previous run: forget it.
      delete $files{$file};
    } elsif ($files{$file} >= 0){
      # Seen this run: pre-mark as stale for the next run.
      $files{$file} = 2;
    }
  }
  # Add state cache meta data
  my $now = time();
  $files{cache_version} = $files_cache_version;
  if (! $state_cache_exptime) {
    $state_cache_exptime = $now + $state_cache_days * 24 * 60 * 60;
  }
  $files{cache_expiration_time} = $state_cache_exptime;
  if (! nstore(\%files, $cache_file)) {
    say("Failed to save state cache.");
    unlink $cache_file if -f $cache_file;
  } else {
    # Report how long the saved cache will remain valid.
    my $expires = int(($state_cache_exptime - $now) / (60 * 60)); # hours
    if ($expires > 0) {
      my $days = int($expires / 24);
      my $hours = $expires % 24;
      say("State cache will expire in " .
          ($days ? "$days day(s)" : ($hours ? "" : "the next hour")) .
          ($hours ? ($days ? " and " : "") . "$hours hour(s)" : "") . ".");
    } else {
      say("State cache expired during this run; next run will not use cache.");
    }
  }
}
2471
+
2472
# Load the state cache written by save_state_cache() into %files and set
# $use_cache. Falls back to a full mirror check (by returning without
# setting $use_cache) when the cache is missing, unreadable, of a
# different cache version, or expired.
sub load_state_cache {
  my $cache_file = "$tempdir/debmirror_state.cache";
  if (! -f $cache_file) {
    say("State cache file does not exist; doing full mirroring.");
    return;
  }

  my $rfiles;
  say("Loading debmirror state cache.");
  $rfiles = retrieve($cache_file);
  if (! defined $rfiles) {
    say("Failed to load state cache; doing full mirror check.");
    return
  }
  # Validate the embedded meta data before trusting the cache.
  if (! exists $$rfiles{cache_version}) {
    say("Cache version missing in state cache; doing full mirroring.");
    return
  } elsif ($$rfiles{cache_version} ne $files_cache_version) {
    say("State cache is incompatible with this version of debmirror; doing full mirror check.");
    return
  } else {
    delete $$rfiles{cache_version};
  }
  if (! exists $$rfiles{cache_expiration_time}) {
    say("Expiration time missing in state cache; doing full mirror check.");
    return
  } elsif ($$rfiles{cache_expiration_time} < time()) {
    say("State cache has expired; doing full mirror check.");
    return
  } else {
    # Keep the original expiration so repeated runs don't extend it.
    $state_cache_exptime = $$rfiles{cache_expiration_time};
    delete $$rfiles{cache_expiration_time};
  }

  say("State cache loaded successfully; will use cache.");
  %files = %$rfiles;
  $use_cache = 1;
  # Preserve state cache during dry runs
  if ($dry_run_var) {
    $files{$cache_file} = 1;
  } else {
    unlink $cache_file if -f $cache_file;
  }
}
2516
+
2517
# Print the arguments joined with spaces, but only when verbose or
# progress output is enabled.
sub say {
  return unless $verbose or $progress;
  print join(' ', @_) . "\n";
}
2520
+
2521
# Print a debug message, prefixed with the program name, when --debug
# is active.
sub debug {
  return unless $debug;
  print "$0: " . join(' ', @_) . "\n";
}
2524
+
2525
+ =head1 COPYRIGHT
2526
+
2527
+ This program is copyright 2001 by Joey Hess <joeyh@debian.org>, under
2528
+ the terms of the GNU GPL (either version 2 of the licence or, at your
2529
+ option, any later version), copyright 2001-2002 by Joerg Wendland
2530
+ <joergland@debian.org>, copyright 2003-2007 by Goswin von Brederlow
2531
+ <goswin-v-b@web.de> and copyright 2009 by Frans Pop <fjp@debian.org>.
2532
+
2533
+ The author disclaims any responsibility for any mangling of your system,
2534
+ unexpected bandwidth usage bills, meltdown of the Debian mirror network,
2535
+ etc, that this script may cause. See NO WARRANTY section of GPL.
2536
+
2537
+ =head1 AUTHOR
2538
+
2539
+ Current maintainer:
2540
+ Frans Pop <fjp@debian.org>
2541
+
2542
+ Previous authors:
2543
+ Joey Hess <joeyh@debian.org> (original author)
2544
+ Joerg Wendland <joergland@debian.org>
2545
+ Goswin von Brederlow <goswin-v-b@web.de>
2546
+
2547
+ =head1 MOTTO
2548
+
2549
+ Waste bandwidth -- put a partial mirror on your laptop today!
2550
+
2551
+ =cut