exiftool-vendored.pl 12.80.0 → 12.84.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. package/bin/Changes +81 -0
  2. package/bin/MANIFEST +6 -18
  3. package/bin/META.json +1 -1
  4. package/bin/META.yml +1 -1
  5. package/bin/README +4 -2
  6. package/bin/build_geolocation +872 -0
  7. package/bin/config_files/example.config +2 -2
  8. package/bin/exiftool +61 -17
  9. package/bin/fmt_files/gpx.fmt +2 -1
  10. package/bin/fmt_files/gpx_wpt.fmt +2 -1
  11. package/bin/lib/Image/ExifTool/Apple.pm +51 -7
  12. package/bin/lib/Image/ExifTool/BuildTagLookup.pm +47 -31
  13. package/bin/lib/Image/ExifTool/CanonVRD.pm +19 -6
  14. package/bin/lib/Image/ExifTool/DJI.pm +29 -0
  15. package/bin/lib/Image/ExifTool/Exif.pm +19 -2
  16. package/bin/lib/Image/ExifTool/FujiFilm.pm +20 -7
  17. package/bin/lib/Image/ExifTool/GM.pm +552 -0
  18. package/bin/lib/Image/ExifTool/Geolocation.dat +0 -0
  19. package/bin/lib/Image/ExifTool/Geolocation.pm +423 -178
  20. package/bin/lib/Image/ExifTool/Geotag.pm +26 -13
  21. package/bin/lib/Image/ExifTool/M2TS.pm +32 -4
  22. package/bin/lib/Image/ExifTool/MakerNotes.pm +2 -2
  23. package/bin/lib/Image/ExifTool/Microsoft.pm +1 -1
  24. package/bin/lib/Image/ExifTool/Nikon.pm +337 -27
  25. package/bin/lib/Image/ExifTool/NikonCustom.pm +55 -1
  26. package/bin/lib/Image/ExifTool/Olympus.pm +1 -0
  27. package/bin/lib/Image/ExifTool/OpenEXR.pm +21 -3
  28. package/bin/lib/Image/ExifTool/PNG.pm +3 -3
  29. package/bin/lib/Image/ExifTool/QuickTime.pm +45 -24
  30. package/bin/lib/Image/ExifTool/QuickTimeStream.pl +66 -30
  31. package/bin/lib/Image/ExifTool/README +2 -0
  32. package/bin/lib/Image/ExifTool/Sony.pm +16 -7
  33. package/bin/lib/Image/ExifTool/TagLookup.pm +4827 -4778
  34. package/bin/lib/Image/ExifTool/TagNames.pod +953 -620
  35. package/bin/lib/Image/ExifTool/WriteQuickTime.pl +32 -9
  36. package/bin/lib/Image/ExifTool/Writer.pl +169 -130
  37. package/bin/lib/Image/ExifTool/XMP.pm +4 -2
  38. package/bin/lib/Image/ExifTool/XMP2.pl +3 -0
  39. package/bin/lib/Image/ExifTool.pm +106 -48
  40. package/bin/lib/Image/ExifTool.pod +47 -25
  41. package/bin/perl-Image-ExifTool.spec +1 -1
  42. package/bin/pp_build_exe.args +4 -4
  43. package/package.json +3 -3
@@ -0,0 +1,872 @@
1
+ #!/usr/bin/perl -w
2
+ #-------------------------------------------------------------------------------
3
+ # File: build_geolocation
4
+ #
5
+ # Description: Parse geonames files to create ExifTool geolocation database
6
+ #
7
+ # Syntax: build_geolocation [OPTIONS] [DBFILE] ...
8
+ #
9
+ # Options: (see -h output)
10
+ #
11
+ # Created: 2024-03-03 - P. Harvey
12
+ # 2024-04-15 - PH Clean up and add options for public release
13
+ # 2024-04-22 - PH Increased number of possible feature codes from
14
+ # 32 to 64. Convert backslashes in directory names
15
+ #
16
+ # Notes: Requires these files from https://download.geonames.org/export/
17
+ #
18
+ # allCountries.txt (or other input database if specified)
19
+ # countryInfo.txt
20
+ # admin1CodesASCII.txt
21
+ # admin2Codes.txt
22
+ # alternateNamesV2.txt (optional)
23
+ #
24
+ # Output datbase format (Geolocation.dat):
25
+ #
26
+ # Header:
27
+ # "GeolocationV.VV\tNNNN\n" (V.VV=version, NNNN=num city entries)
28
+ # "# <comment>\n"
29
+ # NNNN City entries:
30
+ # Offset Format Description
31
+ # 0 int16u - latitude high 16 bits (converted to 0-0x100000 range)
32
+ # 2 int8u - latitude low 4 bits, longitude low 4 bits
33
+ # 3 int16u - longitude high 16 bits
34
+ # 5 int8u - index of country in country list
35
+ # 6 int8u - 0xf0 = population E exponent (in format "N.Fe+0E"), 0x0f = population N digit
36
+ # 7 int16u - 0xf000 = population F digit, 0x0fff = index in region list (admin1)
37
+ # 9 int16u - v1.02: 0x7fff = index in subregion (admin2), 0x8000 = high bit of time zone
38
+ # 9 int16u - v1.03: index in subregion (admin2)
39
+ # 11 int8u - low byte of time zone index
40
+ # 12 int8u - 0x3f = feature code index (see below), v1.03: 0x80 = high bit of time zone
41
+ # 13 string - UTF8 City name, terminated by newline
42
+ # "\0\0\0\0\x01"
43
+ # Country entries:
44
+ # 1. 2-character country code
45
+ # 2. Country name, terminated by newline
46
+ # "\0\0\0\0\x02"
47
+ # Region entries:
48
+ # 1. Region name, terminated by newline
49
+ # "\0\0\0\0\x03"
50
+ # Subregion entries:
51
+ # 1. Subregion name, terminated by newline
52
+ # "\0\0\0\0\x04"
53
+ # Time zone entries:
54
+ # 1. Time zone name, terminated by newline
55
+ # "\0\0\0\0\x05" (feature codes added in v1.03)
56
+ # Feature codes:
57
+ # 1. Feature code, terminated by newline
58
+ # "\0\0\0\0\0"
59
+ #
60
+ # Feature Codes v1.02: (see http://www.geonames.org/export/codes.html#P for descriptions)
61
+ #
62
+ # 0. Other 3. PPLA2 6. PPLA5 9. PPLF 12. PPLR 15. PPLX
63
+ # 1. PPL 4. PPLA3 7. PPLC 10. PPLG 13. PPLS
64
+ # 2. PPLA 5. PPLA4 8. PPLCH 11. PPLL 14. STLMT
65
+ #
66
+ # Feature Codes v1.03 and later are listed at the end of the database
67
+ #-------------------------------------------------------------------------------
68
+
69
+ use strict;
70
+
71
+ my $dbVer = '1.02'; # database version
72
+
73
+ my $dbFile = 'allCountries.txt'; # default database file
74
+ my $countryFile = 'countryInfo.txt'; # mandatory country names file
75
+ my $regionFile = 'admin1CodesASCII.txt'; # mandatory region names file
76
+ my $admin2File = 'admin2Codes.txt'; # mandatory subregion names file
77
+ my $altNamesFile = 'alternateNamesV2.txt'; # optional alternate names file
78
+ my $outFile = 'Geolocation.dat'; # output ExifTool database file
79
+ my $outAltNames = 'AltNames.dat'; # output alternate names file
80
+ my $outDirName = 'Geolocation_out'; # output directory for database files
81
+ my $geoLang = 'GeoLang'; # output directory for language files
82
+
83
+ my %defaults = (
84
+ file => $dbFile,
85
+ minpop => 2000,
86
+ def_codes => 'PPLA,PPLA2',
87
+ def_codesp => 'PPL,PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC,PPLCH,PPLF,PPLG,PPLH,PPLL,PPLQ,PPLR,PPLS,PPLW,PPLX,STLMT',
88
+ );
89
+
90
+ # languages to read from geonames database
91
+ my @languages = qw(cs de en en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh zh-cn zh-tw);
92
+
93
+ # indices of feature codes (v1.02 is hard-coded in ExifTool)
94
+ my @fc102 = qw(
95
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
96
+ PPLF PPLG PPLL PPLR PPLS STLMT PPLX
97
+ );
98
+ # base features for v1.03+
99
+ my @fc103 = qw(
100
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
101
+ PPLF PPLG PPLH PPLL PPLQ PPLR PPLS PPLW PPLX STLMT
102
+ );
103
+ my $i = 0;
104
+ my @featureCodes = $dbVer eq '1.02' ? @fc102 : @fc103;
105
+ my %featureCodes = map { $_ => $i++ } @featureCodes;
106
+
107
+ my ($dbfile, @dbfiles, $outDir, $verbose, $noLang, %needRgn);
108
+ my %optArgs = ( p => 1, c => 1, cp => 1, l => 1, o => 1, ver => 1 );
109
+
110
+ # process command-line arguments
111
+ my $opts = { };
112
+ while (@ARGV) {
113
+ my $opt = shift;
114
+ if (not $opt =~ s/^-//) {
115
+ $opt = '.' unless length $opt;
116
+ $opt =~ tr(\\)(/); # use forward slashes
117
+ $opt =~ s(/$)(); # remove trailing slash
118
+ $opt = "$opt/$defaults{file}" if -d $opt;
119
+ -e $opt or die "Error opening database $opt\n";
120
+ push @dbfiles, { %defaults, %$opts, file => $opt };
121
+ $opts = { };
122
+ next;
123
+ }
124
+ my $arg;
125
+ if ($optArgs{$opt}) {
126
+ $arg = shift;
127
+ defined $arg or die "Expecting argument for -$opt option\n";
128
+ }
129
+ if ($opt eq 'p') {
130
+ $arg = uc $arg;
131
+ if ($arg =~ /=/) {
132
+ my ($cc, $mp) = split /=/, $arg;
133
+ $mp =~ /^\d+$/ or die "Expecting number on rhs of '=' for -p option\n";
134
+ my @cc = split /,/, $cc;
135
+ foreach $cc (@cc) {
136
+ $cc =~ /^([A-Z]{2})(\..+)?$/ or die "Invalid country/region '$cc' for -p option\n";
137
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
138
+ $$opts{cc_minpop}{$cc} = $mp;
139
+ }
140
+ } else {
141
+ $arg =~ /^\d+$/ or die "Expecting number for -p option\n";
142
+ $$opts{minpop} = $arg;
143
+ }
144
+ } elsif ($opt =~ /^c(p?)$/) {
145
+ my $p = $1;
146
+ $arg = uc $arg;
147
+ my ($cc, $co);
148
+ if ($arg =~ /=/) {
149
+ ($cc, $co) = split /=/, $arg;
150
+ } else {
151
+ ($cc, $co) = ('??', $arg);
152
+ }
153
+ my $sign = $co =~ s/^([-+])// ? $1 : '';
154
+ my @co = split /,/, $co;
155
+ my @cc = split /,/, $cc;
156
+ # store lookup for features to keep for each country ('??' = any country)
157
+ foreach $cc (@cc) {
158
+ $cc =~ /^([A-Z]{2}|\?\?)(\..+)?$/ or die "Invalid country/region '$cc' for -$opt option\n";
159
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
160
+ if (not $sign) {
161
+ $$opts{"keep$p"}{$cc} = { };
162
+ } elsif (not $$opts{"keep$p"}{$cc}) {
163
+ # start from defaults
164
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
165
+ $$opts{"keep$p"}{$cc} = \%codes;
166
+ }
167
+ foreach $co (@co) {
168
+ if ($sign eq '-') {
169
+ delete $$opts{"keep$p"}{$cc}{$co};
170
+ } else {
171
+ $$opts{"keep$p"}{$cc}{$co} = 1;
172
+ }
173
+ }
174
+ }
175
+ } elsif ($opt eq 'l') {
176
+ $arg = lc $arg;
177
+ my @langs = split ',', $arg;
178
+ if (not @langs) {
179
+ undef @languages;
180
+ $noLang = 1;
181
+ } elsif ($langs[0] =~ s/^-//) {
182
+ @languages = grep !/^$langs[0]$/, @languages foreach @langs;
183
+ } else {
184
+ @languages = @langs;
185
+ }
186
+ } elsif ($opt eq 'o') {
187
+ $outDir = $arg;
188
+ } elsif ($opt eq 'ver') {
189
+ $dbVer = $arg;
190
+ $dbVer =~ /^1\.0[23]$/ or die "Unsupported version number $dbVer\n";
191
+ } elsif ($opt eq 'v') {
192
+ $verbose = 1;
193
+ } elsif ($opt eq 'h') {
194
+ my $defcp = $defaults{def_codesp};
195
+ $defcp =~ s/(PPLG,)/\n $1/;
196
+ my $defLang = join ',', @languages;
197
+ $defLang =~ s/(ja,)/\n $1/;
198
+ print <<"END";
199
+ Description: Build ExifTool Geolocation database.
200
+
201
+ Syntax: build_geolocation [OPTIONS] [DBFILE] ...
202
+
203
+ Options:
204
+ DBFILE - Input database file name or directory. Multiple input database
205
+ files may be specified. The -p, -c and -cp options apply to
206
+ the database that comes after them on the command line.
207
+ Default is "$dbFile".
208
+ -p POP - Minimum population for cities to include. POP may be a number
209
+ or be of the form "CC[,C2...]=###" to set different limits for
210
+ specific countries/regions, where CC and C2 are country codes
211
+ with optional region name or code appended after a period (eg.
212
+ "CA.Ontario,US=500" sets the minimum population to 500 for
213
+ cities on Ontario Canada or the U.S.). If a region is
214
+ specified, either the full name or the geonames admin1 code may
215
+ be used, and case and spaces are not significant. May be
216
+ multiple -p options for each input DBFILE. Default is "$defaults{minpop}".
217
+ -c CODE - Feature codes to always include, regardless of population. CODE
218
+ is a comma-separated list of feature codes, with an optional
219
+ leading comma-separated list of country/region codes followed
220
+ by an equals sign to apply only to specific countries. The
221
+ feature-code list may begin with a dash to remove entries from
222
+ the default list, or a plus sign to add entries. May be
223
+ multiple -c options for each intput DBFILE. Country/region and
224
+ feature names are case insensitive. Default is "$defaults{def_codes}".
225
+ -cp CODE - Additional features to include if above minimum population.
226
+ Default is "$defcp".
227
+ -l LANG - Alternate languages to read from $altNamesFile if
228
+ available. These are used to generate $outAltNames an the
229
+ $geoLang files. LANG is a comma-separated list of language
230
+ codes, starting with a dash to remove items from the default
231
+ list. May be set to an empty string to disable generation
232
+ of alternate language files even if $altNamesFile
233
+ exists. The same set of languages applies to all input
234
+ database files. Default is "$defLang".
235
+ -o OUTDIR - Output directory name. Default is the same directory as the
236
+ first input database file. A directory named $outDirName
237
+ containing the output files will be created in this directory.
238
+ -ver VER - Version for output geolocation database (default is $dbVer).
239
+ -v - Verbose messages.
240
+ -h - Show this help.
241
+
242
+ Input files (download from https://download.geonames.org/export/dump/):
243
+ allCountries.txt - default database file (smaller files with names
244
+ like "cities###.txt" may be specified instead)
245
+ countryInfo.txt - mandatory country names file
246
+ admin1CodesASCII.txt - mandatory region names file
247
+ admin2Codes.txt - mandatory subregion names file
248
+ alternateNamesV2.txt - optional alternate names file (must exist to
249
+ to generate $outAltNames and $geoLang files)
250
+
251
+ Output files:
252
+ $outDirName - default output directory name
253
+ $outFile - ExifTool database file
254
+ $outAltNames - alternate names file
255
+ $geoLang - directory for alternate language files
256
+
257
+ Author:
258
+ Copyright 2024, Phil Harvey
259
+
260
+ This is free software; you can redistribute it and/or modify it under the
261
+ same terms as Perl itself.
262
+ END
263
+ exit 0;
264
+ } else {
265
+ die "Unknown option '-$opt'\n";
266
+ }
267
+ }
268
+
269
+ if (@dbfiles) {
270
+ # apply any remaining options to last database file
271
+ $dbfiles[-1]{$_} = $$opts{$_} foreach keys %$opts;
272
+ } else {
273
+ # use default database file if none specified
274
+ push @dbfiles, { %defaults, %$opts };
275
+ unless (-e $dbfiles[0]{file}) {
276
+ # also look in script directory
277
+ if ($0 =~ m{(.*)/} and -e "$1/$dbfiles[0]{file}") {
278
+ $dbfiles[0]{file} = "$1/$dbfiles[0]{file}";
279
+ } else {
280
+ die qq(Database "$dbfiles[0]{file}" not found. Use -h option for help.\n);
281
+ }
282
+ }
283
+ }
284
+
285
+ # determine our working directory
286
+ my $dbdir = $dbfiles[0]{file};
287
+ $dbdir = '.' unless $dbdir =~ s(/[^/]*$)();
288
+
289
+ # add default feature code lookups if necessary
290
+ foreach $dbfile (@dbfiles) {
291
+ my $p;
292
+ foreach $p ('', 'p') {
293
+ next if $$dbfile{"keep$p"}{'??'};
294
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
295
+ $$dbfile{"keep$p"}{'??'} = \%codes;
296
+ }
297
+ }
298
+
299
+ # pre-read region file if necessary
300
+ if (%needRgn) {
301
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
302
+ while (<REGION>) {
303
+ my @items = split /\t/;
304
+ my $rgn = $items[0];
305
+ my ($cc) = split /\./, $rgn;
306
+ next unless $needRgn{$cc};
307
+ unless ($needRgn{$rgn}) { # allow region code to be used
308
+ $rgn = $cc . '.' . uc$items[1]; # also support full region name
309
+ unless ($needRgn{$rgn}) {
310
+ $rgn =~ tr/ //d;
311
+ next unless $needRgn{$rgn}; # also allow no spaces
312
+ }
313
+ }
314
+ $needRgn{$rgn} = [$items[0], "$cc.$items[1]"];
315
+ }
316
+ close REGION;
317
+ foreach (sort keys %needRgn) {
318
+ next if length == 2;
319
+ die "No matching region for $_\n" unless ref $needRgn{$_};
320
+ }
321
+ }
322
+
323
+ if ($verbose) {
324
+ my $langs = join ',', @languages;
325
+ $langs or $langs = '<none>';
326
+ print "Languages to read from input database(s):\n $langs\n";
327
+ foreach $dbfile (@dbfiles) {
328
+ print "Parameters for reading $$dbfile{file}:\n";
329
+ print " Minimum populations (??=any country):\n";
330
+ print " ??=$$dbfile{minpop}\n";
331
+ foreach (reverse sort keys %{$$dbfile{cc_minpop}}) {
332
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
333
+ print " $cc=$$dbfile{cc_minpop}{$_}\n";
334
+ }
335
+ print " Features to keep regardless of population:\n";
336
+ foreach (reverse sort keys %{$$dbfile{keep}}) {
337
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
338
+ print " $cc=",join(',', sort keys %{$$dbfile{keep}{$_}}), "\n";
339
+ }
340
+ print " Features to keep for population >= minimum:\n";
341
+ foreach (reverse sort keys %{$$dbfile{keepp}}) {
342
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
343
+ print " $_=",join(',', sort keys %{$$dbfile{keepp}{$_}}), "\n";
344
+ }
345
+ }
346
+ }
347
+
348
+ # translate option region arguments to region codes
349
+ foreach $dbfile (@dbfiles) {
350
+ my ($type, $cc);
351
+ foreach $type (qw(cc_minpop keep keepp)) {
352
+ my @cc = keys %{$$dbfile{$type}};
353
+ foreach $cc (@cc) {
354
+ next unless ref $needRgn{$cc};
355
+ my $tmp = $$dbfile{$type}{$cc};
356
+ delete $$dbfile{$type}{$cc};
357
+ $$dbfile{$type}{$needRgn{$cc}[0]} = $tmp;
358
+ }
359
+ }
360
+ }
361
+
362
+ $outDir = "$dbdir/$outDirName" unless defined $outDir;
363
+ -d $outDir or mkdir $outDir, 0777 or die "Error creating output directory '$outDir'\n";
364
+ -e "$dbdir/$_" or die "Missing input file $dbdir/$_\n" foreach $countryFile, $regionFile, $admin2File;
365
+
366
+ # order of country codes, region names and subregions in database
367
+ my (%orderCC, %orderRgn, %orderSub);
368
+
369
+ # languages to read from geonames database (converted to lower case)
370
+ my %languages = map { $_ => 1 } @languages;
371
+
372
+ # language codes supported by ExifTool
373
+ my @supportedLangs = qw(cs de en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh-cn zh-tw);
374
+
375
+ # supported country-specific languages
376
+ my %ccLang = ( TW => 'zh', CN => 'zh', CA => 'en', GB => 'en' );
377
+ my (%lang, %haveCountry, %cityFlags, %rgnFlags, %subFlags, %ccFlags, %flags);
378
+ my (%haveRegion, %haveSubRgn, $filesize, $percent);
379
+
380
+ sub GetFileSize($)
381
+ {
382
+ my $file = shift;
383
+ seek $file, 0, 2 or die "Seek error\n";
384
+ my $size = tell $file;
385
+ seek $file, 0, 0 or die "Seek error\n";
386
+ return $size;
387
+ }
388
+
389
+ # pre-scan database to determine which countries, regions and subregions we will be using
390
+ foreach $dbfile (@dbfiles) {
391
+ my $database = $$dbfile{file};
392
+ my $upgraded;
393
+
394
+ print "Reading $database... 0%";
395
+ flush STDOUT;
396
+
397
+ # pre-read the files to initialize necessary variables
398
+ open INFILE, '<', $database or die "Error opening $database\n";
399
+ $filesize = GetFileSize(\*INFILE);
400
+
401
+ open OUTFILE, '>', "$outDir/$outFile" or die "Error creating $outFile in $outDir\n";
402
+ binmode(OUTFILE);
403
+
404
+ $$dbfile{kept} = [ ];
405
+ $percent = -1;
406
+ while (<INFILE>) {
407
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
408
+ if ($percent != $p) {
409
+ printf("\b\b\b\b%3d%%", $percent = $p);
410
+ flush STDOUT;
411
+ }
412
+ my @items = split /\t/;
413
+ my ($dbnum, $code, $cc, $rgn, $sub, $pop) = @items[0,7,8,10,11,14];
414
+ next unless @items > 17 and $cc =~ /^[A-Z]{2}$/;
415
+ my ($minpop, $keep);
416
+ if ($needRgn{$cc} and defined $$dbfile{cc_minpop}{"$cc$rgn"}) {
417
+ $minpop = $$dbfile{cc_minpop}{"$cc$rgn"};
418
+ } elsif (defined $$dbfile{cc_minpop}{$cc}) {
419
+ $minpop = $$dbfile{cc_minpop}{$cc};
420
+ } else {
421
+ $minpop = $$dbfile{minpop};
422
+ }
423
+ # keep regardless of population
424
+ if ($needRgn{$cc} and $$dbfile{keep}{"$cc$rgn"}) {
425
+ $keep = $$dbfile{keep}{"$cc$rgn"}{$code};#TEST
426
+ } elsif ($$dbfile{keep}{$cc}) {
427
+ $keep = $$dbfile{keep}{$cc}{$code};
428
+ } else {
429
+ $keep = $$dbfile{keep}{'??'}{$code};
430
+ }
431
+ if ($pop < $minpop) {
432
+ next unless $keep;
433
+ } elsif ($needRgn{$cc} and $$dbfile{keepp}{"$cc$rgn"}) {
434
+ next unless $$dbfile{keepp}{"$cc$rgn"}{$code};
435
+ } elsif ($$dbfile{keepp}{$cc}) {
436
+ next unless $$dbfile{keepp}{$cc}{$code};
437
+ } else {
438
+ next unless $$dbfile{keepp}{'??'}{$code};
439
+ }
440
+ push @{$$dbfile{kept}}, $_;
441
+ $lang{$dbnum} = { alt => [ ] };
442
+ $haveCountry{$cc} = 1;
443
+ $haveRegion{"$cc$rgn"} = 1;
444
+ $haveSubRgn{"$cc$rgn.$sub"} = 1;
445
+ # add new feature codes (up to maximum index of 0x3f)
446
+ unless ($featureCodes{$code} or @featureCodes > 0x3f) {
447
+ if ($dbVer eq '1.02') {
448
+ next if $code =~ /^(PPLH|PPLQ|PPLW)$/; # (stored as "Other" in v1.02)
449
+ $dbVer = '1.03';
450
+ $upgraded = 1; # print upgrade warning
451
+ @featureCodes = @fc103;
452
+ my $i = 0;
453
+ %featureCodes = map { $_ => $i++ } @featureCodes;
454
+ next if $featureCodes{$code};
455
+ }
456
+ push @featureCodes, $code;
457
+ $featureCodes{$code} = $#featureCodes;
458
+ }
459
+ }
460
+ close INFILE;
461
+ print "\b\b\b\bDone.\n";
462
+ warn "Some feature codes not supported by version 1.02, writing as 1.03 instead.\n" if $upgraded;
463
+ }
464
+
465
+ # read country names
466
+ $i = 0;
467
+ open INFILE, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
468
+ while (<INFILE>) {
469
+ next if /^#/;
470
+ my @items = split /\t/;
471
+ next unless $haveCountry{$items[0]};
472
+ $lang{$items[16]} = { alt => [ ] }; # reference lookup by db number
473
+ $orderCC{$items[0]} = $i++; # (entry 0 is the first country)
474
+ }
475
+ close INFILE;
476
+ printf " %.6d countries (0x%.4x)\n",$i,$i if $verbose;
477
+ die "Too many countries!\n" if $i > 0x100; # (no default 0 entry)
478
+
479
+ # read region (admin1) names
480
+ $i = 0;
481
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
482
+ while (<REGION>) {
483
+ chomp;
484
+ my @items = split /\t/;
485
+ $items[0] =~ tr/.//d; # (remove "." separator)
486
+ next unless $haveRegion{$items[0]};
487
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
488
+ $orderRgn{$items[0]} = ++$i; # (entry 0 is default "" region)
489
+ }
490
+ close REGION;
491
+ printf " %.6d regions (0x%.4x)\n",$i,$i if $verbose;
492
+ die "Too many regions!\n" if $i > 0x0fff; # (account for default 0 entry)
493
+
494
+ # read subregion (admin2) names
495
+ $i = 0;
496
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
497
+ while (<ADMIN2>) {
498
+ chomp;
499
+ my @items = split /\t/;
500
+ $items[0] =~ s/\.//; # (remove first "." separator)
501
+ next unless $haveSubRgn{$items[0]};
502
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
503
+ $orderSub{$items[0]} = ++$i; # (entry 0 is default "" subregion)
504
+ }
505
+ close ADMIN2;
506
+ printf " %.6d subregions (0x%.4x)\n",$i,$i if $verbose;
507
+ if ($i > ($dbVer eq '1.02' ? 0x7fff : 0xffff)) {
508
+ die "Too many subregions!\n" if $i > 0xffff;
509
+ $dbVer = '1.03';
510
+ warn "Too many subregions for version 1.02, writing as 1.03 instead.\n";
511
+ }
512
+
513
+ # read alternate names file if available
514
+ if (not $noLang and open INFILE, '<', "$dbdir/$altNamesFile") {
515
+ $filesize = GetFileSize(\*INFILE);
516
+ print "Reading $dbdir/$altNamesFile... 0%";
517
+ my %bestPri;
518
+ while (<INFILE>) {
519
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
520
+ if ($percent != $p) {
521
+ printf("\b\b\b\b%3d%%", $percent = $p);
522
+ flush STDOUT;
523
+ }
524
+ # items: 0=altID,1=geoID,2=lang,3=alt name,4=preferred,5=short,6=colloquial,7=historic
525
+ my @items = split /\t/;
526
+ my $lkup = $lang{$items[1]} or next;
527
+ my $altList = $lang{$items[1]}{alt};
528
+ my $lng = lc $items[2];
529
+ next if $lng and not $languages{$lng};
530
+ push @$altList, $items[3] unless grep /^\Q$items[3]\E$/i, @$altList;
531
+ next unless $lng;
532
+ my $flags = 0;
533
+ # keep only the best translation for this name for each language
534
+ $items[$_] and $flags |= (1<<($_-4)) foreach 4,5,6,7;
535
+ $flags{$items[1]} = ( $flags{$items[1]} || 0 ) | $flags;
536
+ next if $items[6] or $items[7]; # ignore colloquial and historic names
537
+ my $pri = $items[5] ? 0 : ($items[4] ? 1 : 2); # priority for best type of name
538
+ my $langPri = $bestPri{$lng};
539
+ $langPri or $langPri = $bestPri{$lng} = { };
540
+ next if $$langPri{$items[1]} and $$langPri{$items[1]} > $pri;
541
+ $$langPri{$items[1]} = $pri;
542
+ # save language-specific name for this feature, removing commas
543
+ ($$lkup{$lng} = $items[3]) =~ tr/,//d;
544
+ }
545
+ print "\b\b\b\bDone.\n";
546
+ close INFILE;
547
+ } else {
548
+ print "Not writing alternate languages ($dbdir/$altNamesFile not found)\n";
549
+ $noLang = 1;
550
+ }
551
+
552
+ my (%coords, %langLookups);
553
+
554
+ foreach $dbfile (@dbfiles) {
555
+ my $database = $$dbfile{file};
556
+
557
+ print "Processing $database... 0%";
558
+ my $i = 0;
559
+ my $num = scalar @{$$dbfile{kept}};
560
+
561
+ foreach (@{$$dbfile{kept}}) {
562
+ my @items = split /\t/;
563
+ my ($lat, $lon) = @items[4,5];
564
+ $lat = int(($lat + 90) / 180 * 0x100000 + 0.5) & 0xfffff;
565
+ $lon = int(($lon + 180) / 360 * 0x100000 + 0.5) & 0xfffff;
566
+ my $coord = pack('nCn',$lat>>4,(($lat&0x0f)<<4)|($lon&0x0f),$lon>>4);;
567
+ # take the city with the highest population if there are
568
+ # multiple cities with the same reduced coordinates
569
+ if ($coords{$coord} and $coords{$coord}[6] >= $items[14]) {
570
+ next;
571
+ }
572
+ # coords=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
573
+ my ($altList, $alt);
574
+ die "Internal error\n" unless $lang{$items[0]} and $altList = $lang{$items[0]}{alt};
575
+ if (@$altList) {
576
+ tr/,//d foreach @$altList;
577
+ $alt = join ',', sort @$altList;
578
+ } else {
579
+ $alt = '';
580
+ }
581
+ $coords{$coord} = [ @items[4,5,1,8,10,11,14,17,7] ];
582
+ $coords{$coord}[9] = $alt;
583
+ my $lkup = $lang{$items[0]}; # 0=geoID
584
+ my $key = $items[1]; # 1=city
585
+ $lkup or die "Missing language for geoID $items[0]\n";
586
+ $cityFlags{$flags{$items[0]}} = ($cityFlags{$flags{$items[0]}} || 0) + 1 if defined $flags{$items[0]};
587
+ my $ccLang = $ccLang{$items[8]}; # get country-specific language
588
+ if ($ccLang and $$lkup{$ccLang}) {
589
+ my $lc = $ccLang . '-' . lc($items[8]); # eg. zh-cn
590
+ # add country suffix for this language in this country
591
+ $$lkup{$lc} = $$lkup{$ccLang} unless $$lkup{$lc};
592
+ }
593
+ foreach (@supportedLangs) {
594
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
595
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
596
+ push @{$langLookups{$_}{$key}}, "$items[8]$items[10].$items[11],$$lkup{$_}";
597
+ }
598
+ my $p = int(100 * ++$i / $num + 0.5);
599
+ next if $percent == $p;
600
+ printf("\b\b\b\b%3d%%", $percent = $p);
601
+ flush STDOUT;
602
+ }
603
+ print "\b\b\b\bDone.\n";
604
+ }
605
+
606
+ # write city database
607
+ my $str = $noLang ? '' : " and $outAltNames";
608
+ my @t = localtime;
609
+ my $date = sprintf('%.4d-%.2d-%.2d', $t[5]+1900, $t[4]+1, $t[3]);
610
+ print "Writing $outDir/$outFile (version $dbVer)$str...\n";
611
+ print OUTFILE "Geolocation$dbVer\t",scalar(keys %coords),"\n";
612
+ print OUTFILE "# $date Cities with population $dbfiles[0]{minpop} or greater from geonames.org with a Creative Commons license\n";
613
+
614
+ if ($noLang) {
615
+ unlink "$outDir/$outAltNames";
616
+ } else {
617
+ open ALTOUT, ">$outDir/$outAltNames";
618
+ binmode ALTOUT;
619
+ }
620
+ my (%tz, @tz, %fcodes);
621
+ my $tzNum = 0;
622
+ foreach (sort { $a cmp $b } keys %coords) {
623
+ my $items = $coords{$_};
624
+ # @$items=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
625
+ my $iCC = $orderCC{$$items[3]};
626
+ my $iRgn = $orderRgn{"$$items[3]$$items[4]"} || 0;
627
+ my $iSub = $orderSub{"$$items[3]$$items[4].$$items[5]"} || 0;
628
+ my $tn = $tz{$$items[7]};
629
+ unless ($tn) {
630
+ push @tz, $$items[7];
631
+ $tn = $tz{$$items[7]} = $tzNum++;
632
+ }
633
+ # convert population to our binary format
634
+ my $pop = sprintf('%.1e',$$items[6]); # format: "3.1e+04"
635
+ # pack CC, population and region index into a 32-bit integer
636
+ my $code = ($iCC << 24) | (substr($pop,6,1)<<20) | (substr($pop,0,1)<<16) | (substr($pop,2,1)<<12) | $iRgn;
637
+ $fcodes{$$items[8]} = ($fcodes{$$items[8]} || 0) + 1;
638
+ my $fc = $featureCodes{$$items[8]} || 0;
639
+ # store high bit of timezone index
640
+ if ($tn > 255) {
641
+ if ($dbVer eq '1.02') {
642
+ $iSub |= 0x8000;
643
+ $tn -= 256;
644
+ } else {
645
+ $fc |= 0x80;
646
+ $tn -= 256;
647
+ }
648
+ }
649
+ my $pt = pack('NnCC', $code, $iSub, $tn, $fc);
650
+ $$items[2] =~ tr/,//d; # remove any commas
651
+ print OUTFILE "$_$pt$$items[2]\n";
652
+ next if $noLang;
653
+ $$items[9] =~ tr/,/\n/;
654
+ print ALTOUT $$items[9],"\0";
655
+ }
656
+ my $altSize = 0;
657
+ unless ($noLang) {
658
+ $altSize = tell ALTOUT;
659
+ close ALTOUT;
660
+ }
661
+ print OUTFILE "\0\0\0\0\x01\n"; # section terminator
662
+
663
+ die "Too many time zones!\n" if $tzNum > 0x01ff;
664
+
665
+ if ($verbose) {
666
+ $i = 0;
667
+ print "Features kept:\n";
668
+ foreach (sort keys %fcodes) {
669
+ my $fc = $featureCodes{$_} || 0;
670
+ printf "%6d (%2d) %s\n", $fcodes{$_}, $fc, $_;
671
+ }
672
+ }
673
+
674
+ # write country codes
675
+ open COUNTRY, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
676
+ my %cc;
677
+ while (<COUNTRY>) {
678
+ next if /^#/;
679
+ my @items = split /\t/;
680
+ next unless $haveCountry{$items[0]};
681
+ $cc{$items[4]} = $items[0];
682
+ die "country code error\n" if length $items[0] != 2;
683
+ $items[4] =~ tr/,//d; # remove any commas
684
+ print OUTFILE "$items[0]$items[4]\n";
685
+ if ($lang{$items[16]}) { # (16=geoID)
686
+ my $lkup = $lang{$items[16]};
687
+ my $key = $items[4]; # country name
688
+ $ccFlags{$flags{$items[16]}} = ($ccFlags{$flags{$items[16]}} || 0) + 1 if defined $flags{$items[16]};
689
+ foreach (@supportedLangs) {
690
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
691
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
692
+ push @{$langLookups{$_}{$key}}, ",$$lkup{$_}";
693
+ }
694
+ }
695
+ }
696
+ close COUNTRY;
697
+
698
+ print OUTFILE "\0\0\0\0\x02\n"; # section terminator
699
+
700
+ # write regions
701
+ print OUTFILE "\n"; # (null region)
702
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
703
+ my %region;
704
+ while (<REGION>) {
705
+ chomp;
706
+ my @items = split /\t/;
707
+ #items: 0=region code, 1=name, 2=ascii, 3=geoID
708
+ $items[0] =~ tr/.//d; # (remove "." separator)
709
+ next unless $haveRegion{$items[0]};
710
+ $region{$items[0]} = $items[1];
711
+ $items[1] =~ tr/,//d; # remove any commas
712
+ print OUTFILE "$items[1]\n";
713
+ if ($lang{$items[3]}) { # (3=geoID)
714
+ my $lkup = $lang{$items[3]};
715
+ my $key = $items[1]; # region name
716
+ my $cc = substr($items[0], 0, 2);
717
+ $rgnFlags{$flags{$items[3]}} = ($rgnFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
718
+ foreach (@supportedLangs) {
719
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
720
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
721
+ push @{$langLookups{$_}{$key}}, "$cc,$$lkup{$_}";
722
+ }
723
+ }
724
+ }
725
+ close REGION;
726
+
727
+ print OUTFILE "\0\0\0\0\x03\n"; # section terminator
728
+
729
+ # write subregions
730
+ print OUTFILE "\n"; # (null admin2)
731
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
732
+ my %subregion;
733
+ while (<ADMIN2>) {
734
+ chomp;
735
+ my @items = split /\t/;
736
+ #items: 0=admin2 code, 1=name, 2=ascii, 3=geoID
737
+ $items[0] =~ s/\.//; # (remove first "." separator)
738
+ next unless $haveSubRgn{$items[0]};
739
+ $subregion{$items[0]} = $items[1];
740
+ $items[1] =~ tr/,//d; # remove any commas
741
+ print OUTFILE "$items[1]\n";
742
+ if ($lang{$items[3]}) { # (3=geoID)
743
+ my $lkup = $lang{$items[3]};
744
+ my $key = $items[1]; # region name
745
+ $subFlags{$flags{$items[3]}} = ($subFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
746
+ my $rc = $items[0];
747
+ $rc =~ s/\..*//; # (remove subregion code)
748
+ foreach (@supportedLangs) {
749
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
750
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
751
+ push @{$langLookups{$_}{$key}}, "$rc,$$lkup{$_}";
752
+ }
753
+ }
754
+ }
755
+ close ADMIN2;
756
+
757
+ print OUTFILE "\0\0\0\0\x04\n"; # section terminator
758
+
759
+ # write timezones
760
+ print OUTFILE $_,"\n" foreach @tz;
761
+
762
+ print OUTFILE "\0\0\0\0\x05\n"; # section terminator
763
+
764
+ # write feature codes
765
+ print OUTFILE $_,"\n" foreach @featureCodes;
766
+
767
+ # write terminator and close Geolocation.dat
768
+ print OUTFILE "\0\0\0\0\0\n"; # file terminator
769
+ my $outSize = tell OUTFILE;
770
+ close OUTFILE;
771
+
772
+ # write language lookups
773
+ my $langSize = 0;
774
+ my $langDir = "$outDir/$geoLang";
775
+ # delete existing languages
776
+ unlink <"$langDir/*.pm">;
777
+ unless ($noLang) {
778
+ my $n = scalar(keys %langLookups);
779
+ print "Writing $n language files to $outDir/$geoLang...\n";
780
+ mkdir $langDir, 0777;
781
+ my ($lng, $key, $str, $nm, $alt);
782
+ foreach $lng (sort keys %langLookups) { # ($lng = language code)
783
+ my $myLng = $lng;
784
+ $myLng =~ tr/-/_/;
785
+ my $lkup = $langLookups{$lng};
786
+ my $file = "$myLng.pm";
787
+ open OUT, ">$langDir/$file" or die "Error creating $file\n";
788
+ print OUT "# Geolocation language translations for $myLng\n";
789
+ print OUT "#\n# Based on Creative Commons database from geonames.org\n\n";
790
+ print OUT "%Image::ExifTool::GeoLang::${myLng}::Translate = (\n";
791
+ foreach $key (sort keys %$lkup) {
792
+ ($nm = $key) =~ s/'/\\'/g;
793
+ # count entries and use the most common one, then add others with country+region ID's
794
+ # (entries in @$li are of the form: City:"CCRc,Sc,Alt", Sub:"CCRc,Alt", Rgn:"CC,Alt", Country:",Alt")
795
+ # (Rc = region code, Sc = subregion code)
796
+ my $li = $$lkup{$key};
797
+ my %count;
798
+ # sort by popularity of alternate name
799
+ foreach (@$li) {
800
+ my $val = $_;
801
+ $val =~ s/.*?,//;
802
+ $count{$val} = ($count{$val} || 0) + 1;
803
+ }
804
+ my @order = sort { $count{$b} <=> $count{$a} or length($a) <=> length($b) or $a cmp $b } keys %count;
805
+ my $first = 1;
806
+ foreach $alt (@order) {
807
+ foreach (sort @$li) {
808
+ my ($code,$val) = split ',', $_, 2;
809
+ # ($code will be empty for a country name, and 2 characters for a region name,
810
+ # and contain a "." for a city name)
811
+ next unless $val eq $alt; # don't add if alternate name is the same
812
+ die "Backslash in translated name" if $val =~ /\\/;
813
+ $val =~ s/'/\\'/g; # escape single quotes
814
+ if ($first and $val !~ /\(/) { # (don't add general translation if name is qualified with brackets)
815
+ print OUT "\t'$nm' => '$val',\n";
816
+ undef $first;
817
+ last;
818
+ }
819
+ # format for keys in language table
820
+ # City: "CCRgn,Subregion,City", "CCRgn,,City", "CC,City", ",City"
821
+ # Subregion: "CCRgn,Subregion,", "CCRgn,,"
822
+ # Region: "CCRgn,"
823
+ # Country: "CC,"
824
+ # Any: "Name"
825
+ if (not $code) {
826
+ # this is a country
827
+ $code = $cc{$key};
828
+ printf OUT "\t'$code,' => '$val',\n";
829
+ } elsif ($code !~ /\./) {
830
+ # this is a region or subregion
831
+ print OUT "\t'$code$nm,' => '$val',\n";
832
+ } else {
833
+ # this is a city
834
+ # use region/subregions name instead of code
835
+ my $sub = $subregion{$code} || '';
836
+ $sub =~ s/'/\\'/g;
837
+ $code =~ s/\..*//;
838
+ $code = substr($code,0,2) . $region{$code} if $region{$code};
839
+ $code =~ s/'/\\'/g;
840
+ print OUT "\t'$code,$sub,$nm' => '$val',\n";
841
+ }
842
+ }
843
+ }
844
+ }
845
+ print OUT ");\n\n1; #end\n";
846
+ $langSize += tell OUT;
847
+ close OUT;
848
+ }
849
+ if ($verbose) {
850
+ my @type = ( City => \%cityFlags, Region => \%rgnFlags, Subregion => \%subFlags, Country => \%ccFlags );
851
+ for (;;) {
852
+ my $type = shift @type or last;
853
+ my $flags = shift @type;
854
+ print "$type flags:\n";
855
+ printf(" 0x%.2x - %d\n", 0, $$flags{0} || 0);
856
+ my @label = qw(preferred short colloquial historic);
857
+ foreach my $bit (0..5) {
858
+ my $n = 0;
859
+ $_ & (1<<$bit) and ++$n foreach keys %$flags;
860
+ printf(" 0x%.2x - %d (%s)\n", (1<<$bit), $n, shift(@label)) if $n;
861
+ }
862
+ }
863
+ }
864
+ }
865
+
866
+ print "Output file size(s):\n";
867
+ printf "%8.2f MB %s (%d entries)\n", $outSize / 1e6, $outFile, scalar(keys %coords);
868
+ printf "%8.2f MB %s\n", $altSize / 1e6, $outAltNames if $altSize;
869
+ printf "%8.2f MB %s/*.pm\n", $langSize / 1e6, $geoLang if $langSize;
870
+ printf "%8.2f MB Total\n", ($outSize + $altSize + $langSize) / 1e6 if $altSize or $langSize;
871
+
872
+ # end