exiftool_vendored 12.82.0 → 12.83.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/bin/Changes +20 -0
  3. data/bin/MANIFEST +2 -18
  4. data/bin/META.json +1 -1
  5. data/bin/META.yml +1 -1
  6. data/bin/README +3 -2
  7. data/bin/build_geolocation +867 -0
  8. data/bin/exiftool +26 -4
  9. data/bin/fmt_files/gpx.fmt +2 -1
  10. data/bin/fmt_files/gpx_wpt.fmt +2 -1
  11. data/bin/lib/Image/ExifTool/BuildTagLookup.pm +5 -2
  12. data/bin/lib/Image/ExifTool/CanonVRD.pm +6 -5
  13. data/bin/lib/Image/ExifTool/DJI.pm +29 -0
  14. data/bin/lib/Image/ExifTool/Exif.pm +19 -2
  15. data/bin/lib/Image/ExifTool/GM.pm +17 -8
  16. data/bin/lib/Image/ExifTool/Geolocation.dat +0 -0
  17. data/bin/lib/Image/ExifTool/Geolocation.pm +48 -35
  18. data/bin/lib/Image/ExifTool/Nikon.pm +7 -6
  19. data/bin/lib/Image/ExifTool/QuickTime.pm +4 -1
  20. data/bin/lib/Image/ExifTool/QuickTimeStream.pl +5 -0
  21. data/bin/lib/Image/ExifTool/Sony.pm +15 -6
  22. data/bin/lib/Image/ExifTool/TagLookup.pm +11 -5
  23. data/bin/lib/Image/ExifTool/TagNames.pod +17 -4
  24. data/bin/lib/Image/ExifTool/WriteQuickTime.pl +2 -1
  25. data/bin/lib/Image/ExifTool/Writer.pl +163 -131
  26. data/bin/lib/Image/ExifTool/XMP2.pl +3 -0
  27. data/bin/lib/Image/ExifTool.pm +12 -5
  28. data/bin/lib/Image/ExifTool.pod +13 -7
  29. data/bin/perl-Image-ExifTool.spec +1 -1
  30. data/lib/exiftool_vendored/version.rb +1 -1
  31. metadata +3 -20
  32. data/bin/lib/Image/ExifTool/GeoLang/cs.pm +0 -978
  33. data/bin/lib/Image/ExifTool/GeoLang/de.pm +0 -1975
  34. data/bin/lib/Image/ExifTool/GeoLang/en_ca.pm +0 -44
  35. data/bin/lib/Image/ExifTool/GeoLang/en_gb.pm +0 -124
  36. data/bin/lib/Image/ExifTool/GeoLang/es.pm +0 -2921
  37. data/bin/lib/Image/ExifTool/GeoLang/fi.pm +0 -1116
  38. data/bin/lib/Image/ExifTool/GeoLang/fr.pm +0 -3171
  39. data/bin/lib/Image/ExifTool/GeoLang/it.pm +0 -2750
  40. data/bin/lib/Image/ExifTool/GeoLang/ja.pm +0 -10256
  41. data/bin/lib/Image/ExifTool/GeoLang/ko.pm +0 -4499
  42. data/bin/lib/Image/ExifTool/GeoLang/nl.pm +0 -1270
  43. data/bin/lib/Image/ExifTool/GeoLang/pl.pm +0 -3019
  44. data/bin/lib/Image/ExifTool/GeoLang/ru.pm +0 -18220
  45. data/bin/lib/Image/ExifTool/GeoLang/sk.pm +0 -441
  46. data/bin/lib/Image/ExifTool/GeoLang/sv.pm +0 -714
  47. data/bin/lib/Image/ExifTool/GeoLang/tr.pm +0 -452
  48. data/bin/lib/Image/ExifTool/GeoLang/zh_cn.pm +0 -2225
  49. data/bin/lib/Image/ExifTool/GeoLang/zh_tw.pm +0 -72
@@ -0,0 +1,867 @@
1
+ #!/usr/bin/perl -w
2
+ #-------------------------------------------------------------------------------
3
+ # File: build_geolocation
4
+ #
5
+ # Description: Parse geonames files to create ExifTool geolocation database
6
+ #
7
+ # Syntax: build_geolocation [OPTIONS] [DBFILE] ...
8
+ #
9
+ # Options: (see -h output)
10
+ #
11
+ # Created: 2024-03-03 - P. Harvey
12
+ # 2024-04-15 - PH Clean up and add options for public release
13
+ #
14
+ # Notes: Requires these files from https://download.geonames.org/export/
15
+ #
16
+ # allCountries.txt (or other input database if specified)
17
+ # countryInfo.txt
18
+ # admin1CodesASCII.txt
19
+ # admin2Codes.txt
20
+ # alternateNamesV2.txt (optional)
21
+ #
22
+ # Output datbase format (Geolocation.dat):
23
+ #
24
+ # Header:
25
+ # "GeolocationV.VV\tNNNN\n" (V.VV=version, NNNN=num city entries)
26
+ # "# <comment>\n"
27
+ # NNNN City entries:
28
+ # Offset Format Description
29
+ # 0 int16u - latitude high 16 bits (converted to 0-0x100000 range)
30
+ # 2 int8u - latitude low 4 bits, longitude low 4 bits
31
+ # 3 int16u - longitude high 16 bits
32
+ # 5 int8u - index of country in country list
33
+ # 6 int8u - 0xf0 = population E exponent (in format "N.Fe+0E"), 0x0f = population N digit
34
+ # 7 int16u - 0xf000 = population F digit, 0x0fff = index in region list (admin1)
35
+ # 9 int16u - v1.02: 0x7fff = index in subregion (admin2), 0x8000 = high bit of time zone
36
+ # 9 int16u - v1.03: index in subregion (admin2)
37
+ # 11 int8u - low byte of time zone index
38
+ # 12 int8u - 0x1f = feature code index (see below), v1.03: 0x80 = high bit of time zone
39
+ # 13 string - UTF8 City name, terminated by newline
40
+ # "\0\0\0\0\x01"
41
+ # Country entries:
42
+ # 1. 2-character country code
43
+ # 2. Country name, terminated by newline
44
+ # "\0\0\0\0\x02"
45
+ # Region entries:
46
+ # 1. Region name, terminated by newline
47
+ # "\0\0\0\0\x03"
48
+ # Subregion entries:
49
+ # 1. Subregion name, terminated by newline
50
+ # "\0\0\0\0\x04"
51
+ # Time zone entries:
52
+ # 1. Time zone name, terminated by newline
53
+ # "\0\0\0\0\x05" (feature codes added in v1.03)
54
+ # Feature codes:
55
+ # 1. Feature code, terminated by newline
56
+ # "\0\0\0\0\0"
57
+ #
58
+ # Feature Codes v1.02: (see http://www.geonames.org/export/codes.html#P for descriptions)
59
+ #
60
+ # 0. Other 3. PPLA2 6. PPLA5 9. PPLF 12. PPLR 15. PPLX
61
+ # 1. PPL 4. PPLA3 7. PPLC 10. PPLG 13. PPLS
62
+ # 2. PPLA 5. PPLA4 8. PPLCH 11. PPLL 14. STLMT
63
+ #
64
+ # Feature Codes v1.03 and later are listed at the end of the database
65
+ #-------------------------------------------------------------------------------
66
+
67
+ use strict;
68
+
69
+ my $dbVer = '1.02'; # database version
70
+
71
+ my $dbFile = 'allCountries.txt'; # default database file
72
+ my $countryFile = 'countryInfo.txt'; # mandatory country names file
73
+ my $regionFile = 'admin1CodesASCII.txt'; # mandatory region names file
74
+ my $admin2File = 'admin2Codes.txt'; # mandatory subregion names file
75
+ my $altNamesFile = 'alternateNamesV2.txt'; # optional alternate names file
76
+ my $outFile = 'Geolocation.dat'; # output ExifTool database file
77
+ my $outAltNames = 'AltNames.dat'; # output alternate names file
78
+ my $outDirName = 'Geolocation_out'; # output directory for database files
79
+ my $geoLang = 'GeoLang'; # output directory for language files
80
+
81
+ my %defaults = (
82
+ file => $dbFile,
83
+ minpop => 2000,
84
+ def_codes => 'PPLA,PPLA2',
85
+ def_codesp => 'PPL,PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC,PPLCH,PPLF,PPLG,PPLH,PPLL,PPLQ,PPLR,PPLS,PPLW,PPLX,STLMT',
86
+ );
87
+
88
+ # languages to read from geonames database
89
+ my @languages = qw(cs de en en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh zh-cn zh-tw);
90
+
91
+ # indices of feature codes (v1.02 is hard-coded in ExifTool)
92
+ my @fc102 = qw(
93
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
94
+ PPLF PPLG PPLL PPLR PPLS STLMT PPLX
95
+ );
96
+ # base features for v1.03+
97
+ my @fc103 = qw(
98
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
99
+ PPLF PPLG PPLH PPLL PPLQ PPLR PPLS PPLW PPLX STLMT
100
+ );
101
+ my $i = 0;
102
+ my @featureCodes = $dbVer eq '1.02' ? @fc102 : @fc103;
103
+ my %featureCodes = map { $_ => $i++ } @featureCodes;
104
+
105
+ my ($dbfile, @dbfiles, $outDir, $verbose, $noLang, %needRgn);
106
+ my %optArgs = ( p => 1, c => 1, cp => 1, l => 1, o => 1, ver => 1 );
107
+
108
+ # process command-line arguments
109
+ my $opts = { };
110
+ while (@ARGV) {
111
+ my $opt = shift;
112
+ if (not $opt =~ s/^-//) {
113
+ $opt = "$opt/$defaults{file}" if -d $opt;
114
+ -e $opt or die "Error opening database $opt\n";
115
+ push @dbfiles, { %defaults, %$opts, file => $opt };
116
+ $opts = { };
117
+ next;
118
+ }
119
+ my $arg;
120
+ if ($optArgs{$opt}) {
121
+ $arg = shift;
122
+ defined $arg or die "Expecting argument for -$opt option\n";
123
+ }
124
+ if ($opt eq 'p') {
125
+ $arg = uc $arg;
126
+ if ($arg =~ /=/) {
127
+ my ($cc, $mp) = split /=/, $arg;
128
+ $mp =~ /^\d+$/ or die "Expecting number on rhs of '=' for -p option\n";
129
+ my @cc = split /,/, $cc;
130
+ foreach $cc (@cc) {
131
+ $cc =~ /^([A-Z]{2})(\..+)?$/ or die "Invalid country/region '$cc' for -p option\n";
132
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
133
+ $$opts{cc_minpop}{$cc} = $mp;
134
+ }
135
+ } else {
136
+ $arg =~ /^\d+$/ or die "Expecting number for -p option\n";
137
+ $$opts{minpop} = $arg;
138
+ }
139
+ } elsif ($opt =~ /^c(p?)$/) {
140
+ my $p = $1;
141
+ $arg = uc $arg;
142
+ my ($cc, $co);
143
+ if ($arg =~ /=/) {
144
+ ($cc, $co) = split /=/, $arg;
145
+ } else {
146
+ ($cc, $co) = ('??', $arg);
147
+ }
148
+ my $sign = $co =~ s/^([-+])// ? $1 : '';
149
+ my @co = split /,/, $co;
150
+ my @cc = split /,/, $cc;
151
+ # store lookup for features to keep for each country ('??' = any country)
152
+ foreach $cc (@cc) {
153
+ $cc =~ /^([A-Z]{2}|\?\?)(\..+)?$/ or die "Invalid country/region '$cc' for -$opt option\n";
154
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
155
+ if (not $sign) {
156
+ $$opts{"keep$p"}{$cc} = { };
157
+ } elsif (not $$opts{"keep$p"}{$cc}) {
158
+ # start from defaults
159
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
160
+ $$opts{"keep$p"}{$cc} = \%codes;
161
+ }
162
+ foreach $co (@co) {
163
+ if ($sign eq '-') {
164
+ delete $$opts{"keep$p"}{$cc}{$co};
165
+ } else {
166
+ $$opts{"keep$p"}{$cc}{$co} = 1;
167
+ }
168
+ }
169
+ }
170
+ } elsif ($opt eq 'l') {
171
+ $arg = lc $arg;
172
+ my @langs = split ',', $arg;
173
+ if (not @langs) {
174
+ undef @languages;
175
+ $noLang = 1;
176
+ } elsif ($langs[0] =~ s/^-//) {
177
+ @languages = grep !/^$langs[0]$/, @languages foreach @langs;
178
+ } else {
179
+ @languages = @langs;
180
+ }
181
+ } elsif ($opt eq 'o') {
182
+ $outDir = $arg;
183
+ } elsif ($opt eq 'ver') {
184
+ $dbVer = $arg;
185
+ $dbVer =~ /^1\.0[23]$/ or die "Unsupported version number $dbVer\n";
186
+ } elsif ($opt eq 'v') {
187
+ $verbose = 1;
188
+ } elsif ($opt eq 'h') {
189
+ my $defcp = $defaults{def_codesp};
190
+ $defcp =~ s/(PPLG,)/\n $1/;
191
+ my $defLang = join ',', @languages;
192
+ $defLang =~ s/(ja,)/\n $1/;
193
+ print <<"END";
194
+ Description: Build ExifTool Geolocation database.
195
+
196
+ Syntax: build_geolocation [OPTIONS] [DBFILE] ...
197
+
198
+ Options:
199
+ DBFILE - Input database file name or directory. Multiple input database
200
+ files may be specified. The -p, -c and -cp options apply to
201
+ the database that comes after them on the command line.
202
+ Default is "$dbFile".
203
+ -p POP - Minimum population for cities to include. POP may be a number
204
+ or be of the form "CC[,C2...]=###" to set different limits for
205
+ specific countries/regions, where CC and C2 are country codes
206
+ with optional region name or code appended after a period (eg.
207
+ "CA.Ontario,US=500" sets the minimum population to 500 for
208
+ cities on Ontario Canada or the U.S.). If a region is
209
+ specified, either the full name or the geonames admin1 code may
210
+ be used, and case and spaces are not significant. May be
211
+ multiple -p options for each input DBFILE. Default is "$defaults{minpop}".
212
+ -c CODE - Feature codes to always include, regardless of population. CODE
213
+ is a comma-separated list of feature codes, with an optional
214
+ leading comma-separated list of country/region codes followed
215
+ by an equals sign to apply only to specific countries. The
216
+ feature-code list may begin with a dash to remove entries from
217
+ the default list, or a plus sign to add entries. May be
218
+ multiple -c options for each intput DBFILE. Country/region and
219
+ feature names are case insensitive. Default is "$defaults{def_codes}".
220
+ -cp CODE - Additional features to include if above minimum population.
221
+ Default is "$defcp".
222
+ -l LANG - Alternate languages to read from $altNamesFile if
223
+ available. These are used to generate $outAltNames an the
224
+ $geoLang files. LANG is a comma-separated list of language
225
+ codes, starting with a dash to remove items from the default
226
+ list. May be set to an empty string to disable generation
227
+ of alternate language files even if $altNamesFile
228
+ exists. The same set of languages applies to all input
229
+ database files. Default is "$defLang".
230
+ -o OUTDIR - Output directory name. Default is the same directory as the
231
+ first input database file. A directory named $outDirName
232
+ containing the output files will be created in this directory.
233
+ -ver VER - Version for output geolocation database (default is $dbVer).
234
+ -v - Verbose messages.
235
+ -h - Show this help.
236
+
237
+ Input files (download from https://download.geonames.org/export/dump/):
238
+ allCountries.txt - default database file (smaller files with names
239
+ like "cities###.txt" may be specified instead)
240
+ countryInfo.txt - mandatory country names file
241
+ admin1CodesASCII.txt - mandatory region names file
242
+ admin2Codes.txt - mandatory subregion names file
243
+ alternateNamesV2.txt - optional alternate names file (must exist to
244
+ to generate $outAltNames and $geoLang files)
245
+
246
+ Output files:
247
+ $outDirName - default output directory name
248
+ $outFile - ExifTool database file
249
+ $outAltNames - alternate names file
250
+ $geoLang - directory for alternate language files
251
+
252
+ Author:
253
+ Copyright 2024, Phil Harvey
254
+
255
+ This is free software; you can redistribute it and/or modify it under the
256
+ same terms as Perl itself.
257
+ END
258
+ exit 0;
259
+ } else {
260
+ die "Unknown option '-$opt'\n";
261
+ }
262
+ }
263
+
264
+ if (@dbfiles) {
265
+ # apply any remaining options to last database file
266
+ $dbfiles[-1]{$_} = $$opts{$_} foreach keys %$opts;
267
+ } else {
268
+ # use default database file if none specified
269
+ push @dbfiles, { %defaults, %$opts };
270
+ unless (-e $dbfiles[0]{file}) {
271
+ # also look in script directory
272
+ if ($0 =~ m{(.*)/} and -e "$1/$dbfiles[0]{file}") {
273
+ $dbfiles[0]{file} = "$1/$dbfiles[0]{file}";
274
+ } else {
275
+ die qq(Database "$dbfiles[0]{file}" not found. Use -h option for help.\n);
276
+ }
277
+ }
278
+ }
279
+
280
+ # determine our working directory
281
+ my $dbdir = $dbfiles[0]{file};
282
+ $dbdir = '.' unless $dbdir =~ s(/[^/]*$)();
283
+
284
+ # add default feature code lookups if necessary
285
+ foreach $dbfile (@dbfiles) {
286
+ my $p;
287
+ foreach $p ('', 'p') {
288
+ next if $$dbfile{"keep$p"}{'??'};
289
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
290
+ $$dbfile{"keep$p"}{'??'} = \%codes;
291
+ }
292
+ }
293
+
294
+ # pre-read region file if necessary
295
+ if (%needRgn) {
296
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
297
+ while (<REGION>) {
298
+ my @items = split /\t/;
299
+ my $rgn = $items[0];
300
+ my ($cc) = split /\./, $rgn;
301
+ next unless $needRgn{$cc};
302
+ unless ($needRgn{$rgn}) { # allow region code to be used
303
+ $rgn = $cc . '.' . uc$items[1]; # also support full region name
304
+ unless ($needRgn{$rgn}) {
305
+ $rgn =~ tr/ //d;
306
+ next unless $needRgn{$rgn}; # also allow no spaces
307
+ }
308
+ }
309
+ $needRgn{$rgn} = [$items[0], "$cc.$items[1]"];
310
+ }
311
+ close REGION;
312
+ foreach (sort keys %needRgn) {
313
+ next if length == 2;
314
+ die "No matching region for $_\n" unless ref $needRgn{$_};
315
+ }
316
+ }
317
+
318
+ if ($verbose) {
319
+ my $langs = join ',', @languages;
320
+ $langs or $langs = '<none>';
321
+ print "Languages to read from input database(s):\n $langs\n";
322
+ foreach $dbfile (@dbfiles) {
323
+ print "Parameters for reading $$dbfile{file}:\n";
324
+ print " Minimum populations (??=any country):\n";
325
+ print " ??=$$dbfile{minpop}\n";
326
+ foreach (reverse sort keys %{$$dbfile{cc_minpop}}) {
327
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
328
+ print " $cc=$$dbfile{cc_minpop}{$_}\n";
329
+ }
330
+ print " Features to keep regardless of population:\n";
331
+ foreach (reverse sort keys %{$$dbfile{keep}}) {
332
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
333
+ print " $cc=",join(',', sort keys %{$$dbfile{keep}{$_}}), "\n";
334
+ }
335
+ print " Features to keep for population >= minimum:\n";
336
+ foreach (reverse sort keys %{$$dbfile{keepp}}) {
337
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
338
+ print " $_=",join(',', sort keys %{$$dbfile{keepp}{$_}}), "\n";
339
+ }
340
+ }
341
+ }
342
+
343
+ # translate option region arguments to region codes
344
+ foreach $dbfile (@dbfiles) {
345
+ my ($type, $cc);
346
+ foreach $type (qw(cc_minpop keep keepp)) {
347
+ my @cc = keys %{$$dbfile{$type}};
348
+ foreach $cc (@cc) {
349
+ next unless ref $needRgn{$cc};
350
+ my $tmp = $$dbfile{$type}{$cc};
351
+ delete $$dbfile{$type}{$cc};
352
+ $$dbfile{$type}{$needRgn{$cc}[0]} = $tmp;
353
+ }
354
+ }
355
+ }
356
+
357
+ $outDir = "$dbdir/$outDirName" unless defined $outDir;
358
+ -d $outDir or mkdir $outDir, 0777 or die "Error creating output directory '$outDir'\n";
359
+ -e "$dbdir/$_" or die "Missing input file $dbdir/$_\n" foreach $countryFile, $regionFile, $admin2File;
360
+
361
+ # order of country codes, region names and subregions in database
362
+ my (%orderCC, %orderRgn, %orderSub);
363
+
364
+ # languages to read from geonames database (converted to lower case)
365
+ my %languages = map { $_ => 1 } @languages;
366
+
367
+ # language codes supported by ExifTool
368
+ my @supportedLangs = qw(cs de en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh-cn zh-tw);
369
+
370
+ # supported country-specific languages
371
+ my %ccLang = ( TW => 'zh', CN => 'zh', CA => 'en', GB => 'en' );
372
+ my (%lang, %haveCountry, %cityFlags, %rgnFlags, %subFlags, %ccFlags, %flags);
373
+ my (%haveRegion, %haveSubRgn, $filesize, $percent);
374
+
375
+ sub GetFileSize($)
376
+ {
377
+ my $file = shift;
378
+ seek $file, 0, 2 or die "Seek error\n";
379
+ my $size = tell $file;
380
+ seek $file, 0, 0 or die "Seek error\n";
381
+ return $size;
382
+ }
383
+
384
+ # pre-scan database to determine which countries, regions and subregions we will be using
385
+ foreach $dbfile (@dbfiles) {
386
+ my $database = $$dbfile{file};
387
+ my $upgraded;
388
+
389
+ print "Reading $database... 0%";
390
+ flush STDOUT;
391
+
392
+ # pre-read the files to initialize necessary variables
393
+ open INFILE, '<', $database or die "Error opening $database\n";
394
+ $filesize = GetFileSize(\*INFILE);
395
+
396
+ open OUTFILE, '>', "$outDir/$outFile" or die "Error creating $outFile in $outDir\n";
397
+ binmode(OUTFILE);
398
+
399
+ $$dbfile{kept} = [ ];
400
+ $percent = -1;
401
+ while (<INFILE>) {
402
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
403
+ if ($percent != $p) {
404
+ printf("\b\b\b\b%3d%%", $percent = $p);
405
+ flush STDOUT;
406
+ }
407
+ my @items = split /\t/;
408
+ my ($dbnum, $code, $cc, $rgn, $sub, $pop) = @items[0,7,8,10,11,14];
409
+ next unless @items > 17 and $cc =~ /^[A-Z]{2}$/;
410
+ my ($minpop, $keep);
411
+ if ($needRgn{$cc} and defined $$dbfile{cc_minpop}{"$cc$rgn"}) {
412
+ $minpop = $$dbfile{cc_minpop}{"$cc$rgn"};
413
+ } elsif (defined $$dbfile{cc_minpop}{$cc}) {
414
+ $minpop = $$dbfile{cc_minpop}{$cc};
415
+ } else {
416
+ $minpop = $$dbfile{minpop};
417
+ }
418
+ # keep regardless of population
419
+ if ($needRgn{$cc} and $$dbfile{keep}{"$cc$rgn"}) {
420
+ $keep = $$dbfile{keep}{"$cc$rgn"}{$code};#TEST
421
+ } elsif ($$dbfile{keep}{$cc}) {
422
+ $keep = $$dbfile{keep}{$cc}{$code};
423
+ } else {
424
+ $keep = $$dbfile{keep}{'??'}{$code};
425
+ }
426
+ if ($pop < $minpop) {
427
+ next unless $keep;
428
+ } elsif ($needRgn{$cc} and $$dbfile{keepp}{"$cc$rgn"}) {
429
+ next unless $$dbfile{keepp}{"$cc$rgn"}{$code};
430
+ } elsif ($$dbfile{keepp}{$cc}) {
431
+ next unless $$dbfile{keepp}{$cc}{$code};
432
+ } else {
433
+ next unless $$dbfile{keepp}{'??'}{$code};
434
+ }
435
+ push @{$$dbfile{kept}}, $_;
436
+ $lang{$dbnum} = { alt => [ ] };
437
+ $haveCountry{$cc} = 1;
438
+ $haveRegion{"$cc$rgn"} = 1;
439
+ $haveSubRgn{"$cc$rgn.$sub"} = 1;
440
+ # add new feature codes (up to maximum index of 0x1f)
441
+ unless ($featureCodes{$code} or @featureCodes > 0x1f) {
442
+ if ($dbVer eq '1.02') {
443
+ next if $code =~ /^(PPLH|PPLQ|PPLW)$/; # (stored as "Other" in v1.02)
444
+ $dbVer = '1.03';
445
+ $upgraded = 1; # print upgrade warning
446
+ @featureCodes = @fc103;
447
+ my $i = 0;
448
+ %featureCodes = map { $_ => $i++ } @featureCodes;
449
+ next if $featureCodes{$code};
450
+ }
451
+ push @featureCodes, $code;
452
+ $featureCodes{$code} = $#featureCodes;
453
+ }
454
+ }
455
+ close INFILE;
456
+ print "\b\b\b\bDone.\n";
457
+ warn "Some feature codes not supported by version 1.02, writing as 1.03 instead.\n" if $upgraded;
458
+ }
459
+
460
+ # read country names
461
+ $i = 0;
462
+ open INFILE, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
463
+ while (<INFILE>) {
464
+ next if /^#/;
465
+ my @items = split /\t/;
466
+ next unless $haveCountry{$items[0]};
467
+ $lang{$items[16]} = { alt => [ ] }; # reference lookup by db number
468
+ $orderCC{$items[0]} = $i++; # (entry 0 is the first country)
469
+ }
470
+ close INFILE;
471
+ printf " %.6d countries (0x%.4x)\n",$i,$i if $verbose;
472
+ die "Too many countries!\n" if $i > 0x100; # (no default 0 entry)
473
+
474
+ # read region (admin1) names
475
+ $i = 0;
476
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
477
+ while (<REGION>) {
478
+ chomp;
479
+ my @items = split /\t/;
480
+ $items[0] =~ tr/.//d; # (remove "." separator)
481
+ next unless $haveRegion{$items[0]};
482
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
483
+ $orderRgn{$items[0]} = ++$i; # (entry 0 is default "" region)
484
+ }
485
+ close REGION;
486
+ printf " %.6d regions (0x%.4x)\n",$i,$i if $verbose;
487
+ die "Too many regions!\n" if $i > 0x0fff; # (account for default 0 entry)
488
+
489
+ # read subregion (admin2) names
490
+ $i = 0;
491
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
492
+ while (<ADMIN2>) {
493
+ chomp;
494
+ my @items = split /\t/;
495
+ $items[0] =~ s/\.//; # (remove first "." separator)
496
+ next unless $haveSubRgn{$items[0]};
497
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
498
+ $orderSub{$items[0]} = ++$i; # (entry 0 is default "" subregion)
499
+ }
500
+ close ADMIN2;
501
+ printf " %.6d subregions (0x%.4x)\n",$i,$i if $verbose;
502
+ if ($i > ($dbVer eq '1.02' ? 0x7fff : 0xffff)) {
503
+ die "Too many subregions!\n" if $i > 0xffff;
504
+ $dbVer = '1.03';
505
+ warn "Too many subregions for version 1.02, writing as 1.03 instead.\n";
506
+ }
507
+
508
+ # read alternate names file if available
509
+ if (not $noLang and open INFILE, '<', "$dbdir/$altNamesFile") {
510
+ $filesize = GetFileSize(\*INFILE);
511
+ print "Reading $dbdir/$altNamesFile... 0%";
512
+ my %bestPri;
513
+ while (<INFILE>) {
514
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
515
+ if ($percent != $p) {
516
+ printf("\b\b\b\b%3d%%", $percent = $p);
517
+ flush STDOUT;
518
+ }
519
+ # items: 0=altID,1=geoID,2=lang,3=alt name,4=preferred,5=short,6=colloquial,7=historic
520
+ my @items = split /\t/;
521
+ my $lkup = $lang{$items[1]} or next;
522
+ my $altList = $lang{$items[1]}{alt};
523
+ my $lng = lc $items[2];
524
+ next if $lng and not $languages{$lng};
525
+ push @$altList, $items[3] unless grep /^\Q$items[3]\E$/i, @$altList;
526
+ next unless $lng;
527
+ my $flags = 0;
528
+ # keep only the best translation for this name for each language
529
+ $items[$_] and $flags |= (1<<($_-4)) foreach 4,5,6,7;
530
+ $flags{$items[1]} = ( $flags{$items[1]} || 0 ) | $flags;
531
+ next if $items[6] or $items[7]; # ignore colloquial and historic names
532
+ my $pri = $items[5] ? 0 : ($items[4] ? 1 : 2); # priority for best type of name
533
+ my $langPri = $bestPri{$lng};
534
+ $langPri or $langPri = $bestPri{$lng} = { };
535
+ next if $$langPri{$items[1]} and $$langPri{$items[1]} > $pri;
536
+ $$langPri{$items[1]} = $pri;
537
+ # save language-specific name for this feature, removing commas
538
+ ($$lkup{$lng} = $items[3]) =~ tr/,//d;
539
+ }
540
+ print "\b\b\b\bDone.\n";
541
+ close INFILE;
542
+ } else {
543
+ print "Not writing alternate languages ($dbdir/$altNamesFile not found)\n";
544
+ $noLang = 1;
545
+ }
546
+
547
+ my (%coords, %langLookups);
548
+
549
+ foreach $dbfile (@dbfiles) {
550
+ my $database = $$dbfile{file};
551
+
552
+ print "Processing $database... 0%";
553
+ my $i = 0;
554
+ my $num = scalar @{$$dbfile{kept}};
555
+
556
+ foreach (@{$$dbfile{kept}}) {
557
+ my @items = split /\t/;
558
+ my ($lat, $lon) = @items[4,5];
559
+ $lat = int(($lat + 90) / 180 * 0x100000 + 0.5) & 0xfffff;
560
+ $lon = int(($lon + 180) / 360 * 0x100000 + 0.5) & 0xfffff;
561
+ my $coord = pack('nCn',$lat>>4,(($lat&0x0f)<<4)|($lon&0x0f),$lon>>4);;
562
+ # take the city with the highest population if there are
563
+ # multiple cities with the same reduced coordinates
564
+ if ($coords{$coord} and $coords{$coord}[6] >= $items[14]) {
565
+ next;
566
+ }
567
+ # coords=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
568
+ my ($altList, $alt);
569
+ die "Internal error\n" unless $lang{$items[0]} and $altList = $lang{$items[0]}{alt};
570
+ if (@$altList) {
571
+ tr/,//d foreach @$altList;
572
+ $alt = join ',', sort @$altList;
573
+ } else {
574
+ $alt = '';
575
+ }
576
+ $coords{$coord} = [ @items[4,5,1,8,10,11,14,17,7] ];
577
+ $coords{$coord}[9] = $alt;
578
+ my $lkup = $lang{$items[0]}; # 0=geoID
579
+ my $key = $items[1]; # 1=city
580
+ $lkup or die "Missing language for geoID $items[0]\n";
581
+ $cityFlags{$flags{$items[0]}} = ($cityFlags{$flags{$items[0]}} || 0) + 1 if defined $flags{$items[0]};
582
+ my $ccLang = $ccLang{$items[8]}; # get country-specific language
583
+ if ($ccLang and $$lkup{$ccLang}) {
584
+ my $lc = $ccLang . '-' . lc($items[8]); # eg. zh-cn
585
+ # add country suffix for this language in this country
586
+ $$lkup{$lc} = $$lkup{$ccLang} unless $$lkup{$lc};
587
+ }
588
+ foreach (@supportedLangs) {
589
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
590
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
591
+ push @{$langLookups{$_}{$key}}, "$items[8]$items[10].$items[11],$$lkup{$_}";
592
+ }
593
+ my $p = int(100 * ++$i / $num + 0.5);
594
+ next if $percent == $p;
595
+ printf("\b\b\b\b%3d%%", $percent = $p);
596
+ flush STDOUT;
597
+ }
598
+ print "\b\b\b\bDone.\n";
599
+ }
600
+
601
+ # write city database
602
+ my $str = $noLang ? '' : " and $outAltNames";
603
+ my @t = localtime;
604
+ my $date = sprintf('%.4d-%.2d-%.2d', $t[5]+1900, $t[4]+1, $t[3]);
605
+ print "Writing $outDir/$outFile (version $dbVer)$str...\n";
606
+ print OUTFILE "Geolocation$dbVer\t",scalar(keys %coords),"\n";
607
+ print OUTFILE "# $date Cities with population $dbfiles[0]{minpop} or greater from geonames.org with a Creative Commons license\n";
608
+
609
+ if ($noLang) {
610
+ unlink "$outDir/$outAltNames";
611
+ } else {
612
+ open ALTOUT, ">$outDir/$outAltNames";
613
+ binmode ALTOUT;
614
+ }
615
+ my (%tz, @tz, %fcodes);
616
+ my $tzNum = 0;
617
+ foreach (sort { $a cmp $b } keys %coords) {
618
+ my $items = $coords{$_};
619
+ # @$items=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
620
+ my $iCC = $orderCC{$$items[3]};
621
+ my $iRgn = $orderRgn{"$$items[3]$$items[4]"} || 0;
622
+ my $iSub = $orderSub{"$$items[3]$$items[4].$$items[5]"} || 0;
623
+ my $tn = $tz{$$items[7]};
624
+ unless ($tn) {
625
+ push @tz, $$items[7];
626
+ $tn = $tz{$$items[7]} = $tzNum++;
627
+ }
628
+ # convert population to our binary format
629
+ my $pop = sprintf('%.1e',$$items[6]); # format: "3.1e+04"
630
+ # pack CC, population and region index into a 32-bit integer
631
+ my $code = ($iCC << 24) | (substr($pop,6,1)<<20) | (substr($pop,0,1)<<16) | (substr($pop,2,1)<<12) | $iRgn;
632
+ $fcodes{$$items[8]} = ($fcodes{$$items[8]} || 0) + 1;
633
+ my $fc = $featureCodes{$$items[8]} || 0;
634
+ # store high bit of timezone index
635
+ if ($tn > 255) {
636
+ if ($dbVer eq '1.02') {
637
+ $iSub |= 0x8000;
638
+ $tn -= 256;
639
+ } else {
640
+ $fc |= 0x80;
641
+ $tn -= 256;
642
+ }
643
+ }
644
+ my $pt = pack('NnCC', $code, $iSub, $tn, $fc);
645
+ $$items[2] =~ tr/,//d; # remove any commas
646
+ print OUTFILE "$_$pt$$items[2]\n";
647
+ next if $noLang;
648
+ $$items[9] =~ tr/,/\n/;
649
+ print ALTOUT $$items[9],"\0";
650
+ }
651
+ my $altSize = 0;
652
+ unless ($noLang) {
653
+ $altSize = tell ALTOUT;
654
+ close ALTOUT;
655
+ }
656
+ print OUTFILE "\0\0\0\0\x01\n"; # section terminator
657
+
658
+ die "Too many time zones!\n" if $tzNum > 0x01ff;
659
+
660
+ if ($verbose) {
661
+ $i = 0;
662
+ print "Features kept:\n";
663
+ foreach (sort keys %fcodes) {
664
+ my $fc = $featureCodes{$_} || 0;
665
+ printf "%6d (%2d) %s\n", $fcodes{$_}, $fc, $_;
666
+ }
667
+ }
668
+
669
+ # write country codes
670
+ open COUNTRY, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
671
+ my %cc;
672
+ while (<COUNTRY>) {
673
+ next if /^#/;
674
+ my @items = split /\t/;
675
+ next unless $haveCountry{$items[0]};
676
+ $cc{$items[4]} = $items[0];
677
+ die "country code error\n" if length $items[0] != 2;
678
+ $items[4] =~ tr/,//d; # remove any commas
679
+ print OUTFILE "$items[0]$items[4]\n";
680
+ if ($lang{$items[16]}) { # (16=geoID)
681
+ my $lkup = $lang{$items[16]};
682
+ my $key = $items[4]; # country name
683
+ $ccFlags{$flags{$items[16]}} = ($ccFlags{$flags{$items[16]}} || 0) + 1 if defined $flags{$items[16]};
684
+ foreach (@supportedLangs) {
685
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
686
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
687
+ push @{$langLookups{$_}{$key}}, ",$$lkup{$_}";
688
+ }
689
+ }
690
+ }
691
+ close COUNTRY;
692
+
693
+ print OUTFILE "\0\0\0\0\x02\n"; # section terminator
694
+
695
+ # write regions
696
+ print OUTFILE "\n"; # (null region)
697
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
698
+ my %region;
699
+ while (<REGION>) {
700
+ chomp;
701
+ my @items = split /\t/;
702
+ #items: 0=region code, 1=name, 2=ascii, 3=geoID
703
+ $items[0] =~ tr/.//d; # (remove "." separator)
704
+ next unless $haveRegion{$items[0]};
705
+ $region{$items[0]} = $items[1];
706
+ $items[1] =~ tr/,//d; # remove any commas
707
+ print OUTFILE "$items[1]\n";
708
+ if ($lang{$items[3]}) { # (3=geoID)
709
+ my $lkup = $lang{$items[3]};
710
+ my $key = $items[1]; # region name
711
+ my $cc = substr($items[0], 0, 2);
712
+ $rgnFlags{$flags{$items[3]}} = ($rgnFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
713
+ foreach (@supportedLangs) {
714
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
715
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
716
+ push @{$langLookups{$_}{$key}}, "$cc,$$lkup{$_}";
717
+ }
718
+ }
719
+ }
720
+ close REGION;
721
+
722
+ print OUTFILE "\0\0\0\0\x03\n"; # section terminator
723
+
724
+ # write subregions
725
+ print OUTFILE "\n"; # (null admin2)
726
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
727
+ my %subregion;
728
+ while (<ADMIN2>) {
729
+ chomp;
730
+ my @items = split /\t/;
731
+ #items: 0=admin2 code, 1=name, 2=ascii, 3=geoID
732
+ $items[0] =~ s/\.//; # (remove first "." separator)
733
+ next unless $haveSubRgn{$items[0]};
734
+ $subregion{$items[0]} = $items[1];
735
+ $items[1] =~ tr/,//d; # remove any commas
736
+ print OUTFILE "$items[1]\n";
737
+ if ($lang{$items[3]}) { # (3=geoID)
738
+ my $lkup = $lang{$items[3]};
739
+ my $key = $items[1]; # region name
740
+ $subFlags{$flags{$items[3]}} = ($subFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
741
+ my $rc = $items[0];
742
+ $rc =~ s/\..*//; # (remove subregion code)
743
+ foreach (@supportedLangs) {
744
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
745
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
746
+ push @{$langLookups{$_}{$key}}, "$rc,$$lkup{$_}";
747
+ }
748
+ }
749
+ }
750
+ close ADMIN2;
751
+
752
+ print OUTFILE "\0\0\0\0\x04\n"; # section terminator
753
+
754
+ # write timezones
755
+ print OUTFILE $_,"\n" foreach @tz;
756
+
757
+ print OUTFILE "\0\0\0\0\x05\n"; # section terminator
758
+
759
+ # write feature codes
760
+ print OUTFILE $_,"\n" foreach @featureCodes;
761
+
762
+ # write terminator and close Geolocation.dat
763
+ print OUTFILE "\0\0\0\0\0\n"; # file terminator
764
+ my $outSize = tell OUTFILE;
765
+ close OUTFILE;
766
+
767
+ # write language lookups
768
+ my $langSize = 0;
769
+ my $langDir = "$outDir/$geoLang";
770
+ # delete existing languages
771
+ unlink <"$langDir/*.pm">;
772
+ unless ($noLang) {
773
+ my $n = scalar(keys %langLookups);
774
+ print "Writing $n language files to $outDir/$geoLang...\n";
775
+ mkdir $langDir, 0777;
776
+ my ($lng, $key, $str, $nm, $alt);
777
+ foreach $lng (sort keys %langLookups) { # ($lng = language code)
778
+ my $myLng = $lng;
779
+ $myLng =~ tr/-/_/;
780
+ my $lkup = $langLookups{$lng};
781
+ my $file = "$myLng.pm";
782
+ open OUT, ">$langDir/$file" or die "Error creating $file\n";
783
+ print OUT "# Geolocation language translations for $myLng\n";
784
+ print OUT "#\n# Based on Creative Commons database from geonames.org\n\n";
785
+ print OUT "%Image::ExifTool::GeoLang::${myLng}::Translate = (\n";
786
+ foreach $key (sort keys %$lkup) {
787
+ ($nm = $key) =~ s/'/\\'/g;
788
+ # count entries and use the most common one, then add others with country+region ID's
789
+ # (entries in @$li are of the form: City:"CCRc,Sc,Alt", Sub:"CCRc,Alt", Rgn:"CC,Alt", Country:",Alt")
790
+ # (Rc = region code, Sc = subregion code)
791
+ my $li = $$lkup{$key};
792
+ my %count;
793
+ # sort by popularity of alternate name
794
+ foreach (@$li) {
795
+ my $val = $_;
796
+ $val =~ s/.*?,//;
797
+ $count{$val} = ($count{$val} || 0) + 1;
798
+ }
799
+ my @order = sort { $count{$b} <=> $count{$a} or length($a) <=> length($b) or $a cmp $b } keys %count;
800
+ my $first = 1;
801
+ foreach $alt (@order) {
802
+ foreach (sort @$li) {
803
+ my ($code,$val) = split ',', $_, 2;
804
+ # ($code will be empty for a country name, and 2 characters for a region name,
805
+ # and contain a "." for a city name)
806
+ next unless $val eq $alt; # don't add if alternate name is the same
807
+ die "Backslash in translated name" if $val =~ /\\/;
808
+ $val =~ s/'/\\'/g; # escape single quotes
809
+ if ($first and $val !~ /\(/) { # (don't add general translation if name is qualified with brackets)
810
+ print OUT "\t'$nm' => '$val',\n";
811
+ undef $first;
812
+ last;
813
+ }
814
+ # format for keys in language table
815
+ # City: "CCRgn,Subregion,City", "CCRgn,,City", "CC,City", ",City"
816
+ # Subregion: "CCRgn,Subregion,", "CCRgn,,"
817
+ # Region: "CCRgn,"
818
+ # Country: "CC,"
819
+ # Any: "Name"
820
+ if (not $code) {
821
+ # this is a country
822
+ $code = $cc{$key};
823
+ printf OUT "\t'$code,' => '$val',\n";
824
+ } elsif ($code !~ /\./) {
825
+ # this is a region or subregion
826
+ print OUT "\t'$code$nm,' => '$val',\n";
827
+ } else {
828
+ # this is a city
829
+ # use region/subregions name instead of code
830
+ my $sub = $subregion{$code} || '';
831
+ $sub =~ s/'/\\'/g;
832
+ $code =~ s/\..*//;
833
+ $code = substr($code,0,2) . $region{$code} if $region{$code};
834
+ $code =~ s/'/\\'/g;
835
+ print OUT "\t'$code,$sub,$nm' => '$val',\n";
836
+ }
837
+ }
838
+ }
839
+ }
840
+ print OUT ");\n\n1; #end\n";
841
+ $langSize += tell OUT;
842
+ close OUT;
843
+ }
844
+ if ($verbose) {
845
+ my @type = ( City => \%cityFlags, Region => \%rgnFlags, Subregion => \%subFlags, Country => \%ccFlags );
846
+ for (;;) {
847
+ my $type = shift @type or last;
848
+ my $flags = shift @type;
849
+ print "$type flags:\n";
850
+ printf(" 0x%.2x - %d\n", 0, $$flags{0} || 0);
851
+ my @label = qw(preferred short colloquial historic);
852
+ foreach my $bit (0..5) {
853
+ my $n = 0;
854
+ $_ & (1<<$bit) and ++$n foreach keys %$flags;
855
+ printf(" 0x%.2x - %d (%s)\n", (1<<$bit), $n, shift(@label)) if $n;
856
+ }
857
+ }
858
+ }
859
+ }
860
+
861
+ print "Output file size(s):\n";
862
+ printf "%8.2f MB %s (%d entries)\n", $outSize / 1e6, $outFile, scalar(keys %coords);
863
+ printf "%8.2f MB %s\n", $altSize / 1e6, $outAltNames if $altSize;
864
+ printf "%8.2f MB %s/*.pm\n", $langSize / 1e6, $geoLang if $langSize;
865
+ printf "%8.2f MB Total\n", ($outSize + $altSize + $langSize) / 1e6 if $altSize or $langSize;
866
+
867
+ # end