exiftool_vendored 12.81.0 → 12.83.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/bin/Changes +39 -1
  3. data/bin/MANIFEST +4 -18
  4. data/bin/META.json +1 -1
  5. data/bin/META.yml +1 -1
  6. data/bin/README +3 -2
  7. data/bin/build_geolocation +867 -0
  8. data/bin/exiftool +37 -9
  9. data/bin/fmt_files/gpx.fmt +2 -1
  10. data/bin/fmt_files/gpx_wpt.fmt +2 -1
  11. data/bin/lib/Image/ExifTool/BuildTagLookup.pm +47 -31
  12. data/bin/lib/Image/ExifTool/CanonVRD.pm +6 -5
  13. data/bin/lib/Image/ExifTool/DJI.pm +29 -0
  14. data/bin/lib/Image/ExifTool/Exif.pm +19 -2
  15. data/bin/lib/Image/ExifTool/FujiFilm.pm +9 -2
  16. data/bin/lib/Image/ExifTool/GM.pm +552 -0
  17. data/bin/lib/Image/ExifTool/Geolocation.dat +0 -0
  18. data/bin/lib/Image/ExifTool/Geolocation.pm +90 -54
  19. data/bin/lib/Image/ExifTool/Nikon.pm +9 -7
  20. data/bin/lib/Image/ExifTool/QuickTime.pm +31 -23
  21. data/bin/lib/Image/ExifTool/QuickTimeStream.pl +66 -30
  22. data/bin/lib/Image/ExifTool/README +2 -0
  23. data/bin/lib/Image/ExifTool/Sony.pm +15 -6
  24. data/bin/lib/Image/ExifTool/TagLookup.pm +37 -6
  25. data/bin/lib/Image/ExifTool/TagNames.pod +407 -239
  26. data/bin/lib/Image/ExifTool/WriteQuickTime.pl +5 -2
  27. data/bin/lib/Image/ExifTool/Writer.pl +165 -133
  28. data/bin/lib/Image/ExifTool/XMP.pm +3 -2
  29. data/bin/lib/Image/ExifTool/XMP2.pl +3 -0
  30. data/bin/lib/Image/ExifTool.pm +38 -9
  31. data/bin/lib/Image/ExifTool.pod +31 -18
  32. data/bin/perl-Image-ExifTool.spec +1 -1
  33. data/lib/exiftool_vendored/version.rb +1 -1
  34. metadata +4 -20
  35. data/bin/lib/Image/ExifTool/GeoLang/cs.pm +0 -978
  36. data/bin/lib/Image/ExifTool/GeoLang/de.pm +0 -1975
  37. data/bin/lib/Image/ExifTool/GeoLang/en_ca.pm +0 -44
  38. data/bin/lib/Image/ExifTool/GeoLang/en_gb.pm +0 -124
  39. data/bin/lib/Image/ExifTool/GeoLang/es.pm +0 -2921
  40. data/bin/lib/Image/ExifTool/GeoLang/fi.pm +0 -1116
  41. data/bin/lib/Image/ExifTool/GeoLang/fr.pm +0 -3171
  42. data/bin/lib/Image/ExifTool/GeoLang/it.pm +0 -2750
  43. data/bin/lib/Image/ExifTool/GeoLang/ja.pm +0 -10256
  44. data/bin/lib/Image/ExifTool/GeoLang/ko.pm +0 -4499
  45. data/bin/lib/Image/ExifTool/GeoLang/nl.pm +0 -1270
  46. data/bin/lib/Image/ExifTool/GeoLang/pl.pm +0 -3019
  47. data/bin/lib/Image/ExifTool/GeoLang/ru.pm +0 -18220
  48. data/bin/lib/Image/ExifTool/GeoLang/sk.pm +0 -441
  49. data/bin/lib/Image/ExifTool/GeoLang/sv.pm +0 -714
  50. data/bin/lib/Image/ExifTool/GeoLang/tr.pm +0 -452
  51. data/bin/lib/Image/ExifTool/GeoLang/zh_cn.pm +0 -2225
  52. data/bin/lib/Image/ExifTool/GeoLang/zh_tw.pm +0 -72
@@ -0,0 +1,867 @@
1
+ #!/usr/bin/perl -w
2
+ #-------------------------------------------------------------------------------
3
+ # File: build_geolocation
4
+ #
5
+ # Description: Parse geonames files to create ExifTool geolocation database
6
+ #
7
+ # Syntax: build_geolocation [OPTIONS] [DBFILE] ...
8
+ #
9
+ # Options: (see -h output)
10
+ #
11
+ # Created: 2024-03-03 - P. Harvey
12
+ # 2024-04-15 - PH Clean up and add options for public release
13
+ #
14
+ # Notes: Requires these files from https://download.geonames.org/export/
15
+ #
16
+ # allCountries.txt (or other input database if specified)
17
+ # countryInfo.txt
18
+ # admin1CodesASCII.txt
19
+ # admin2Codes.txt
20
+ # alternateNamesV2.txt (optional)
21
+ #
22
+ # Output datbase format (Geolocation.dat):
23
+ #
24
+ # Header:
25
+ # "GeolocationV.VV\tNNNN\n" (V.VV=version, NNNN=num city entries)
26
+ # "# <comment>\n"
27
+ # NNNN City entries:
28
+ # Offset Format Description
29
+ # 0 int16u - latitude high 16 bits (converted to 0-0x100000 range)
30
+ # 2 int8u - latitude low 4 bits, longitude low 4 bits
31
+ # 3 int16u - longitude high 16 bits
32
+ # 5 int8u - index of country in country list
33
+ # 6 int8u - 0xf0 = population E exponent (in format "N.Fe+0E"), 0x0f = population N digit
34
+ # 7 int16u - 0xf000 = population F digit, 0x0fff = index in region list (admin1)
35
+ # 9 int16u - v1.02: 0x7fff = index in subregion (admin2), 0x8000 = high bit of time zone
36
+ # 9 int16u - v1.03: index in subregion (admin2)
37
+ # 11 int8u - low byte of time zone index
38
+ # 12 int8u - 0x1f = feature code index (see below), v1.03: 0x80 = high bit of time zone
39
+ # 13 string - UTF8 City name, terminated by newline
40
+ # "\0\0\0\0\x01"
41
+ # Country entries:
42
+ # 1. 2-character country code
43
+ # 2. Country name, terminated by newline
44
+ # "\0\0\0\0\x02"
45
+ # Region entries:
46
+ # 1. Region name, terminated by newline
47
+ # "\0\0\0\0\x03"
48
+ # Subregion entries:
49
+ # 1. Subregion name, terminated by newline
50
+ # "\0\0\0\0\x04"
51
+ # Time zone entries:
52
+ # 1. Time zone name, terminated by newline
53
+ # "\0\0\0\0\x05" (feature codes added in v1.03)
54
+ # Feature codes:
55
+ # 1. Feature code, terminated by newline
56
+ # "\0\0\0\0\0"
57
+ #
58
+ # Feature Codes v1.02: (see http://www.geonames.org/export/codes.html#P for descriptions)
59
+ #
60
+ # 0. Other 3. PPLA2 6. PPLA5 9. PPLF 12. PPLR 15. PPLX
61
+ # 1. PPL 4. PPLA3 7. PPLC 10. PPLG 13. PPLS
62
+ # 2. PPLA 5. PPLA4 8. PPLCH 11. PPLL 14. STLMT
63
+ #
64
+ # Feature Codes v1.03 and later are listed at the end of the database
65
+ #-------------------------------------------------------------------------------
66
+
67
+ use strict;
68
+
69
+ my $dbVer = '1.02'; # database version
70
+
71
+ my $dbFile = 'allCountries.txt'; # default database file
72
+ my $countryFile = 'countryInfo.txt'; # mandatory country names file
73
+ my $regionFile = 'admin1CodesASCII.txt'; # mandatory region names file
74
+ my $admin2File = 'admin2Codes.txt'; # mandatory subregion names file
75
+ my $altNamesFile = 'alternateNamesV2.txt'; # optional alternate names file
76
+ my $outFile = 'Geolocation.dat'; # output ExifTool database file
77
+ my $outAltNames = 'AltNames.dat'; # output alternate names file
78
+ my $outDirName = 'Geolocation_out'; # output directory for database files
79
+ my $geoLang = 'GeoLang'; # output directory for language files
80
+
81
+ my %defaults = (
82
+ file => $dbFile,
83
+ minpop => 2000,
84
+ def_codes => 'PPLA,PPLA2',
85
+ def_codesp => 'PPL,PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC,PPLCH,PPLF,PPLG,PPLH,PPLL,PPLQ,PPLR,PPLS,PPLW,PPLX,STLMT',
86
+ );
87
+
88
+ # languages to read from geonames database
89
+ my @languages = qw(cs de en en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh zh-cn zh-tw);
90
+
91
+ # indices of feature codes (v1.02 is hard-coded in ExifTool)
92
+ my @fc102 = qw(
93
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
94
+ PPLF PPLG PPLL PPLR PPLS STLMT PPLX
95
+ );
96
+ # base features for v1.03+
97
+ my @fc103 = qw(
98
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
99
+ PPLF PPLG PPLH PPLL PPLQ PPLR PPLS PPLW PPLX STLMT
100
+ );
101
+ my $i = 0;
102
+ my @featureCodes = $dbVer eq '1.02' ? @fc102 : @fc103;
103
+ my %featureCodes = map { $_ => $i++ } @featureCodes;
104
+
105
+ my ($dbfile, @dbfiles, $outDir, $verbose, $noLang, %needRgn);
106
+ my %optArgs = ( p => 1, c => 1, cp => 1, l => 1, o => 1, ver => 1 );
107
+
108
+ # process command-line arguments
109
+ my $opts = { };
110
+ while (@ARGV) {
111
+ my $opt = shift;
112
+ if (not $opt =~ s/^-//) {
113
+ $opt = "$opt/$defaults{file}" if -d $opt;
114
+ -e $opt or die "Error opening database $opt\n";
115
+ push @dbfiles, { %defaults, %$opts, file => $opt };
116
+ $opts = { };
117
+ next;
118
+ }
119
+ my $arg;
120
+ if ($optArgs{$opt}) {
121
+ $arg = shift;
122
+ defined $arg or die "Expecting argument for -$opt option\n";
123
+ }
124
+ if ($opt eq 'p') {
125
+ $arg = uc $arg;
126
+ if ($arg =~ /=/) {
127
+ my ($cc, $mp) = split /=/, $arg;
128
+ $mp =~ /^\d+$/ or die "Expecting number on rhs of '=' for -p option\n";
129
+ my @cc = split /,/, $cc;
130
+ foreach $cc (@cc) {
131
+ $cc =~ /^([A-Z]{2})(\..+)?$/ or die "Invalid country/region '$cc' for -p option\n";
132
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
133
+ $$opts{cc_minpop}{$cc} = $mp;
134
+ }
135
+ } else {
136
+ $arg =~ /^\d+$/ or die "Expecting number for -p option\n";
137
+ $$opts{minpop} = $arg;
138
+ }
139
+ } elsif ($opt =~ /^c(p?)$/) {
140
+ my $p = $1;
141
+ $arg = uc $arg;
142
+ my ($cc, $co);
143
+ if ($arg =~ /=/) {
144
+ ($cc, $co) = split /=/, $arg;
145
+ } else {
146
+ ($cc, $co) = ('??', $arg);
147
+ }
148
+ my $sign = $co =~ s/^([-+])// ? $1 : '';
149
+ my @co = split /,/, $co;
150
+ my @cc = split /,/, $cc;
151
+ # store lookup for features to keep for each country ('??' = any country)
152
+ foreach $cc (@cc) {
153
+ $cc =~ /^([A-Z]{2}|\?\?)(\..+)?$/ or die "Invalid country/region '$cc' for -$opt option\n";
154
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
155
+ if (not $sign) {
156
+ $$opts{"keep$p"}{$cc} = { };
157
+ } elsif (not $$opts{"keep$p"}{$cc}) {
158
+ # start from defaults
159
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
160
+ $$opts{"keep$p"}{$cc} = \%codes;
161
+ }
162
+ foreach $co (@co) {
163
+ if ($sign eq '-') {
164
+ delete $$opts{"keep$p"}{$cc}{$co};
165
+ } else {
166
+ $$opts{"keep$p"}{$cc}{$co} = 1;
167
+ }
168
+ }
169
+ }
170
+ } elsif ($opt eq 'l') {
171
+ $arg = lc $arg;
172
+ my @langs = split ',', $arg;
173
+ if (not @langs) {
174
+ undef @languages;
175
+ $noLang = 1;
176
+ } elsif ($langs[0] =~ s/^-//) {
177
+ @languages = grep !/^$langs[0]$/, @languages foreach @langs;
178
+ } else {
179
+ @languages = @langs;
180
+ }
181
+ } elsif ($opt eq 'o') {
182
+ $outDir = $arg;
183
+ } elsif ($opt eq 'ver') {
184
+ $dbVer = $arg;
185
+ $dbVer =~ /^1\.0[23]$/ or die "Unsupported version number $dbVer\n";
186
+ } elsif ($opt eq 'v') {
187
+ $verbose = 1;
188
+ } elsif ($opt eq 'h') {
189
+ my $defcp = $defaults{def_codesp};
190
+ $defcp =~ s/(PPLG,)/\n $1/;
191
+ my $defLang = join ',', @languages;
192
+ $defLang =~ s/(ja,)/\n $1/;
193
+ print <<"END";
194
+ Description: Build ExifTool Geolocation database.
195
+
196
+ Syntax: build_geolocation [OPTIONS] [DBFILE] ...
197
+
198
+ Options:
199
+ DBFILE - Input database file name or directory. Multiple input database
200
+ files may be specified. The -p, -c and -cp options apply to
201
+ the database that comes after them on the command line.
202
+ Default is "$dbFile".
203
+ -p POP - Minimum population for cities to include. POP may be a number
204
+ or be of the form "CC[,C2...]=###" to set different limits for
205
+ specific countries/regions, where CC and C2 are country codes
206
+ with optional region name or code appended after a period (eg.
207
+ "CA.Ontario,US=500" sets the minimum population to 500 for
208
+ cities on Ontario Canada or the U.S.). If a region is
209
+ specified, either the full name or the geonames admin1 code may
210
+ be used, and case and spaces are not significant. May be
211
+ multiple -p options for each input DBFILE. Default is "$defaults{minpop}".
212
+ -c CODE - Feature codes to always include, regardless of population. CODE
213
+ is a comma-separated list of feature codes, with an optional
214
+ leading comma-separated list of country/region codes followed
215
+ by an equals sign to apply only to specific countries. The
216
+ feature-code list may begin with a dash to remove entries from
217
+ the default list, or a plus sign to add entries. May be
218
+ multiple -c options for each intput DBFILE. Country/region and
219
+ feature names are case insensitive. Default is "$defaults{def_codes}".
220
+ -cp CODE - Additional features to include if above minimum population.
221
+ Default is "$defcp".
222
+ -l LANG - Alternate languages to read from $altNamesFile if
223
+ available. These are used to generate $outAltNames an the
224
+ $geoLang files. LANG is a comma-separated list of language
225
+ codes, starting with a dash to remove items from the default
226
+ list. May be set to an empty string to disable generation
227
+ of alternate language files even if $altNamesFile
228
+ exists. The same set of languages applies to all input
229
+ database files. Default is "$defLang".
230
+ -o OUTDIR - Output directory name. Default is the same directory as the
231
+ first input database file. A directory named $outDirName
232
+ containing the output files will be created in this directory.
233
+ -ver VER - Version for output geolocation database (default is $dbVer).
234
+ -v - Verbose messages.
235
+ -h - Show this help.
236
+
237
+ Input files (download from https://download.geonames.org/export/dump/):
238
+ allCountries.txt - default database file (smaller files with names
239
+ like "cities###.txt" may be specified instead)
240
+ countryInfo.txt - mandatory country names file
241
+ admin1CodesASCII.txt - mandatory region names file
242
+ admin2Codes.txt - mandatory subregion names file
243
+ alternateNamesV2.txt - optional alternate names file (must exist to
244
+ to generate $outAltNames and $geoLang files)
245
+
246
+ Output files:
247
+ $outDirName - default output directory name
248
+ $outFile - ExifTool database file
249
+ $outAltNames - alternate names file
250
+ $geoLang - directory for alternate language files
251
+
252
+ Author:
253
+ Copyright 2024, Phil Harvey
254
+
255
+ This is free software; you can redistribute it and/or modify it under the
256
+ same terms as Perl itself.
257
+ END
258
+ exit 0;
259
+ } else {
260
+ die "Unknown option '-$opt'\n";
261
+ }
262
+ }
263
+
264
+ if (@dbfiles) {
265
+ # apply any remaining options to last database file
266
+ $dbfiles[-1]{$_} = $$opts{$_} foreach keys %$opts;
267
+ } else {
268
+ # use default database file if none specified
269
+ push @dbfiles, { %defaults, %$opts };
270
+ unless (-e $dbfiles[0]{file}) {
271
+ # also look in script directory
272
+ if ($0 =~ m{(.*)/} and -e "$1/$dbfiles[0]{file}") {
273
+ $dbfiles[0]{file} = "$1/$dbfiles[0]{file}";
274
+ } else {
275
+ die qq(Database "$dbfiles[0]{file}" not found. Use -h option for help.\n);
276
+ }
277
+ }
278
+ }
279
+
280
+ # determine our working directory
281
+ my $dbdir = $dbfiles[0]{file};
282
+ $dbdir = '.' unless $dbdir =~ s(/[^/]*$)();
283
+
284
+ # add default feature code lookups if necessary
285
+ foreach $dbfile (@dbfiles) {
286
+ my $p;
287
+ foreach $p ('', 'p') {
288
+ next if $$dbfile{"keep$p"}{'??'};
289
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
290
+ $$dbfile{"keep$p"}{'??'} = \%codes;
291
+ }
292
+ }
293
+
294
+ # pre-read region file if necessary
295
+ if (%needRgn) {
296
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
297
+ while (<REGION>) {
298
+ my @items = split /\t/;
299
+ my $rgn = $items[0];
300
+ my ($cc) = split /\./, $rgn;
301
+ next unless $needRgn{$cc};
302
+ unless ($needRgn{$rgn}) { # allow region code to be used
303
+ $rgn = $cc . '.' . uc$items[1]; # also support full region name
304
+ unless ($needRgn{$rgn}) {
305
+ $rgn =~ tr/ //d;
306
+ next unless $needRgn{$rgn}; # also allow no spaces
307
+ }
308
+ }
309
+ $needRgn{$rgn} = [$items[0], "$cc.$items[1]"];
310
+ }
311
+ close REGION;
312
+ foreach (sort keys %needRgn) {
313
+ next if length == 2;
314
+ die "No matching region for $_\n" unless ref $needRgn{$_};
315
+ }
316
+ }
317
+
318
+ if ($verbose) {
319
+ my $langs = join ',', @languages;
320
+ $langs or $langs = '<none>';
321
+ print "Languages to read from input database(s):\n $langs\n";
322
+ foreach $dbfile (@dbfiles) {
323
+ print "Parameters for reading $$dbfile{file}:\n";
324
+ print " Minimum populations (??=any country):\n";
325
+ print " ??=$$dbfile{minpop}\n";
326
+ foreach (reverse sort keys %{$$dbfile{cc_minpop}}) {
327
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
328
+ print " $cc=$$dbfile{cc_minpop}{$_}\n";
329
+ }
330
+ print " Features to keep regardless of population:\n";
331
+ foreach (reverse sort keys %{$$dbfile{keep}}) {
332
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
333
+ print " $cc=",join(',', sort keys %{$$dbfile{keep}{$_}}), "\n";
334
+ }
335
+ print " Features to keep for population >= minimum:\n";
336
+ foreach (reverse sort keys %{$$dbfile{keepp}}) {
337
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
338
+ print " $_=",join(',', sort keys %{$$dbfile{keepp}{$_}}), "\n";
339
+ }
340
+ }
341
+ }
342
+
343
+ # translate option region arguments to region codes
344
+ foreach $dbfile (@dbfiles) {
345
+ my ($type, $cc);
346
+ foreach $type (qw(cc_minpop keep keepp)) {
347
+ my @cc = keys %{$$dbfile{$type}};
348
+ foreach $cc (@cc) {
349
+ next unless ref $needRgn{$cc};
350
+ my $tmp = $$dbfile{$type}{$cc};
351
+ delete $$dbfile{$type}{$cc};
352
+ $$dbfile{$type}{$needRgn{$cc}[0]} = $tmp;
353
+ }
354
+ }
355
+ }
356
+
357
+ $outDir = "$dbdir/$outDirName" unless defined $outDir;
358
+ -d $outDir or mkdir $outDir, 0777 or die "Error creating output directory '$outDir'\n";
359
+ -e "$dbdir/$_" or die "Missing input file $dbdir/$_\n" foreach $countryFile, $regionFile, $admin2File;
360
+
361
+ # order of country codes, region names and subregions in database
362
+ my (%orderCC, %orderRgn, %orderSub);
363
+
364
+ # languages to read from geonames database (converted to lower case)
365
+ my %languages = map { $_ => 1 } @languages;
366
+
367
+ # language codes supported by ExifTool
368
+ my @supportedLangs = qw(cs de en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh-cn zh-tw);
369
+
370
+ # supported country-specific languages
371
+ my %ccLang = ( TW => 'zh', CN => 'zh', CA => 'en', GB => 'en' );
372
+ my (%lang, %haveCountry, %cityFlags, %rgnFlags, %subFlags, %ccFlags, %flags);
373
+ my (%haveRegion, %haveSubRgn, $filesize, $percent);
374
+
375
+ sub GetFileSize($)
376
+ {
377
+ my $file = shift;
378
+ seek $file, 0, 2 or die "Seek error\n";
379
+ my $size = tell $file;
380
+ seek $file, 0, 0 or die "Seek error\n";
381
+ return $size;
382
+ }
383
+
384
+ # pre-scan database to determine which countries, regions and subregions we will be using
385
+ foreach $dbfile (@dbfiles) {
386
+ my $database = $$dbfile{file};
387
+ my $upgraded;
388
+
389
+ print "Reading $database... 0%";
390
+ flush STDOUT;
391
+
392
+ # pre-read the files to initialize necessary variables
393
+ open INFILE, '<', $database or die "Error opening $database\n";
394
+ $filesize = GetFileSize(\*INFILE);
395
+
396
+ open OUTFILE, '>', "$outDir/$outFile" or die "Error creating $outFile in $outDir\n";
397
+ binmode(OUTFILE);
398
+
399
+ $$dbfile{kept} = [ ];
400
+ $percent = -1;
401
+ while (<INFILE>) {
402
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
403
+ if ($percent != $p) {
404
+ printf("\b\b\b\b%3d%%", $percent = $p);
405
+ flush STDOUT;
406
+ }
407
+ my @items = split /\t/;
408
+ my ($dbnum, $code, $cc, $rgn, $sub, $pop) = @items[0,7,8,10,11,14];
409
+ next unless @items > 17 and $cc =~ /^[A-Z]{2}$/;
410
+ my ($minpop, $keep);
411
+ if ($needRgn{$cc} and defined $$dbfile{cc_minpop}{"$cc$rgn"}) {
412
+ $minpop = $$dbfile{cc_minpop}{"$cc$rgn"};
413
+ } elsif (defined $$dbfile{cc_minpop}{$cc}) {
414
+ $minpop = $$dbfile{cc_minpop}{$cc};
415
+ } else {
416
+ $minpop = $$dbfile{minpop};
417
+ }
418
+ # keep regardless of population
419
+ if ($needRgn{$cc} and $$dbfile{keep}{"$cc$rgn"}) {
420
+ $keep = $$dbfile{keep}{"$cc$rgn"}{$code};#TEST
421
+ } elsif ($$dbfile{keep}{$cc}) {
422
+ $keep = $$dbfile{keep}{$cc}{$code};
423
+ } else {
424
+ $keep = $$dbfile{keep}{'??'}{$code};
425
+ }
426
+ if ($pop < $minpop) {
427
+ next unless $keep;
428
+ } elsif ($needRgn{$cc} and $$dbfile{keepp}{"$cc$rgn"}) {
429
+ next unless $$dbfile{keepp}{"$cc$rgn"}{$code};
430
+ } elsif ($$dbfile{keepp}{$cc}) {
431
+ next unless $$dbfile{keepp}{$cc}{$code};
432
+ } else {
433
+ next unless $$dbfile{keepp}{'??'}{$code};
434
+ }
435
+ push @{$$dbfile{kept}}, $_;
436
+ $lang{$dbnum} = { alt => [ ] };
437
+ $haveCountry{$cc} = 1;
438
+ $haveRegion{"$cc$rgn"} = 1;
439
+ $haveSubRgn{"$cc$rgn.$sub"} = 1;
440
+ # add new feature codes (up to maximum index of 0x1f)
441
+ unless ($featureCodes{$code} or @featureCodes > 0x1f) {
442
+ if ($dbVer eq '1.02') {
443
+ next if $code =~ /^(PPLH|PPLQ|PPLW)$/; # (stored as "Other" in v1.02)
444
+ $dbVer = '1.03';
445
+ $upgraded = 1; # print upgrade warning
446
+ @featureCodes = @fc103;
447
+ my $i = 0;
448
+ %featureCodes = map { $_ => $i++ } @featureCodes;
449
+ next if $featureCodes{$code};
450
+ }
451
+ push @featureCodes, $code;
452
+ $featureCodes{$code} = $#featureCodes;
453
+ }
454
+ }
455
+ close INFILE;
456
+ print "\b\b\b\bDone.\n";
457
+ warn "Some feature codes not supported by version 1.02, writing as 1.03 instead.\n" if $upgraded;
458
+ }
459
+
460
+ # read country names
461
+ $i = 0;
462
+ open INFILE, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
463
+ while (<INFILE>) {
464
+ next if /^#/;
465
+ my @items = split /\t/;
466
+ next unless $haveCountry{$items[0]};
467
+ $lang{$items[16]} = { alt => [ ] }; # reference lookup by db number
468
+ $orderCC{$items[0]} = $i++; # (entry 0 is the first country)
469
+ }
470
+ close INFILE;
471
+ printf " %.6d countries (0x%.4x)\n",$i,$i if $verbose;
472
+ die "Too many countries!\n" if $i > 0x100; # (no default 0 entry)
473
+
474
+ # read region (admin1) names
475
+ $i = 0;
476
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
477
+ while (<REGION>) {
478
+ chomp;
479
+ my @items = split /\t/;
480
+ $items[0] =~ tr/.//d; # (remove "." separator)
481
+ next unless $haveRegion{$items[0]};
482
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
483
+ $orderRgn{$items[0]} = ++$i; # (entry 0 is default "" region)
484
+ }
485
+ close REGION;
486
+ printf " %.6d regions (0x%.4x)\n",$i,$i if $verbose;
487
+ die "Too many regions!\n" if $i > 0x0fff; # (account for default 0 entry)
488
+
489
+ # read subregion (admin2) names
490
+ $i = 0;
491
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
492
+ while (<ADMIN2>) {
493
+ chomp;
494
+ my @items = split /\t/;
495
+ $items[0] =~ s/\.//; # (remove first "." separator)
496
+ next unless $haveSubRgn{$items[0]};
497
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
498
+ $orderSub{$items[0]} = ++$i; # (entry 0 is default "" subregion)
499
+ }
500
+ close ADMIN2;
501
+ printf " %.6d subregions (0x%.4x)\n",$i,$i if $verbose;
502
+ if ($i > ($dbVer eq '1.02' ? 0x7fff : 0xffff)) {
503
+ die "Too many subregions!\n" if $i > 0xffff;
504
+ $dbVer = '1.03';
505
+ warn "Too many subregions for version 1.02, writing as 1.03 instead.\n";
506
+ }
507
+
508
+ # read alternate names file if available
509
+ if (not $noLang and open INFILE, '<', "$dbdir/$altNamesFile") {
510
+ $filesize = GetFileSize(\*INFILE);
511
+ print "Reading $dbdir/$altNamesFile... 0%";
512
+ my %bestPri;
513
+ while (<INFILE>) {
514
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
515
+ if ($percent != $p) {
516
+ printf("\b\b\b\b%3d%%", $percent = $p);
517
+ flush STDOUT;
518
+ }
519
+ # items: 0=altID,1=geoID,2=lang,3=alt name,4=preferred,5=short,6=colloquial,7=historic
520
+ my @items = split /\t/;
521
+ my $lkup = $lang{$items[1]} or next;
522
+ my $altList = $lang{$items[1]}{alt};
523
+ my $lng = lc $items[2];
524
+ next if $lng and not $languages{$lng};
525
+ push @$altList, $items[3] unless grep /^\Q$items[3]\E$/i, @$altList;
526
+ next unless $lng;
527
+ my $flags = 0;
528
+ # keep only the best translation for this name for each language
529
+ $items[$_] and $flags |= (1<<($_-4)) foreach 4,5,6,7;
530
+ $flags{$items[1]} = ( $flags{$items[1]} || 0 ) | $flags;
531
+ next if $items[6] or $items[7]; # ignore colloquial and historic names
532
+ my $pri = $items[5] ? 0 : ($items[4] ? 1 : 2); # priority for best type of name
533
+ my $langPri = $bestPri{$lng};
534
+ $langPri or $langPri = $bestPri{$lng} = { };
535
+ next if $$langPri{$items[1]} and $$langPri{$items[1]} > $pri;
536
+ $$langPri{$items[1]} = $pri;
537
+ # save language-specific name for this feature, removing commas
538
+ ($$lkup{$lng} = $items[3]) =~ tr/,//d;
539
+ }
540
+ print "\b\b\b\bDone.\n";
541
+ close INFILE;
542
+ } else {
543
+ print "Not writing alternate languages ($dbdir/$altNamesFile not found)\n";
544
+ $noLang = 1;
545
+ }
546
+
547
+ my (%coords, %langLookups);
548
+
549
+ foreach $dbfile (@dbfiles) {
550
+ my $database = $$dbfile{file};
551
+
552
+ print "Processing $database... 0%";
553
+ my $i = 0;
554
+ my $num = scalar @{$$dbfile{kept}};
555
+
556
+ foreach (@{$$dbfile{kept}}) {
557
+ my @items = split /\t/;
558
+ my ($lat, $lon) = @items[4,5];
559
+ $lat = int(($lat + 90) / 180 * 0x100000 + 0.5) & 0xfffff;
560
+ $lon = int(($lon + 180) / 360 * 0x100000 + 0.5) & 0xfffff;
561
+ my $coord = pack('nCn',$lat>>4,(($lat&0x0f)<<4)|($lon&0x0f),$lon>>4);;
562
+ # take the city with the highest population if there are
563
+ # multiple cities with the same reduced coordinates
564
+ if ($coords{$coord} and $coords{$coord}[6] >= $items[14]) {
565
+ next;
566
+ }
567
+ # coords=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
568
+ my ($altList, $alt);
569
+ die "Internal error\n" unless $lang{$items[0]} and $altList = $lang{$items[0]}{alt};
570
+ if (@$altList) {
571
+ tr/,//d foreach @$altList;
572
+ $alt = join ',', sort @$altList;
573
+ } else {
574
+ $alt = '';
575
+ }
576
+ $coords{$coord} = [ @items[4,5,1,8,10,11,14,17,7] ];
577
+ $coords{$coord}[9] = $alt;
578
+ my $lkup = $lang{$items[0]}; # 0=geoID
579
+ my $key = $items[1]; # 1=city
580
+ $lkup or die "Missing language for geoID $items[0]\n";
581
+ $cityFlags{$flags{$items[0]}} = ($cityFlags{$flags{$items[0]}} || 0) + 1 if defined $flags{$items[0]};
582
+ my $ccLang = $ccLang{$items[8]}; # get country-specific language
583
+ if ($ccLang and $$lkup{$ccLang}) {
584
+ my $lc = $ccLang . '-' . lc($items[8]); # eg. zh-cn
585
+ # add country suffix for this language in this country
586
+ $$lkup{$lc} = $$lkup{$ccLang} unless $$lkup{$lc};
587
+ }
588
+ foreach (@supportedLangs) {
589
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
590
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
591
+ push @{$langLookups{$_}{$key}}, "$items[8]$items[10].$items[11],$$lkup{$_}";
592
+ }
593
+ my $p = int(100 * ++$i / $num + 0.5);
594
+ next if $percent == $p;
595
+ printf("\b\b\b\b%3d%%", $percent = $p);
596
+ flush STDOUT;
597
+ }
598
+ print "\b\b\b\bDone.\n";
599
+ }
600
+
601
+ # write city database
602
+ my $str = $noLang ? '' : " and $outAltNames";
603
+ my @t = localtime;
604
+ my $date = sprintf('%.4d-%.2d-%.2d', $t[5]+1900, $t[4]+1, $t[3]);
605
+ print "Writing $outDir/$outFile (version $dbVer)$str...\n";
606
+ print OUTFILE "Geolocation$dbVer\t",scalar(keys %coords),"\n";
607
+ print OUTFILE "# $date Cities with population $dbfiles[0]{minpop} or greater from geonames.org with a Creative Commons license\n";
608
+
609
+ if ($noLang) {
610
+ unlink "$outDir/$outAltNames";
611
+ } else {
612
+ open ALTOUT, ">$outDir/$outAltNames";
613
+ binmode ALTOUT;
614
+ }
615
+ my (%tz, @tz, %fcodes);
616
+ my $tzNum = 0;
617
+ foreach (sort { $a cmp $b } keys %coords) {
618
+ my $items = $coords{$_};
619
+ # @$items=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
620
+ my $iCC = $orderCC{$$items[3]};
621
+ my $iRgn = $orderRgn{"$$items[3]$$items[4]"} || 0;
622
+ my $iSub = $orderSub{"$$items[3]$$items[4].$$items[5]"} || 0;
623
+ my $tn = $tz{$$items[7]};
624
+ unless ($tn) {
625
+ push @tz, $$items[7];
626
+ $tn = $tz{$$items[7]} = $tzNum++;
627
+ }
628
+ # convert population to our binary format
629
+ my $pop = sprintf('%.1e',$$items[6]); # format: "3.1e+04"
630
+ # pack CC, population and region index into a 32-bit integer
631
+ my $code = ($iCC << 24) | (substr($pop,6,1)<<20) | (substr($pop,0,1)<<16) | (substr($pop,2,1)<<12) | $iRgn;
632
+ $fcodes{$$items[8]} = ($fcodes{$$items[8]} || 0) + 1;
633
+ my $fc = $featureCodes{$$items[8]} || 0;
634
+ # store high bit of timezone index
635
+ if ($tn > 255) {
636
+ if ($dbVer eq '1.02') {
637
+ $iSub |= 0x8000;
638
+ $tn -= 256;
639
+ } else {
640
+ $fc |= 0x80;
641
+ $tn -= 256;
642
+ }
643
+ }
644
+ my $pt = pack('NnCC', $code, $iSub, $tn, $fc);
645
+ $$items[2] =~ tr/,//d; # remove any commas
646
+ print OUTFILE "$_$pt$$items[2]\n";
647
+ next if $noLang;
648
+ $$items[9] =~ tr/,/\n/;
649
+ print ALTOUT $$items[9],"\0";
650
+ }
651
+ my $altSize = 0;
652
+ unless ($noLang) {
653
+ $altSize = tell ALTOUT;
654
+ close ALTOUT;
655
+ }
656
+ print OUTFILE "\0\0\0\0\x01\n"; # section terminator
657
+
658
+ die "Too many time zones!\n" if $tzNum > 0x01ff;
659
+
660
+ if ($verbose) {
661
+ $i = 0;
662
+ print "Features kept:\n";
663
+ foreach (sort keys %fcodes) {
664
+ my $fc = $featureCodes{$_} || 0;
665
+ printf "%6d (%2d) %s\n", $fcodes{$_}, $fc, $_;
666
+ }
667
+ }
668
+
669
+ # write country codes
670
+ open COUNTRY, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
671
+ my %cc;
672
+ while (<COUNTRY>) {
673
+ next if /^#/;
674
+ my @items = split /\t/;
675
+ next unless $haveCountry{$items[0]};
676
+ $cc{$items[4]} = $items[0];
677
+ die "country code error\n" if length $items[0] != 2;
678
+ $items[4] =~ tr/,//d; # remove any commas
679
+ print OUTFILE "$items[0]$items[4]\n";
680
+ if ($lang{$items[16]}) { # (16=geoID)
681
+ my $lkup = $lang{$items[16]};
682
+ my $key = $items[4]; # country name
683
+ $ccFlags{$flags{$items[16]}} = ($ccFlags{$flags{$items[16]}} || 0) + 1 if defined $flags{$items[16]};
684
+ foreach (@supportedLangs) {
685
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
686
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
687
+ push @{$langLookups{$_}{$key}}, ",$$lkup{$_}";
688
+ }
689
+ }
690
+ }
691
+ close COUNTRY;
692
+
693
+ print OUTFILE "\0\0\0\0\x02\n"; # section terminator
694
+
695
+ # write regions
696
+ print OUTFILE "\n"; # (null region)
697
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
698
+ my %region;
699
+ while (<REGION>) {
700
+ chomp;
701
+ my @items = split /\t/;
702
+ #items: 0=region code, 1=name, 2=ascii, 3=geoID
703
+ $items[0] =~ tr/.//d; # (remove "." separator)
704
+ next unless $haveRegion{$items[0]};
705
+ $region{$items[0]} = $items[1];
706
+ $items[1] =~ tr/,//d; # remove any commas
707
+ print OUTFILE "$items[1]\n";
708
+ if ($lang{$items[3]}) { # (3=geoID)
709
+ my $lkup = $lang{$items[3]};
710
+ my $key = $items[1]; # region name
711
+ my $cc = substr($items[0], 0, 2);
712
+ $rgnFlags{$flags{$items[3]}} = ($rgnFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
713
+ foreach (@supportedLangs) {
714
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
715
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
716
+ push @{$langLookups{$_}{$key}}, "$cc,$$lkup{$_}";
717
+ }
718
+ }
719
+ }
720
+ close REGION;
721
+
722
+ print OUTFILE "\0\0\0\0\x03\n"; # section terminator
723
+
724
+ # write subregions
725
+ print OUTFILE "\n"; # (null admin2)
726
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
727
+ my %subregion;
728
+ while (<ADMIN2>) {
729
+ chomp;
730
+ my @items = split /\t/;
731
+ #items: 0=admin2 code, 1=name, 2=ascii, 3=geoID
732
+ $items[0] =~ s/\.//; # (remove first "." separator)
733
+ next unless $haveSubRgn{$items[0]};
734
+ $subregion{$items[0]} = $items[1];
735
+ $items[1] =~ tr/,//d; # remove any commas
736
+ print OUTFILE "$items[1]\n";
737
+ if ($lang{$items[3]}) { # (3=geoID)
738
+ my $lkup = $lang{$items[3]};
739
+ my $key = $items[1]; # region name
740
+ $subFlags{$flags{$items[3]}} = ($subFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
741
+ my $rc = $items[0];
742
+ $rc =~ s/\..*//; # (remove subregion code)
743
+ foreach (@supportedLangs) {
744
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
745
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
746
+ push @{$langLookups{$_}{$key}}, "$rc,$$lkup{$_}";
747
+ }
748
+ }
749
+ }
750
+ close ADMIN2;
751
+
752
+ print OUTFILE "\0\0\0\0\x04\n"; # section terminator
753
+
754
+ # write timezones
755
+ print OUTFILE $_,"\n" foreach @tz;
756
+
757
+ print OUTFILE "\0\0\0\0\x05\n"; # section terminator
758
+
759
+ # write feature codes
760
+ print OUTFILE $_,"\n" foreach @featureCodes;
761
+
762
+ # write terminator and close Geolocation.dat
763
+ print OUTFILE "\0\0\0\0\0\n"; # file terminator
764
+ my $outSize = tell OUTFILE;
765
+ close OUTFILE;
766
+
767
+ # write language lookups
768
+ my $langSize = 0;
769
+ my $langDir = "$outDir/$geoLang";
770
+ # delete existing languages
771
+ unlink <"$langDir/*.pm">;
772
+ unless ($noLang) {
773
+ my $n = scalar(keys %langLookups);
774
+ print "Writing $n language files to $outDir/$geoLang...\n";
775
+ mkdir $langDir, 0777;
776
+ my ($lng, $key, $str, $nm, $alt);
777
+ foreach $lng (sort keys %langLookups) { # ($lng = language code)
778
+ my $myLng = $lng;
779
+ $myLng =~ tr/-/_/;
780
+ my $lkup = $langLookups{$lng};
781
+ my $file = "$myLng.pm";
782
+ open OUT, ">$langDir/$file" or die "Error creating $file\n";
783
+ print OUT "# Geolocation language translations for $myLng\n";
784
+ print OUT "#\n# Based on Creative Commons database from geonames.org\n\n";
785
+ print OUT "%Image::ExifTool::GeoLang::${myLng}::Translate = (\n";
786
+ foreach $key (sort keys %$lkup) {
787
+ ($nm = $key) =~ s/'/\\'/g;
788
+ # count entries and use the most common one, then add others with country+region ID's
789
+ # (entries in @$li are of the form: City:"CCRc,Sc,Alt", Sub:"CCRc,Alt", Rgn:"CC,Alt", Country:",Alt")
790
+ # (Rc = region code, Sc = subregion code)
791
+ my $li = $$lkup{$key};
792
+ my %count;
793
+ # sort by popularity of alternate name
794
+ foreach (@$li) {
795
+ my $val = $_;
796
+ $val =~ s/.*?,//;
797
+ $count{$val} = ($count{$val} || 0) + 1;
798
+ }
799
+ my @order = sort { $count{$b} <=> $count{$a} or length($a) <=> length($b) or $a cmp $b } keys %count;
800
+ my $first = 1;
801
+ foreach $alt (@order) {
802
+ foreach (sort @$li) {
803
+ my ($code,$val) = split ',', $_, 2;
804
+ # ($code will be empty for a country name, and 2 characters for a region name,
805
+ # and contain a "." for a city name)
806
+ next unless $val eq $alt; # don't add if alternate name is the same
807
+ die "Backslash in translated name" if $val =~ /\\/;
808
+ $val =~ s/'/\\'/g; # escape single quotes
809
+ if ($first and $val !~ /\(/) { # (don't add general translation if name is qualified with brackets)
810
+ print OUT "\t'$nm' => '$val',\n";
811
+ undef $first;
812
+ last;
813
+ }
814
+ # format for keys in language table
815
+ # City: "CCRgn,Subregion,City", "CCRgn,,City", "CC,City", ",City"
816
+ # Subregion: "CCRgn,Subregion,", "CCRgn,,"
817
+ # Region: "CCRgn,"
818
+ # Country: "CC,"
819
+ # Any: "Name"
820
+ if (not $code) {
821
+ # this is a country
822
+ $code = $cc{$key};
823
+ printf OUT "\t'$code,' => '$val',\n";
824
+ } elsif ($code !~ /\./) {
825
+ # this is a region or subregion
826
+ print OUT "\t'$code$nm,' => '$val',\n";
827
+ } else {
828
+ # this is a city
829
+ # use region/subregions name instead of code
830
+ my $sub = $subregion{$code} || '';
831
+ $sub =~ s/'/\\'/g;
832
+ $code =~ s/\..*//;
833
+ $code = substr($code,0,2) . $region{$code} if $region{$code};
834
+ $code =~ s/'/\\'/g;
835
+ print OUT "\t'$code,$sub,$nm' => '$val',\n";
836
+ }
837
+ }
838
+ }
839
+ }
840
+ print OUT ");\n\n1; #end\n";
841
+ $langSize += tell OUT;
842
+ close OUT;
843
+ }
844
+ if ($verbose) {
845
+ my @type = ( City => \%cityFlags, Region => \%rgnFlags, Subregion => \%subFlags, Country => \%ccFlags );
846
+ for (;;) {
847
+ my $type = shift @type or last;
848
+ my $flags = shift @type;
849
+ print "$type flags:\n";
850
+ printf(" 0x%.2x - %d\n", 0, $$flags{0} || 0);
851
+ my @label = qw(preferred short colloquial historic);
852
+ foreach my $bit (0..5) {
853
+ my $n = 0;
854
+ $_ & (1<<$bit) and ++$n foreach keys %$flags;
855
+ printf(" 0x%.2x - %d (%s)\n", (1<<$bit), $n, shift(@label)) if $n;
856
+ }
857
+ }
858
+ }
859
+ }
860
+
861
+ print "Output file size(s):\n";
862
+ printf "%8.2f MB %s (%d entries)\n", $outSize / 1e6, $outFile, scalar(keys %coords);
863
+ printf "%8.2f MB %s\n", $altSize / 1e6, $outAltNames if $altSize;
864
+ printf "%8.2f MB %s/*.pm\n", $langSize / 1e6, $geoLang if $langSize;
865
+ printf "%8.2f MB Total\n", ($outSize + $altSize + $langSize) / 1e6 if $altSize or $langSize;
866
+
867
+ # end