exiftool_vendored 12.82.0 → 12.85.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/bin/Changes +59 -0
  3. data/bin/MANIFEST +2 -18
  4. data/bin/META.json +1 -1
  5. data/bin/META.yml +1 -1
  6. data/bin/README +47 -46
  7. data/bin/build_geolocation +945 -0
  8. data/bin/config_files/example.config +2 -2
  9. data/bin/exiftool +75 -51
  10. data/bin/fmt_files/gpx.fmt +2 -1
  11. data/bin/fmt_files/gpx_wpt.fmt +2 -1
  12. data/bin/lib/Image/ExifTool/Apple.pm +52 -7
  13. data/bin/lib/Image/ExifTool/BuildTagLookup.pm +5 -2
  14. data/bin/lib/Image/ExifTool/Canon.pm +11 -2
  15. data/bin/lib/Image/ExifTool/CanonVRD.pm +18 -5
  16. data/bin/lib/Image/ExifTool/DJI.pm +29 -0
  17. data/bin/lib/Image/ExifTool/DPX.pm +3 -3
  18. data/bin/lib/Image/ExifTool/Exif.pm +19 -2
  19. data/bin/lib/Image/ExifTool/GM.pm +17 -8
  20. data/bin/lib/Image/ExifTool/Geolocation.dat +0 -0
  21. data/bin/lib/Image/ExifTool/Geolocation.pm +170 -104
  22. data/bin/lib/Image/ExifTool/Geotag.pm +18 -10
  23. data/bin/lib/Image/ExifTool/ID3.pm +28 -6
  24. data/bin/lib/Image/ExifTool/Nikon.pm +25 -6
  25. data/bin/lib/Image/ExifTool/Pentax.pm +64 -13
  26. data/bin/lib/Image/ExifTool/QuickTime.pm +26 -7
  27. data/bin/lib/Image/ExifTool/QuickTimeStream.pl +5 -0
  28. data/bin/lib/Image/ExifTool/Sony.pm +15 -6
  29. data/bin/lib/Image/ExifTool/TagLookup.pm +3557 -3540
  30. data/bin/lib/Image/ExifTool/TagNames.pod +61 -13
  31. data/bin/lib/Image/ExifTool/WriteQuickTime.pl +4 -2
  32. data/bin/lib/Image/ExifTool/Writer.pl +166 -134
  33. data/bin/lib/Image/ExifTool/XMP.pm +2 -0
  34. data/bin/lib/Image/ExifTool/XMP2.pl +3 -0
  35. data/bin/lib/Image/ExifTool.pm +59 -27
  36. data/bin/lib/Image/ExifTool.pod +88 -71
  37. data/bin/perl-Image-ExifTool.spec +45 -45
  38. data/lib/exiftool_vendored/version.rb +1 -1
  39. metadata +3 -20
  40. data/bin/lib/Image/ExifTool/GeoLang/cs.pm +0 -978
  41. data/bin/lib/Image/ExifTool/GeoLang/de.pm +0 -1975
  42. data/bin/lib/Image/ExifTool/GeoLang/en_ca.pm +0 -44
  43. data/bin/lib/Image/ExifTool/GeoLang/en_gb.pm +0 -124
  44. data/bin/lib/Image/ExifTool/GeoLang/es.pm +0 -2921
  45. data/bin/lib/Image/ExifTool/GeoLang/fi.pm +0 -1116
  46. data/bin/lib/Image/ExifTool/GeoLang/fr.pm +0 -3171
  47. data/bin/lib/Image/ExifTool/GeoLang/it.pm +0 -2750
  48. data/bin/lib/Image/ExifTool/GeoLang/ja.pm +0 -10256
  49. data/bin/lib/Image/ExifTool/GeoLang/ko.pm +0 -4499
  50. data/bin/lib/Image/ExifTool/GeoLang/nl.pm +0 -1270
  51. data/bin/lib/Image/ExifTool/GeoLang/pl.pm +0 -3019
  52. data/bin/lib/Image/ExifTool/GeoLang/ru.pm +0 -18220
  53. data/bin/lib/Image/ExifTool/GeoLang/sk.pm +0 -441
  54. data/bin/lib/Image/ExifTool/GeoLang/sv.pm +0 -714
  55. data/bin/lib/Image/ExifTool/GeoLang/tr.pm +0 -452
  56. data/bin/lib/Image/ExifTool/GeoLang/zh_cn.pm +0 -2225
  57. data/bin/lib/Image/ExifTool/GeoLang/zh_tw.pm +0 -72
@@ -0,0 +1,945 @@
1
+ #!/usr/bin/perl -w
2
+ #-------------------------------------------------------------------------------
3
+ # File: build_geolocation
4
+ #
5
+ # Description: Parse geonames files to create ExifTool geolocation database
6
+ #
7
+ # Syntax: build_geolocation [OPTIONS] [DBFILE] ...
8
+ #
9
+ # Options: (see -h output)
10
+ #
11
+ # Created: 2024-03-03 - P. Harvey
12
+ # 2024-04-15 - PH Clean up and add options for public release
13
+ # 2024-04-22 - PH Increased number of possible feature codes from
14
+ # 32 to 64. Convert backslashes in directory names
15
+ # 2024-04-24 - PH Fixed problem with population exponent when run
16
+ # under ActivePerl
17
+ # 2024-04-29 - PH Added feature types and default to db v1.03
18
+ #
19
+ # Notes: Requires these files from https://download.geonames.org/export/
20
+ #
21
+ # allCountries.txt (or other input database if specified)
22
+ # countryInfo.txt
23
+ # admin1CodesASCII.txt
24
+ # admin2Codes.txt
25
+ # featureCodes_XX.txt (optional)
26
+ # alternateNamesV2.txt (optional)
27
+ #
28
+ # Output datbase format (Geolocation.dat):
29
+ #
30
+ # Header:
31
+ # "GeolocationV.VV\tNNNN\n" (V.VV=version, NNNN=num city entries)
32
+ # "# <comment>\n"
33
+ # NNNN City entries:
34
+ # Offset Format Description
35
+ # 0 int16u - latitude high 16 bits (converted to 0-0x100000 range)
36
+ # 2 int8u - latitude low 4 bits, longitude low 4 bits
37
+ # 3 int16u - longitude high 16 bits
38
+ # 5 int8u - index of country in country list
39
+ # 6 int8u - 0xf0 = population E exponent (in format "N.Fe+0E"), 0x0f = population N digit
40
+ # 7 int16u - 0xf000 = population F digit, 0x0fff = index in region list (admin1)
41
+ # 9 int16u - v1.02: 0x7fff = index in subregion (admin2), 0x8000 = high bit of time zone
42
+ # 9 int16u - v1.03: index in subregion (admin2)
43
+ # 11 int8u - low byte of time zone index
44
+ # 12 int8u - 0x3f = feature code index (see below), v1.03: 0x80 = high bit of time zone
45
+ # 13 string - UTF8 City name, terminated by newline
46
+ # "\0\0\0\0\x01"
47
+ # Country entries:
48
+ # 1. 2-character country code
49
+ # 2. Country name, terminated by newline
50
+ # "\0\0\0\0\x02"
51
+ # Region entries:
52
+ # 1. Region name, terminated by newline
53
+ # "\0\0\0\0\x03"
54
+ # Subregion entries:
55
+ # 1. Subregion name, terminated by newline
56
+ # "\0\0\0\0\x04"
57
+ # Time zone entries:
58
+ # 1. Time zone name, terminated by newline
59
+ # "\0\0\0\0\x05" (feature codes added in v1.03)
60
+ # Feature codes:
61
+ # 1. Feature code, optional space-followed-by-feature-name, then newline
62
+ # "\0\0\0\0\0"
63
+ #
64
+ # Feature Codes v1.02: (see http://www.geonames.org/export/codes.html#P for descriptions)
65
+ #
66
+ # 0. Other 3. PPLA2 6. PPLA5 9. PPLF 12. PPLR 15. PPLX
67
+ # 1. PPL 4. PPLA3 7. PPLC 10. PPLG 13. PPLS
68
+ # 2. PPLA 5. PPLA4 8. PPLCH 11. PPLL 14. STLMT
69
+ #
70
+ # Feature Codes v1.03 and later are listed at the end of the database
71
+ #-------------------------------------------------------------------------------
72
+
73
+ use strict;
74
+
75
+ my $dbVer = '1.03'; # default output database version
76
+ my $dbFile = 'allCountries.txt'; # default database file
77
+ my $countryFile = 'countryInfo.txt'; # mandatory country names file
78
+ my $regionFile = 'admin1CodesASCII.txt'; # mandatory region names file
79
+ my $admin2File = 'admin2Codes.txt'; # mandatory subregion names file
80
+ my $featureFile = 'featureCodes_en.txt'; # optional feature names file
81
+ my $altNamesFile = 'alternateNamesV2.txt'; # optional alternate names file
82
+ my $outFile = 'Geolocation.dat'; # output ExifTool database file
83
+ my $outAltNames = 'AltNames.dat'; # output alternate names file
84
+ my $outDirName = 'Geolocation_out'; # output directory for database files
85
+ my $geoLang = 'GeoLang'; # output directory for language files
86
+
87
+ my %defaults = (
88
+ file => $dbFile,
89
+ minpop => 2000,
90
+ def_codes => 'PPLA,PPLA2',
91
+ def_codesp => 'PPL,PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC,PPLCH,PPLF,PPLG,PPLH,PPLL,PPLQ,PPLR,PPLS,PPLW,PPLX,STLMT',
92
+ );
93
+
94
+ # languages to read from geonames database
95
+ my @languages = qw(cs de en en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh zh-cn zh-tw);
96
+
97
+ # indices of feature codes (v1.02 is hard-coded in ExifTool)
98
+ my @fc102 = qw(
99
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
100
+ PPLF PPLG PPLL PPLR PPLS STLMT PPLX
101
+ );
102
+ # base features for v1.03+
103
+ my @fc103 = qw(
104
+ Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
105
+ PPLF PPLG PPLH PPLL PPLQ PPLR PPLS PPLW PPLX STLMT
106
+ );
107
+ my $i = 0;
108
+ my @featureCodes = $dbVer eq '1.02' ? @fc102 : @fc103;
109
+ my %featureCodes = map { $_ => $i++ } @featureCodes;
110
+ my %featureNames;
111
+
112
+ my ($dbfile, @dbfiles, $outDir, $verbose, $noLang, %needRgn);
113
+ my %optArgs = ( p => 1, c => 1, cp => 1, l => 1, o => 1, ver => 1 );
114
+
115
+ # process command-line arguments
116
+ my $opts = { };
117
+ while (@ARGV) {
118
+ my $opt = shift;
119
+ if (not $opt =~ s/^-//) {
120
+ $opt = '.' unless length $opt;
121
+ $opt =~ tr(\\)(/); # use forward slashes
122
+ $opt =~ s(/$)(); # remove trailing slash
123
+ $opt = "$opt/$defaults{file}" if -d $opt;
124
+ -e $opt or die "Error opening database $opt\n";
125
+ push @dbfiles, { %defaults, %$opts, file => $opt };
126
+ $opts = { };
127
+ next;
128
+ }
129
+ my $arg;
130
+ if ($optArgs{$opt}) {
131
+ $arg = shift;
132
+ defined $arg or die "Expecting argument for -$opt option\n";
133
+ }
134
+ if ($opt eq 'p') {
135
+ $arg = uc $arg;
136
+ if ($arg =~ /=/) {
137
+ my ($cc, $mp) = split /=/, $arg;
138
+ $mp =~ /^\d+$/ or die "Expecting number on rhs of '=' for -p option\n";
139
+ my @cc = split /,/, $cc;
140
+ foreach $cc (@cc) {
141
+ $cc =~ /^([A-Z]{2})(\..+)?$/ or die "Invalid country/region '$cc' for -p option\n";
142
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
143
+ $$opts{cc_minpop}{$cc} = $mp;
144
+ }
145
+ } else {
146
+ $arg =~ /^\d+$/ or die "Expecting number for -p option\n";
147
+ $$opts{minpop} = $arg;
148
+ }
149
+ } elsif ($opt =~ /^c(p?)$/) {
150
+ my $p = $1;
151
+ $arg = uc $arg;
152
+ my ($cc, $co);
153
+ if ($arg =~ /=/) {
154
+ ($cc, $co) = split /=/, $arg;
155
+ } else {
156
+ ($cc, $co) = ('??', $arg);
157
+ }
158
+ my $sign = $co =~ s/^([-+])// ? $1 : '';
159
+ my @co = split /,/, $co;
160
+ my @cc = split /,/, $cc;
161
+ # store lookup for features to keep for each country ('??' = any country)
162
+ foreach $cc (@cc) {
163
+ $cc =~ /^([A-Z]{2}|\?\?)(\..+)?$/ or die "Invalid country/region '$cc' for -$opt option\n";
164
+ $needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
165
+ if (not $sign) {
166
+ $$opts{"keep$p"}{$cc} = { };
167
+ } elsif (not $$opts{"keep$p"}{$cc}) {
168
+ # start from defaults
169
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
170
+ $$opts{"keep$p"}{$cc} = \%codes;
171
+ }
172
+ foreach $co (@co) {
173
+ if ($sign eq '-') {
174
+ delete $$opts{"keep$p"}{$cc}{$co};
175
+ } else {
176
+ $$opts{"keep$p"}{$cc}{$co} = 1;
177
+ }
178
+ }
179
+ }
180
+ } elsif ($opt eq 'l') {
181
+ $arg = lc $arg;
182
+ my @langs = split ',', $arg;
183
+ if (not @langs) {
184
+ undef @languages;
185
+ $noLang = 1;
186
+ } elsif ($langs[0] =~ s/^-//) {
187
+ @languages = grep !/^$langs[0]$/, @languages foreach @langs;
188
+ } else {
189
+ @languages = @langs;
190
+ }
191
+ } elsif ($opt eq 'o') {
192
+ $outDir = $arg;
193
+ } elsif ($opt eq 'ver') {
194
+ $dbVer = $arg;
195
+ $dbVer =~ /^1\.0[23]$/ or die "Unsupported version number $dbVer\n";
196
+ } elsif ($opt eq 'v') {
197
+ $verbose = 1;
198
+ } elsif ($opt eq 'h') {
199
+ my $defcp = $defaults{def_codesp};
200
+ $defcp =~ s/(PPLG,)/\n $1/;
201
+ my $defLang = join ',', @languages;
202
+ $defLang =~ s/(ja,)/\n $1/;
203
+ print <<"END";
204
+ Description: Build ExifTool Geolocation database.
205
+
206
+ Syntax: build_geolocation [OPTIONS] [DBFILE] ...
207
+
208
+ Options:
209
+ DBFILE - Input database file name or directory. Multiple input database
210
+ files may be specified. The -p, -c and -cp options apply to
211
+ the database that comes after them on the command line.
212
+ Default is "$dbFile".
213
+ -p POP - Minimum population for cities to include. POP may be a number
214
+ or be of the form "CC[,C2...]=###" to set different limits for
215
+ specific countries/regions, where CC and C2 are country codes
216
+ with optional region name or code appended after a period (eg.
217
+ "CA.Ontario,US=500" sets the minimum population to 500 for
218
+ cities on Ontario Canada or the U.S.). If a region is
219
+ specified, either the full name or the geonames admin1 code may
220
+ be used, and case and spaces are not significant. May be
221
+ multiple -p options for each input DBFILE. Default is "$defaults{minpop}".
222
+ -c CODE - Feature codes to always include, regardless of population. CODE
223
+ is a comma-separated list of feature codes, with an optional
224
+ leading comma-separated list of country/region codes followed
225
+ by an equals sign to apply only to specific countries. The
226
+ feature-code list may begin with a dash to remove entries from
227
+ the default list, or a plus sign to add entries. May be
228
+ multiple -c options for each intput DBFILE. Country/region and
229
+ feature names are case insensitive. Default is "$defaults{def_codes}".
230
+ -cp CODE - Additional features to include if above minimum population.
231
+ Default is "$defcp".
232
+ -l LANG - Alternate languages to read from $altNamesFile if
233
+ available. These are used to generate $outAltNames an the
234
+ $geoLang files. LANG is a comma-separated list of language
235
+ codes, starting with a dash to remove items from the default
236
+ list. May be set to an empty string to disable generation
237
+ of alternate language files even if $altNamesFile
238
+ exists. The same set of languages applies to all input
239
+ database files. Default is "$defLang".
240
+ -o OUTDIR - Output directory name. Default is the same directory as the
241
+ first input database file. A directory named $outDirName
242
+ containing the output files will be created in this directory.
243
+ -ver VER - Version for output geolocation database (default is $dbVer).
244
+ -v - Verbose messages.
245
+ -h - Show this help.
246
+
247
+ Input files (download from https://download.geonames.org/export/dump/):
248
+ $dbFile - default database file (smaller files with names
249
+ like "cities###.txt" may be specified instead)
250
+ $countryFile - mandatory country names file
251
+ $regionFile - mandatory region names file
252
+ $admin2File - mandatory subregion names file
253
+ $featureFile - optional feature codes file
254
+ $altNamesFile - optional alternate names file (must exist to
255
+ to generate $outAltNames and $geoLang files)
256
+
257
+ Output files:
258
+ $outDirName - default output directory name
259
+ $outFile - ExifTool database file
260
+ $outAltNames - alternate names file
261
+ $geoLang - directory for alternate language files
262
+
263
+ Author:
264
+ Copyright 2024, Phil Harvey
265
+
266
+ This is free software; you can redistribute it and/or modify it under the
267
+ same terms as Perl itself.
268
+ END
269
+ exit 0;
270
+ } else {
271
+ die "Unknown option '-$opt'\n";
272
+ }
273
+ }
274
+
275
+ if (@dbfiles) {
276
+ # apply any remaining options to last database file
277
+ $dbfiles[-1]{$_} = $$opts{$_} foreach keys %$opts;
278
+ } else {
279
+ # use default database file if none specified
280
+ push @dbfiles, { %defaults, %$opts };
281
+ unless (-e $dbfiles[0]{file}) {
282
+ # also look in script directory
283
+ if ($0 =~ m{(.*)/} and -e "$1/$dbfiles[0]{file}") {
284
+ $dbfiles[0]{file} = "$1/$dbfiles[0]{file}";
285
+ } else {
286
+ die qq(Database "$dbfiles[0]{file}" not found. Use -h option for help.\n);
287
+ }
288
+ }
289
+ }
290
+
291
+ # determine our working directory
292
+ my $dbdir = $dbfiles[0]{file};
293
+ $dbdir = '.' unless $dbdir =~ s(/[^/]*$)();
294
+
295
+ # add default feature code lookups if necessary
296
+ foreach $dbfile (@dbfiles) {
297
+ my $p;
298
+ foreach $p ('', 'p') {
299
+ next if $$dbfile{"keep$p"}{'??'};
300
+ my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
301
+ $$dbfile{"keep$p"}{'??'} = \%codes;
302
+ }
303
+ }
304
+
305
+ # pre-read region file if necessary
306
+ if (%needRgn) {
307
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
308
+ while (<REGION>) {
309
+ my @items = split /\t/;
310
+ my $rgn = $items[0];
311
+ my ($cc) = split /\./, $rgn;
312
+ next unless $needRgn{$cc};
313
+ unless ($needRgn{$rgn}) { # allow region code to be used
314
+ $rgn = $cc . '.' . uc$items[1]; # also support full region name
315
+ unless ($needRgn{$rgn}) {
316
+ $rgn =~ tr/ //d;
317
+ next unless $needRgn{$rgn}; # also allow no spaces
318
+ }
319
+ }
320
+ $needRgn{$rgn} = [$items[0], "$cc.$items[1]"];
321
+ }
322
+ close REGION;
323
+ foreach (sort keys %needRgn) {
324
+ next if length == 2;
325
+ die "No matching region for $_\n" unless ref $needRgn{$_};
326
+ }
327
+ }
328
+
329
+ if ($verbose) {
330
+ my $langs = join ',', @languages;
331
+ $langs or $langs = '<none>';
332
+ print "Languages to read from input database(s):\n $langs\n";
333
+ foreach $dbfile (@dbfiles) {
334
+ print "Parameters for reading $$dbfile{file}:\n";
335
+ print " Minimum populations (??=any country):\n";
336
+ print " ??=$$dbfile{minpop}\n";
337
+ foreach (reverse sort keys %{$$dbfile{cc_minpop}}) {
338
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
339
+ print " $cc=$$dbfile{cc_minpop}{$_}\n";
340
+ }
341
+ print " Features to keep regardless of population:\n";
342
+ foreach (reverse sort keys %{$$dbfile{keep}}) {
343
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
344
+ print " $cc=",join(',', sort keys %{$$dbfile{keep}{$_}}), "\n";
345
+ }
346
+ print " Features to keep for population >= minimum:\n";
347
+ foreach (reverse sort keys %{$$dbfile{keepp}}) {
348
+ my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
349
+ print " $_=",join(',', sort keys %{$$dbfile{keepp}{$_}}), "\n";
350
+ }
351
+ }
352
+ }
353
+
354
+ # translate option region arguments to region codes
355
+ foreach $dbfile (@dbfiles) {
356
+ my ($type, $cc);
357
+ foreach $type (qw(cc_minpop keep keepp)) {
358
+ my @cc = keys %{$$dbfile{$type}};
359
+ foreach $cc (@cc) {
360
+ next unless ref $needRgn{$cc};
361
+ my $tmp = $$dbfile{$type}{$cc};
362
+ delete $$dbfile{$type}{$cc};
363
+ $$dbfile{$type}{$needRgn{$cc}[0]} = $tmp;
364
+ }
365
+ }
366
+ }
367
+
368
+ $outDir = "$dbdir/$outDirName" unless defined $outDir;
369
+ -d $outDir or mkdir $outDir, 0777 or die "Error creating output directory '$outDir'\n";
370
+ -e "$dbdir/$_" or die "Missing input file $dbdir/$_\n" foreach $countryFile, $regionFile, $admin2File;
371
+
372
+ # order of country codes, region names and subregions in database
373
+ my (%orderCC, %orderRgn, %orderSub);
374
+
375
+ # languages to read from geonames database (converted to lower case)
376
+ my %languages = map { $_ => 1 } @languages;
377
+
378
+ # language codes supported by ExifTool
379
+ my @supportedLangs = qw(cs de en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh-cn zh-tw);
380
+
381
+ # supported country-specific languages
382
+ my %ccLang = ( TW => 'zh', CN => 'zh', CA => 'en', GB => 'en' );
383
+ my (%lang, %featureLang, %haveCountry, %cityFlags, %rgnFlags, %subFlags, %ccFlags, %flags);
384
+ my (%haveRegion, %haveSubRgn, $filesize, $percent);
385
+
386
+ sub GetFileSize($)
387
+ {
388
+ my $file = shift;
389
+ seek $file, 0, 2 or die "Seek error\n";
390
+ my $size = tell $file;
391
+ seek $file, 0, 0 or die "Seek error\n";
392
+ return $size;
393
+ }
394
+
395
+ # pre-scan database to determine which countries, regions subregions and
396
+ # feature codes we will be using
397
+ foreach $dbfile (@dbfiles) {
398
+ my $database = $$dbfile{file};
399
+ my $upgraded;
400
+
401
+ print "Reading $database... 0%";
402
+ flush STDOUT;
403
+
404
+ # pre-read the files to initialize necessary variables
405
+ open INFILE, '<', $database or die "Error opening $database\n";
406
+ $filesize = GetFileSize(\*INFILE);
407
+
408
+ open OUTFILE, '>', "$outDir/$outFile" or die "Error creating $outFile in $outDir\n";
409
+ binmode(OUTFILE);
410
+
411
+ $$dbfile{kept} = [ ];
412
+ $percent = -1;
413
+ while (<INFILE>) {
414
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
415
+ if ($percent != $p) {
416
+ printf("\b\b\b\b%3d%%", $percent = $p);
417
+ flush STDOUT;
418
+ }
419
+ my @items = split /\t/;
420
+ my ($dbnum, $code, $cc, $rgn, $sub, $pop) = @items[0,7,8,10,11,14];
421
+ next unless @items > 17 and $cc =~ /^[A-Z]{2}$/;
422
+ my ($minpop, $keep);
423
+ if ($needRgn{$cc} and defined $$dbfile{cc_minpop}{"$cc$rgn"}) {
424
+ $minpop = $$dbfile{cc_minpop}{"$cc$rgn"};
425
+ } elsif (defined $$dbfile{cc_minpop}{$cc}) {
426
+ $minpop = $$dbfile{cc_minpop}{$cc};
427
+ } else {
428
+ $minpop = $$dbfile{minpop};
429
+ }
430
+ # keep regardless of population
431
+ if ($needRgn{$cc} and $$dbfile{keep}{"$cc$rgn"}) {
432
+ $keep = $$dbfile{keep}{"$cc$rgn"}{$code};
433
+ } elsif ($$dbfile{keep}{$cc}) {
434
+ $keep = $$dbfile{keep}{$cc}{$code};
435
+ } else {
436
+ $keep = $$dbfile{keep}{'??'}{$code};
437
+ }
438
+ if ($pop < $minpop) {
439
+ next unless $keep;
440
+ } elsif ($needRgn{$cc} and $$dbfile{keepp}{"$cc$rgn"}) {
441
+ next unless $$dbfile{keepp}{"$cc$rgn"}{$code};
442
+ } elsif ($$dbfile{keepp}{$cc}) {
443
+ next unless $$dbfile{keepp}{$cc}{$code};
444
+ } else {
445
+ next unless $$dbfile{keepp}{'??'}{$code};
446
+ }
447
+ push @{$$dbfile{kept}}, $_;
448
+ $lang{$dbnum} = { alt => [ ] };
449
+ $haveCountry{$cc} = 1;
450
+ $haveRegion{"$cc$rgn"} = 1;
451
+ $haveSubRgn{"$cc$rgn.$sub"} = 1;
452
+ # add new feature codes (up to maximum index of 0x3f)
453
+ unless ($featureCodes{$code} or @featureCodes > 0x3f) {
454
+ if ($dbVer eq '1.02') {
455
+ next if $code =~ /^(PPLH|PPLQ|PPLW)$/; # (stored as "Other" in v1.02)
456
+ $dbVer = '1.03';
457
+ $upgraded = 1; # print upgrade warning
458
+ @featureCodes = @fc103;
459
+ my $i = 0;
460
+ %featureCodes = map { $_ => $i++ } @featureCodes;
461
+ next if $featureCodes{$code};
462
+ }
463
+ push @featureCodes, $code;
464
+ $featureCodes{$code} = $#featureCodes;
465
+ }
466
+ }
467
+ close INFILE;
468
+ print "\b\b\b\bDone.\n";
469
+ warn "Some feature codes not supported by version 1.02, writing as 1.03 instead.\n" if $upgraded;
470
+ }
471
+
472
+ # read feature names
473
+ if (open INFILE, '<', "$dbdir/$featureFile") {
474
+ print "Reading $dbdir/$featureFile\n";
475
+ while (<INFILE>) {
476
+ my @items = split /\t/;
477
+ $items[0] =~ s/^.\.//; # remove feature group and "." separator
478
+ next unless $featureCodes{$items[0]};
479
+ my $name = ucfirst $items[1];
480
+ $name =~ s/ ([a-z])/ \U$1/g;
481
+ $featureNames{$items[0]} = $name;
482
+ }
483
+ close INFILE;
484
+ } else {
485
+ print "Not found: $dbdir/$featureFile\n";
486
+ print "--> Not storing feature type names\n";
487
+ }
488
+
489
+ # read country names
490
+ $i = 0;
491
+ open INFILE, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
492
+ print "Reading $dbdir/$countryFile\n";
493
+ while (<INFILE>) {
494
+ next if /^#/;
495
+ my @items = split /\t/;
496
+ next unless $haveCountry{$items[0]};
497
+ $lang{$items[16]} = { alt => [ ] }; # reference lookup by db number
498
+ $orderCC{$items[0]} = $i++; # (entry 0 is the first country)
499
+ }
500
+ close INFILE;
501
+ printf " %.6d countries (0x%.4x)\n",$i,$i if $verbose;
502
+ die "Too many countries!\n" if $i > 0x100; # (no default 0 entry)
503
+
504
+ # read region (admin1) names
505
+ $i = 0;
506
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
507
+ print "Reading $dbdir/$regionFile\n";
508
+ while (<REGION>) {
509
+ chomp;
510
+ my @items = split /\t/;
511
+ $items[0] =~ tr/.//d; # (remove "." separator)
512
+ next unless $haveRegion{$items[0]};
513
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
514
+ $orderRgn{$items[0]} = ++$i; # (entry 0 is default "" region)
515
+ }
516
+ close REGION;
517
+ printf " %.6d regions (0x%.4x)\n",$i,$i if $verbose;
518
+ die "Too many regions!\n" if $i > 0x0fff; # (account for default 0 entry)
519
+
520
+ # read subregion (admin2) names
521
+ $i = 0;
522
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
523
+ print "Reading $dbdir/$admin2File\n";
524
+ while (<ADMIN2>) {
525
+ chomp;
526
+ my @items = split /\t/;
527
+ $items[0] =~ s/\.//; # (remove first "." separator)
528
+ next unless $haveSubRgn{$items[0]};
529
+ $lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
530
+ $orderSub{$items[0]} = ++$i; # (entry 0 is default "" subregion)
531
+ }
532
+ close ADMIN2;
533
+ printf " %.6d subregions (0x%.4x)\n",$i,$i if $verbose;
534
+ if ($i > ($dbVer eq '1.02' ? 0x7fff : 0xffff)) {
535
+ die "Too many subregions!\n" if $i > 0xffff;
536
+ $dbVer = '1.03';
537
+ warn "Too many subregions for version 1.02, writing as 1.03 instead.\n";
538
+ }
539
+
540
+ # read alternate names file if available
541
+ if (not $noLang and open INFILE, '<', "$dbdir/$altNamesFile") {
542
+ $filesize = GetFileSize(\*INFILE);
543
+ print "Reading $dbdir/$altNamesFile... 0%";
544
+ my %bestPri;
545
+ while (<INFILE>) {
546
+ my $p = int(100 * tell(INFILE) / $filesize + 0.5);
547
+ if ($percent != $p) {
548
+ printf("\b\b\b\b%3d%%", $percent = $p);
549
+ flush STDOUT;
550
+ }
551
+ # items: 0=altID,1=geoID,2=lang,3=alt name,4=preferred,5=short,6=colloquial,7=historic
552
+ my @items = split /\t/;
553
+ my $lkup = $lang{$items[1]} or next;
554
+ my $altList = $lang{$items[1]}{alt};
555
+ my $lng = lc $items[2];
556
+ next if $lng and not $languages{$lng};
557
+ push @$altList, $items[3] unless grep /^\Q$items[3]\E$/i, @$altList;
558
+ next unless $lng;
559
+ my $flags = 0;
560
+ # keep only the best translation for this name for each language
561
+ $items[$_] and $flags |= (1<<($_-4)) foreach 4,5,6,7;
562
+ $flags{$items[1]} = ( $flags{$items[1]} || 0 ) | $flags;
563
+ next if $items[6] or $items[7]; # ignore colloquial and historic names
564
+ my $pri = $items[5] ? 0 : ($items[4] ? 1 : 2); # priority for best type of name
565
+ my $langPri = $bestPri{$lng};
566
+ $langPri or $langPri = $bestPri{$lng} = { };
567
+ next if $$langPri{$items[1]} and $$langPri{$items[1]} > $pri;
568
+ $$langPri{$items[1]} = $pri;
569
+ # save language-specific name for this feature, removing commas
570
+ ($$lkup{$lng} = $items[3]) =~ tr/,//d;
571
+ }
572
+ print "\b\b\b\bDone.\n";
573
+ close INFILE;
574
+ # read alternate feature names
575
+ if (%featureNames) {
576
+ my $lng;
577
+ foreach $lng (@languages) {
578
+ next if $lng eq 'en' or not $languages{$lng};
579
+ my $file = "$dbdir/$featureFile";
580
+ $file =~ s/_en\./_$lng./ or next;
581
+ next unless open INFILE, '<', $file;
582
+ print "Reading $file\n";
583
+ while (<INFILE>) {
584
+ my @items = split /\t/;
585
+ $items[0] =~ s/^.\.//; # remove feature group and "." separator
586
+ next unless $featureNames{$items[0]};
587
+ utf8::decode($items[1]);
588
+ my $name = ucfirst $items[1];
589
+ $name =~ s/ (.)/ \U$1/g;
590
+ # change $name back to byte string
591
+ if ($] >= 5.006 and (eval { require Encode; Encode::is_utf8($name) } or $@)) {
592
+ $name = $@ ? pack('C*',unpack($] < 5.010000 ? 'U0C*' : 'C0C*',$name)) : Encode::encode('utf8',$name);
593
+ }
594
+ next if $name eq $featureNames{$items[0]};
595
+ $featureLang{$lng}{$items[0]} = $name;
596
+ }
597
+ close INFILE;
598
+ }
599
+ }
600
+ } else {
601
+ print "Not found: $dbdir/$altNamesFile\n--> " unless $noLang;
602
+ print "Not writing alternate languages\n";
603
+ $noLang = 1;
604
+ }
605
+
606
+ my (%coords, %langLookups);
607
+
608
+ foreach $dbfile (@dbfiles) {
609
+ my $database = $$dbfile{file};
610
+
611
+ print "Processing database entries... 0%";
612
+ my $i = 0;
613
+ my $num = scalar @{$$dbfile{kept}};
614
+
615
+ foreach (@{$$dbfile{kept}}) {
616
+ my @items = split /\t/;
617
+ my ($lat, $lon) = @items[4,5];
618
+ $lat = int(($lat + 90) / 180 * 0x100000 + 0.5) & 0xfffff;
619
+ $lon = int(($lon + 180) / 360 * 0x100000 + 0.5) & 0xfffff;
620
+ my $coord = pack('nCn',$lat>>4,(($lat&0x0f)<<4)|($lon&0x0f),$lon>>4);;
621
+ # take the city with the highest population if there are
622
+ # multiple cities with the same reduced coordinates
623
+ if ($coords{$coord} and $coords{$coord}[6] >= $items[14]) {
624
+ next;
625
+ }
626
+ # coords=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
627
+ my ($altList, $alt);
628
+ die "Internal error\n" unless $lang{$items[0]} and $altList = $lang{$items[0]}{alt};
629
+ if (@$altList) {
630
+ tr/,//d foreach @$altList;
631
+ $alt = join ',', sort @$altList;
632
+ } else {
633
+ $alt = '';
634
+ }
635
+ $coords{$coord} = [ @items[4,5,1,8,10,11,14,17,7] ];
636
+ $coords{$coord}[9] = $alt;
637
+ my $lkup = $lang{$items[0]}; # 0=geoID
638
+ my $key = $items[1]; # 1=city
639
+ $lkup or die "Missing language for geoID $items[0]\n";
640
+ $cityFlags{$flags{$items[0]}} = ($cityFlags{$flags{$items[0]}} || 0) + 1 if defined $flags{$items[0]};
641
+ my $ccLang = $ccLang{$items[8]}; # get country-specific language
642
+ if ($ccLang and $$lkup{$ccLang}) {
643
+ my $lc = $ccLang . '-' . lc($items[8]); # eg. zh-cn
644
+ # add country suffix for this language in this country
645
+ $$lkup{$lc} = $$lkup{$ccLang} unless $$lkup{$lc};
646
+ }
647
+ foreach (@supportedLangs) {
648
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
649
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
650
+ push @{$langLookups{$_}{$key}}, "$items[8]$items[10].$items[11],$$lkup{$_}";
651
+ }
652
+ my $p = int(100 * ++$i / $num + 0.5);
653
+ next if $percent == $p;
654
+ printf("\b\b\b\b%3d%%", $percent = $p);
655
+ flush STDOUT;
656
+ }
657
+ print "\b\b\b\bDone.\n";
658
+ }
659
+
660
+ # write city database
661
+ my $str = $noLang ? '' : " and $outAltNames";
662
+ my @t = localtime;
663
+ my $date = sprintf('%.4d-%.2d-%.2d', $t[5]+1900, $t[4]+1, $t[3]);
664
+ print "Writing $outDir/$outFile (version $dbVer)$str...\n";
665
+ print OUTFILE "Geolocation$dbVer\t",scalar(keys %coords),"\n";
666
+ print OUTFILE "# $date Cities with population $dbfiles[0]{minpop} or greater from geonames.org with a Creative Commons license\n";
667
+
668
+ if ($noLang) {
669
+ unlink "$outDir/$outAltNames";
670
+ } else {
671
+ open ALTOUT, ">$outDir/$outAltNames";
672
+ binmode ALTOUT;
673
+ }
674
+ my (%tz, @tz, %fcodes);
675
+ my $tzNum = 0;
676
+ foreach (sort { $a cmp $b } keys %coords) {
677
+ my $items = $coords{$_};
678
+ # @$items=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
679
+ my $iCC = $orderCC{$$items[3]};
680
+ defined $iCC or warn("Unknown country code $$items[3]\n"), next;
681
+ my $iRgn = $orderRgn{"$$items[3]$$items[4]"} || 0;
682
+ my $iSub = $orderSub{"$$items[3]$$items[4].$$items[5]"} || 0;
683
+ my $tn = $tz{$$items[7]};
684
+ unless ($tn) {
685
+ push @tz, $$items[7];
686
+ $tn = $tz{$$items[7]} = $tzNum++;
687
+ }
688
+ # convert population to our binary format
689
+ # Note: format in ActivePerl is "3.1e+004", but "3.1+04" in other Perls,
690
+ # but other Perls will round 34500 to "3.4e+04", so add 1 to get "3.5e+04"
691
+ $$items[6] += 1 if $$items[6] > 100 and not $$items[6] % 10;
692
+ my $pop = sprintf('%.1e',$$items[6]);
693
+ # pack CC, population and region index into a 32-bit integer
694
+ my $code = ($iCC << 24) | (substr($pop,-1,1)<<20) | (substr($pop,0,1)<<16) | (substr($pop,2,1)<<12) | $iRgn;
695
+ $fcodes{$$items[8]} = ($fcodes{$$items[8]} || 0) + 1;
696
+ my $fc = $featureCodes{$$items[8]} || 0;
697
+ # store high bit of timezone index
698
+ if ($tn > 255) {
699
+ if ($dbVer eq '1.02') {
700
+ $iSub |= 0x8000;
701
+ $tn -= 256;
702
+ } else {
703
+ $fc |= 0x80;
704
+ $tn -= 256;
705
+ }
706
+ }
707
+ my $pt = pack('NnCC', $code, $iSub, $tn, $fc);
708
+ $$items[2] =~ tr/,//d; # remove any commas
709
+ print OUTFILE "$_$pt$$items[2]\n";
710
+ next if $noLang;
711
+ $$items[9] =~ tr/,/\n/;
712
+ print ALTOUT $$items[9],"\0";
713
+ }
714
+ my $altSize = 0;
715
+ unless ($noLang) {
716
+ $altSize = tell ALTOUT;
717
+ close ALTOUT;
718
+ }
719
+ print OUTFILE "\0\0\0\0\x01\n"; # section terminator
720
+
721
+ die "Too many time zones!\n" if $tzNum > 0x01ff;
722
+
723
+ if ($verbose) {
724
+ $i = 0;
725
+ print "Features kept:\n";
726
+ foreach (sort keys %fcodes) {
727
+ my $fc = $featureCodes{$_} || 0;
728
+ printf "%6d (%2d) %s\n", $fcodes{$_}, $fc, $_;
729
+ }
730
+ }
731
+
732
+ # write country codes
733
+ open COUNTRY, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
734
+ my %cc;
735
+ while (<COUNTRY>) {
736
+ next if /^#/;
737
+ my @items = split /\t/;
738
+ next unless $haveCountry{$items[0]};
739
+ $cc{$items[4]} = $items[0];
740
+ die "country code error\n" if length $items[0] != 2;
741
+ $items[4] =~ tr/,//d; # remove any commas
742
+ print OUTFILE "$items[0]$items[4]\n";
743
+ if ($lang{$items[16]}) { # (16=geoID)
744
+ my $lkup = $lang{$items[16]};
745
+ my $key = $items[4]; # country name
746
+ $ccFlags{$flags{$items[16]}} = ($ccFlags{$flags{$items[16]}} || 0) + 1 if defined $flags{$items[16]};
747
+ foreach (@supportedLangs) {
748
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
749
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
750
+ push @{$langLookups{$_}{$key}}, ",$$lkup{$_}";
751
+ }
752
+ }
753
+ }
754
+ close COUNTRY;
755
+
756
+ print OUTFILE "\0\0\0\0\x02\n"; # section terminator
757
+
758
+ # write regions
759
+ print OUTFILE "\n"; # (null region)
760
+ open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
761
+ my %region;
762
+ while (<REGION>) {
763
+ chomp;
764
+ my @items = split /\t/;
765
+ #items: 0=region code, 1=name, 2=ascii, 3=geoID
766
+ $items[0] =~ tr/.//d; # (remove "." separator)
767
+ next unless $haveRegion{$items[0]};
768
+ $region{$items[0]} = $items[1];
769
+ $items[1] =~ tr/,//d; # remove any commas
770
+ print OUTFILE "$items[1]\n";
771
+ if ($lang{$items[3]}) { # (3=geoID)
772
+ my $lkup = $lang{$items[3]};
773
+ my $key = $items[1]; # region name
774
+ my $cc = substr($items[0], 0, 2);
775
+ $rgnFlags{$flags{$items[3]}} = ($rgnFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
776
+ foreach (@supportedLangs) {
777
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
778
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
779
+ push @{$langLookups{$_}{$key}}, "$cc,$$lkup{$_}";
780
+ }
781
+ }
782
+ }
783
+ close REGION;
784
+
785
+ print OUTFILE "\0\0\0\0\x03\n"; # section terminator
786
+
787
+ # write subregions
788
+ print OUTFILE "\n"; # (null admin2)
789
+ open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
790
+ my %subregion;
791
+ while (<ADMIN2>) {
792
+ chomp;
793
+ my @items = split /\t/;
794
+ #items: 0=admin2 code, 1=name, 2=ascii, 3=geoID
795
+ $items[0] =~ s/\.//; # (remove first "." separator)
796
+ next unless $haveSubRgn{$items[0]};
797
+ $subregion{$items[0]} = $items[1];
798
+ $items[1] =~ tr/,//d; # remove any commas
799
+ print OUTFILE "$items[1]\n";
800
+ if ($lang{$items[3]}) { # (3=geoID)
801
+ my $lkup = $lang{$items[3]};
802
+ my $key = $items[1]; # region name
803
+ $subFlags{$flags{$items[3]}} = ($subFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
804
+ my $rc = $items[0];
805
+ $rc =~ s/\..*//; # (remove subregion code)
806
+ foreach (@supportedLangs) {
807
+ next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
808
+ $langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
809
+ push @{$langLookups{$_}{$key}}, "$rc,$$lkup{$_}";
810
+ }
811
+ }
812
+ }
813
+ close ADMIN2;
814
+
815
+ print OUTFILE "\0\0\0\0\x04\n"; # section terminator
816
+
817
+ # write timezones
818
+ print OUTFILE $_,"\n" foreach @tz;
819
+
820
+ print OUTFILE "\0\0\0\0\x05\n"; # section terminator
821
+
822
+ # write feature codes and optional names
823
+ foreach (@featureCodes) {
824
+ print OUTFILE $_;
825
+ print OUTFILE ' ', $featureNames{$_} if $featureNames{$_};
826
+ print OUTFILE "\n";
827
+ }
828
+
829
+ # write terminator and close Geolocation.dat
830
+ print OUTFILE "\0\0\0\0\0\n"; # file terminator
831
+ my $outSize = tell OUTFILE;
832
+ close OUTFILE;
833
+
834
+ # write language lookups
835
+ my $langSize = 0;
836
+ my $langDir = "$outDir/$geoLang";
837
+ # delete existing languages
838
+ unlink <"$langDir/*.pm">;
839
+ if ($noLang) {
840
+ rmdir $langDir;
841
+ } else {
842
+ my $n = scalar(keys %langLookups);
843
+ print "Writing $n language files to $outDir/$geoLang...\n";
844
+ mkdir $langDir, 0777;
845
+ my ($lng, $key, $str, $nm, $alt);
846
+ foreach $lng (sort keys %langLookups) { # ($lng = language code)
847
+ my $myLng = $lng;
848
+ $myLng =~ tr/-/_/;
849
+ my $lkup = $langLookups{$lng};
850
+ my $file = "$myLng.pm";
851
+ open OUT, ">$langDir/$file" or die "Error creating $file\n";
852
+ binmode OUT;
853
+ print OUT "# Geolocation language translations for $myLng\n";
854
+ print OUT "#\n# Based on Creative Commons database from geonames.org\n\n";
855
+ print OUT "%Image::ExifTool::GeoLang::${myLng}::Translate = (\n";
856
+ foreach $key (sort keys %$lkup) {
857
+ ($nm = $key) =~ s/'/\\'/g;
858
+ # count entries and use the most common one, then add others with country+region ID's
859
+ # (entries in @$li are of the form: City:"CCRc,Sc,Alt", Sub:"CCRc,Alt", Rgn:"CC,Alt", Country:",Alt")
860
+ # (Rc = region code, Sc = subregion code)
861
+ my $li = $$lkup{$key};
862
+ my %count;
863
+ # sort by popularity of alternate name
864
+ foreach (@$li) {
865
+ my $val = $_;
866
+ $val =~ s/.*?,//;
867
+ $count{$val} = ($count{$val} || 0) + 1;
868
+ }
869
+ my @order = sort { $count{$b} <=> $count{$a} or length($a) <=> length($b) or $a cmp $b } keys %count;
870
+ my $first = 1;
871
+ foreach $alt (@order) {
872
+ foreach (sort @$li) {
873
+ my ($code,$val) = split ',', $_, 2;
874
+ # ($code will be empty for a country name, and 2 characters for a region name,
875
+ # and contain a "." for a city name)
876
+ next unless $val eq $alt; # don't add if alternate name is the same
877
+ die "Backslash in translated name" if $val =~ /\\/;
878
+ $val =~ s/'/\\'/g; # escape single quotes
879
+ if ($first and $val !~ /\(/) { # (don't add general translation if name is qualified with brackets)
880
+ print OUT "\t'$nm' => '$val',\n";
881
+ undef $first;
882
+ last;
883
+ }
884
+ # format for keys in language table
885
+ # City: "CCRgn,Subregion,City", "CCRgn,,City", "CC,City", ",City"
886
+ # Subregion: "CCRgn,Subregion,", "CCRgn,,"
887
+ # Region: "CCRgn,"
888
+ # Country: "CC,"
889
+ # Any: "Name"
890
+ if (not $code) {
891
+ # this is a country
892
+ $code = $cc{$key};
893
+ printf OUT "\t'$code,' => '$val',\n";
894
+ } elsif ($code !~ /\./) {
895
+ # this is a region or subregion
896
+ print OUT "\t'$code$nm,' => '$val',\n";
897
+ } else {
898
+ # this is a city
899
+ # use region/subregions name instead of code
900
+ my $sub = $subregion{$code} || '';
901
+ $sub =~ s/'/\\'/g;
902
+ $code =~ s/\..*//;
903
+ $code = substr($code,0,2) . $region{$code} if $region{$code};
904
+ $code =~ s/'/\\'/g;
905
+ print OUT "\t'$code,$sub,$nm' => '$val',\n";
906
+ }
907
+ }
908
+ }
909
+ }
910
+ if ($featureLang{$lng}) {
911
+ print OUT "\n\t# feature types\n";
912
+ foreach (sort keys %{$featureLang{$lng}}) {
913
+ my $ftype = $featureLang{$lng}{$_};
914
+ $ftype =~ s/'/\\'/g;
915
+ print OUT "\t$_ => '$ftype',\n"
916
+ }
917
+ }
918
+ print OUT ");\n\n1; #end\n";
919
+ $langSize += tell OUT;
920
+ close OUT;
921
+ }
922
+ if ($verbose) {
923
+ my @type = ( City => \%cityFlags, Region => \%rgnFlags, Subregion => \%subFlags, Country => \%ccFlags );
924
+ for (;;) {
925
+ my $type = shift @type or last;
926
+ my $flags = shift @type;
927
+ print "$type flags:\n";
928
+ printf(" 0x%.2x - %d\n", 0, $$flags{0} || 0);
929
+ my @label = qw(preferred short colloquial historic);
930
+ foreach my $bit (0..5) {
931
+ my $n = 0;
932
+ $_ & (1<<$bit) and ++$n foreach keys %$flags;
933
+ printf(" 0x%.2x - %d (%s)\n", (1<<$bit), $n, shift(@label)) if $n;
934
+ }
935
+ }
936
+ }
937
+ }
938
+
939
+ print "Output file size(s):\n";
940
+ printf "%8.2f MB %s (%d entries)\n", $outSize / 1e6, $outFile, scalar(keys %coords);
941
+ printf "%8.2f MB %s\n", $altSize / 1e6, $outAltNames if $altSize;
942
+ printf "%8.2f MB %s/*.pm\n", $langSize / 1e6, $geoLang if $langSize;
943
+ printf "%8.2f MB Total\n", ($outSize + $altSize + $langSize) / 1e6 if $altSize or $langSize;
944
+
945
+ # end