exiftool_vendored 12.82.0 → 12.85.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/Changes +59 -0
- data/bin/MANIFEST +2 -18
- data/bin/META.json +1 -1
- data/bin/META.yml +1 -1
- data/bin/README +47 -46
- data/bin/build_geolocation +945 -0
- data/bin/config_files/example.config +2 -2
- data/bin/exiftool +75 -51
- data/bin/fmt_files/gpx.fmt +2 -1
- data/bin/fmt_files/gpx_wpt.fmt +2 -1
- data/bin/lib/Image/ExifTool/Apple.pm +52 -7
- data/bin/lib/Image/ExifTool/BuildTagLookup.pm +5 -2
- data/bin/lib/Image/ExifTool/Canon.pm +11 -2
- data/bin/lib/Image/ExifTool/CanonVRD.pm +18 -5
- data/bin/lib/Image/ExifTool/DJI.pm +29 -0
- data/bin/lib/Image/ExifTool/DPX.pm +3 -3
- data/bin/lib/Image/ExifTool/Exif.pm +19 -2
- data/bin/lib/Image/ExifTool/GM.pm +17 -8
- data/bin/lib/Image/ExifTool/Geolocation.dat +0 -0
- data/bin/lib/Image/ExifTool/Geolocation.pm +170 -104
- data/bin/lib/Image/ExifTool/Geotag.pm +18 -10
- data/bin/lib/Image/ExifTool/ID3.pm +28 -6
- data/bin/lib/Image/ExifTool/Nikon.pm +25 -6
- data/bin/lib/Image/ExifTool/Pentax.pm +64 -13
- data/bin/lib/Image/ExifTool/QuickTime.pm +26 -7
- data/bin/lib/Image/ExifTool/QuickTimeStream.pl +5 -0
- data/bin/lib/Image/ExifTool/Sony.pm +15 -6
- data/bin/lib/Image/ExifTool/TagLookup.pm +3557 -3540
- data/bin/lib/Image/ExifTool/TagNames.pod +61 -13
- data/bin/lib/Image/ExifTool/WriteQuickTime.pl +4 -2
- data/bin/lib/Image/ExifTool/Writer.pl +166 -134
- data/bin/lib/Image/ExifTool/XMP.pm +2 -0
- data/bin/lib/Image/ExifTool/XMP2.pl +3 -0
- data/bin/lib/Image/ExifTool.pm +59 -27
- data/bin/lib/Image/ExifTool.pod +88 -71
- data/bin/perl-Image-ExifTool.spec +45 -45
- data/lib/exiftool_vendored/version.rb +1 -1
- metadata +3 -20
- data/bin/lib/Image/ExifTool/GeoLang/cs.pm +0 -978
- data/bin/lib/Image/ExifTool/GeoLang/de.pm +0 -1975
- data/bin/lib/Image/ExifTool/GeoLang/en_ca.pm +0 -44
- data/bin/lib/Image/ExifTool/GeoLang/en_gb.pm +0 -124
- data/bin/lib/Image/ExifTool/GeoLang/es.pm +0 -2921
- data/bin/lib/Image/ExifTool/GeoLang/fi.pm +0 -1116
- data/bin/lib/Image/ExifTool/GeoLang/fr.pm +0 -3171
- data/bin/lib/Image/ExifTool/GeoLang/it.pm +0 -2750
- data/bin/lib/Image/ExifTool/GeoLang/ja.pm +0 -10256
- data/bin/lib/Image/ExifTool/GeoLang/ko.pm +0 -4499
- data/bin/lib/Image/ExifTool/GeoLang/nl.pm +0 -1270
- data/bin/lib/Image/ExifTool/GeoLang/pl.pm +0 -3019
- data/bin/lib/Image/ExifTool/GeoLang/ru.pm +0 -18220
- data/bin/lib/Image/ExifTool/GeoLang/sk.pm +0 -441
- data/bin/lib/Image/ExifTool/GeoLang/sv.pm +0 -714
- data/bin/lib/Image/ExifTool/GeoLang/tr.pm +0 -452
- data/bin/lib/Image/ExifTool/GeoLang/zh_cn.pm +0 -2225
- data/bin/lib/Image/ExifTool/GeoLang/zh_tw.pm +0 -72
@@ -0,0 +1,945 @@
|
|
1
|
+
#!/usr/bin/perl -w
|
2
|
+
#-------------------------------------------------------------------------------
|
3
|
+
# File: build_geolocation
|
4
|
+
#
|
5
|
+
# Description: Parse geonames files to create ExifTool geolocation database
|
6
|
+
#
|
7
|
+
# Syntax: build_geolocation [OPTIONS] [DBFILE] ...
|
8
|
+
#
|
9
|
+
# Options: (see -h output)
|
10
|
+
#
|
11
|
+
# Created: 2024-03-03 - P. Harvey
|
12
|
+
# 2024-04-15 - PH Clean up and add options for public release
|
13
|
+
# 2024-04-22 - PH Increased number of possible feature codes from
|
14
|
+
# 32 to 64. Convert backslashes in directory names
|
15
|
+
# 2024-04-24 - PH Fixed problem with population exponent when run
|
16
|
+
# under ActivePerl
|
17
|
+
# 2024-04-29 - PH Added feature types and default to db v1.03
|
18
|
+
#
|
19
|
+
# Notes: Requires these files from https://download.geonames.org/export/
|
20
|
+
#
|
21
|
+
# allCountries.txt (or other input database if specified)
|
22
|
+
# countryInfo.txt
|
23
|
+
# admin1CodesASCII.txt
|
24
|
+
# admin2Codes.txt
|
25
|
+
# featureCodes_XX.txt (optional)
|
26
|
+
# alternateNamesV2.txt (optional)
|
27
|
+
#
|
28
|
+
# Output datbase format (Geolocation.dat):
|
29
|
+
#
|
30
|
+
# Header:
|
31
|
+
# "GeolocationV.VV\tNNNN\n" (V.VV=version, NNNN=num city entries)
|
32
|
+
# "# <comment>\n"
|
33
|
+
# NNNN City entries:
|
34
|
+
# Offset Format Description
|
35
|
+
# 0 int16u - latitude high 16 bits (converted to 0-0x100000 range)
|
36
|
+
# 2 int8u - latitude low 4 bits, longitude low 4 bits
|
37
|
+
# 3 int16u - longitude high 16 bits
|
38
|
+
# 5 int8u - index of country in country list
|
39
|
+
# 6 int8u - 0xf0 = population E exponent (in format "N.Fe+0E"), 0x0f = population N digit
|
40
|
+
# 7 int16u - 0xf000 = population F digit, 0x0fff = index in region list (admin1)
|
41
|
+
# 9 int16u - v1.02: 0x7fff = index in subregion (admin2), 0x8000 = high bit of time zone
|
42
|
+
# 9 int16u - v1.03: index in subregion (admin2)
|
43
|
+
# 11 int8u - low byte of time zone index
|
44
|
+
# 12 int8u - 0x3f = feature code index (see below), v1.03: 0x80 = high bit of time zone
|
45
|
+
# 13 string - UTF8 City name, terminated by newline
|
46
|
+
# "\0\0\0\0\x01"
|
47
|
+
# Country entries:
|
48
|
+
# 1. 2-character country code
|
49
|
+
# 2. Country name, terminated by newline
|
50
|
+
# "\0\0\0\0\x02"
|
51
|
+
# Region entries:
|
52
|
+
# 1. Region name, terminated by newline
|
53
|
+
# "\0\0\0\0\x03"
|
54
|
+
# Subregion entries:
|
55
|
+
# 1. Subregion name, terminated by newline
|
56
|
+
# "\0\0\0\0\x04"
|
57
|
+
# Time zone entries:
|
58
|
+
# 1. Time zone name, terminated by newline
|
59
|
+
# "\0\0\0\0\x05" (feature codes added in v1.03)
|
60
|
+
# Feature codes:
|
61
|
+
# 1. Feature code, optional space-followed-by-feature-name, then newline
|
62
|
+
# "\0\0\0\0\0"
|
63
|
+
#
|
64
|
+
# Feature Codes v1.02: (see http://www.geonames.org/export/codes.html#P for descriptions)
|
65
|
+
#
|
66
|
+
# 0. Other 3. PPLA2 6. PPLA5 9. PPLF 12. PPLR 15. PPLX
|
67
|
+
# 1. PPL 4. PPLA3 7. PPLC 10. PPLG 13. PPLS
|
68
|
+
# 2. PPLA 5. PPLA4 8. PPLCH 11. PPLL 14. STLMT
|
69
|
+
#
|
70
|
+
# Feature Codes v1.03 and later are listed at the end of the database
|
71
|
+
#-------------------------------------------------------------------------------
|
72
|
+
|
73
|
+
use strict;
|
74
|
+
|
75
|
+
my $dbVer = '1.03'; # default output database version
|
76
|
+
my $dbFile = 'allCountries.txt'; # default database file
|
77
|
+
my $countryFile = 'countryInfo.txt'; # mandatory country names file
|
78
|
+
my $regionFile = 'admin1CodesASCII.txt'; # mandatory region names file
|
79
|
+
my $admin2File = 'admin2Codes.txt'; # mandatory subregion names file
|
80
|
+
my $featureFile = 'featureCodes_en.txt'; # optional feature names file
|
81
|
+
my $altNamesFile = 'alternateNamesV2.txt'; # optional alternate names file
|
82
|
+
my $outFile = 'Geolocation.dat'; # output ExifTool database file
|
83
|
+
my $outAltNames = 'AltNames.dat'; # output alternate names file
|
84
|
+
my $outDirName = 'Geolocation_out'; # output directory for database files
|
85
|
+
my $geoLang = 'GeoLang'; # output directory for language files
|
86
|
+
|
87
|
+
my %defaults = (
|
88
|
+
file => $dbFile,
|
89
|
+
minpop => 2000,
|
90
|
+
def_codes => 'PPLA,PPLA2',
|
91
|
+
def_codesp => 'PPL,PPLA,PPLA2,PPLA3,PPLA4,PPLA5,PPLC,PPLCH,PPLF,PPLG,PPLH,PPLL,PPLQ,PPLR,PPLS,PPLW,PPLX,STLMT',
|
92
|
+
);
|
93
|
+
|
94
|
+
# languages to read from geonames database
|
95
|
+
my @languages = qw(cs de en en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh zh-cn zh-tw);
|
96
|
+
|
97
|
+
# indices of feature codes (v1.02 is hard-coded in ExifTool)
|
98
|
+
my @fc102 = qw(
|
99
|
+
Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
|
100
|
+
PPLF PPLG PPLL PPLR PPLS STLMT PPLX
|
101
|
+
);
|
102
|
+
# base features for v1.03+
|
103
|
+
my @fc103 = qw(
|
104
|
+
Other PPL PPLA PPLA2 PPLA3 PPLA4 PPLA5 PPLC PPLCH
|
105
|
+
PPLF PPLG PPLH PPLL PPLQ PPLR PPLS PPLW PPLX STLMT
|
106
|
+
);
|
107
|
+
my $i = 0;
|
108
|
+
my @featureCodes = $dbVer eq '1.02' ? @fc102 : @fc103;
|
109
|
+
my %featureCodes = map { $_ => $i++ } @featureCodes;
|
110
|
+
my %featureNames;
|
111
|
+
|
112
|
+
my ($dbfile, @dbfiles, $outDir, $verbose, $noLang, %needRgn);
|
113
|
+
my %optArgs = ( p => 1, c => 1, cp => 1, l => 1, o => 1, ver => 1 );
|
114
|
+
|
115
|
+
# process command-line arguments
|
116
|
+
my $opts = { };
|
117
|
+
while (@ARGV) {
|
118
|
+
my $opt = shift;
|
119
|
+
if (not $opt =~ s/^-//) {
|
120
|
+
$opt = '.' unless length $opt;
|
121
|
+
$opt =~ tr(\\)(/); # use forward slashes
|
122
|
+
$opt =~ s(/$)(); # remove trailing slash
|
123
|
+
$opt = "$opt/$defaults{file}" if -d $opt;
|
124
|
+
-e $opt or die "Error opening database $opt\n";
|
125
|
+
push @dbfiles, { %defaults, %$opts, file => $opt };
|
126
|
+
$opts = { };
|
127
|
+
next;
|
128
|
+
}
|
129
|
+
my $arg;
|
130
|
+
if ($optArgs{$opt}) {
|
131
|
+
$arg = shift;
|
132
|
+
defined $arg or die "Expecting argument for -$opt option\n";
|
133
|
+
}
|
134
|
+
if ($opt eq 'p') {
|
135
|
+
$arg = uc $arg;
|
136
|
+
if ($arg =~ /=/) {
|
137
|
+
my ($cc, $mp) = split /=/, $arg;
|
138
|
+
$mp =~ /^\d+$/ or die "Expecting number on rhs of '=' for -p option\n";
|
139
|
+
my @cc = split /,/, $cc;
|
140
|
+
foreach $cc (@cc) {
|
141
|
+
$cc =~ /^([A-Z]{2})(\..+)?$/ or die "Invalid country/region '$cc' for -p option\n";
|
142
|
+
$needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
|
143
|
+
$$opts{cc_minpop}{$cc} = $mp;
|
144
|
+
}
|
145
|
+
} else {
|
146
|
+
$arg =~ /^\d+$/ or die "Expecting number for -p option\n";
|
147
|
+
$$opts{minpop} = $arg;
|
148
|
+
}
|
149
|
+
} elsif ($opt =~ /^c(p?)$/) {
|
150
|
+
my $p = $1;
|
151
|
+
$arg = uc $arg;
|
152
|
+
my ($cc, $co);
|
153
|
+
if ($arg =~ /=/) {
|
154
|
+
($cc, $co) = split /=/, $arg;
|
155
|
+
} else {
|
156
|
+
($cc, $co) = ('??', $arg);
|
157
|
+
}
|
158
|
+
my $sign = $co =~ s/^([-+])// ? $1 : '';
|
159
|
+
my @co = split /,/, $co;
|
160
|
+
my @cc = split /,/, $cc;
|
161
|
+
# store lookup for features to keep for each country ('??' = any country)
|
162
|
+
foreach $cc (@cc) {
|
163
|
+
$cc =~ /^([A-Z]{2}|\?\?)(\..+)?$/ or die "Invalid country/region '$cc' for -$opt option\n";
|
164
|
+
$needRgn{$1} = $needRgn{$cc} = 1 if length $cc > 2;
|
165
|
+
if (not $sign) {
|
166
|
+
$$opts{"keep$p"}{$cc} = { };
|
167
|
+
} elsif (not $$opts{"keep$p"}{$cc}) {
|
168
|
+
# start from defaults
|
169
|
+
my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
|
170
|
+
$$opts{"keep$p"}{$cc} = \%codes;
|
171
|
+
}
|
172
|
+
foreach $co (@co) {
|
173
|
+
if ($sign eq '-') {
|
174
|
+
delete $$opts{"keep$p"}{$cc}{$co};
|
175
|
+
} else {
|
176
|
+
$$opts{"keep$p"}{$cc}{$co} = 1;
|
177
|
+
}
|
178
|
+
}
|
179
|
+
}
|
180
|
+
} elsif ($opt eq 'l') {
|
181
|
+
$arg = lc $arg;
|
182
|
+
my @langs = split ',', $arg;
|
183
|
+
if (not @langs) {
|
184
|
+
undef @languages;
|
185
|
+
$noLang = 1;
|
186
|
+
} elsif ($langs[0] =~ s/^-//) {
|
187
|
+
@languages = grep !/^$langs[0]$/, @languages foreach @langs;
|
188
|
+
} else {
|
189
|
+
@languages = @langs;
|
190
|
+
}
|
191
|
+
} elsif ($opt eq 'o') {
|
192
|
+
$outDir = $arg;
|
193
|
+
} elsif ($opt eq 'ver') {
|
194
|
+
$dbVer = $arg;
|
195
|
+
$dbVer =~ /^1\.0[23]$/ or die "Unsupported version number $dbVer\n";
|
196
|
+
} elsif ($opt eq 'v') {
|
197
|
+
$verbose = 1;
|
198
|
+
} elsif ($opt eq 'h') {
|
199
|
+
my $defcp = $defaults{def_codesp};
|
200
|
+
$defcp =~ s/(PPLG,)/\n $1/;
|
201
|
+
my $defLang = join ',', @languages;
|
202
|
+
$defLang =~ s/(ja,)/\n $1/;
|
203
|
+
print <<"END";
|
204
|
+
Description: Build ExifTool Geolocation database.
|
205
|
+
|
206
|
+
Syntax: build_geolocation [OPTIONS] [DBFILE] ...
|
207
|
+
|
208
|
+
Options:
|
209
|
+
DBFILE - Input database file name or directory. Multiple input database
|
210
|
+
files may be specified. The -p, -c and -cp options apply to
|
211
|
+
the database that comes after them on the command line.
|
212
|
+
Default is "$dbFile".
|
213
|
+
-p POP - Minimum population for cities to include. POP may be a number
|
214
|
+
or be of the form "CC[,C2...]=###" to set different limits for
|
215
|
+
specific countries/regions, where CC and C2 are country codes
|
216
|
+
with optional region name or code appended after a period (eg.
|
217
|
+
"CA.Ontario,US=500" sets the minimum population to 500 for
|
218
|
+
cities on Ontario Canada or the U.S.). If a region is
|
219
|
+
specified, either the full name or the geonames admin1 code may
|
220
|
+
be used, and case and spaces are not significant. May be
|
221
|
+
multiple -p options for each input DBFILE. Default is "$defaults{minpop}".
|
222
|
+
-c CODE - Feature codes to always include, regardless of population. CODE
|
223
|
+
is a comma-separated list of feature codes, with an optional
|
224
|
+
leading comma-separated list of country/region codes followed
|
225
|
+
by an equals sign to apply only to specific countries. The
|
226
|
+
feature-code list may begin with a dash to remove entries from
|
227
|
+
the default list, or a plus sign to add entries. May be
|
228
|
+
multiple -c options for each intput DBFILE. Country/region and
|
229
|
+
feature names are case insensitive. Default is "$defaults{def_codes}".
|
230
|
+
-cp CODE - Additional features to include if above minimum population.
|
231
|
+
Default is "$defcp".
|
232
|
+
-l LANG - Alternate languages to read from $altNamesFile if
|
233
|
+
available. These are used to generate $outAltNames an the
|
234
|
+
$geoLang files. LANG is a comma-separated list of language
|
235
|
+
codes, starting with a dash to remove items from the default
|
236
|
+
list. May be set to an empty string to disable generation
|
237
|
+
of alternate language files even if $altNamesFile
|
238
|
+
exists. The same set of languages applies to all input
|
239
|
+
database files. Default is "$defLang".
|
240
|
+
-o OUTDIR - Output directory name. Default is the same directory as the
|
241
|
+
first input database file. A directory named $outDirName
|
242
|
+
containing the output files will be created in this directory.
|
243
|
+
-ver VER - Version for output geolocation database (default is $dbVer).
|
244
|
+
-v - Verbose messages.
|
245
|
+
-h - Show this help.
|
246
|
+
|
247
|
+
Input files (download from https://download.geonames.org/export/dump/):
|
248
|
+
$dbFile - default database file (smaller files with names
|
249
|
+
like "cities###.txt" may be specified instead)
|
250
|
+
$countryFile - mandatory country names file
|
251
|
+
$regionFile - mandatory region names file
|
252
|
+
$admin2File - mandatory subregion names file
|
253
|
+
$featureFile - optional feature codes file
|
254
|
+
$altNamesFile - optional alternate names file (must exist to
|
255
|
+
to generate $outAltNames and $geoLang files)
|
256
|
+
|
257
|
+
Output files:
|
258
|
+
$outDirName - default output directory name
|
259
|
+
$outFile - ExifTool database file
|
260
|
+
$outAltNames - alternate names file
|
261
|
+
$geoLang - directory for alternate language files
|
262
|
+
|
263
|
+
Author:
|
264
|
+
Copyright 2024, Phil Harvey
|
265
|
+
|
266
|
+
This is free software; you can redistribute it and/or modify it under the
|
267
|
+
same terms as Perl itself.
|
268
|
+
END
|
269
|
+
exit 0;
|
270
|
+
} else {
|
271
|
+
die "Unknown option '-$opt'\n";
|
272
|
+
}
|
273
|
+
}
|
274
|
+
|
275
|
+
if (@dbfiles) {
|
276
|
+
# apply any remaining options to last database file
|
277
|
+
$dbfiles[-1]{$_} = $$opts{$_} foreach keys %$opts;
|
278
|
+
} else {
|
279
|
+
# use default database file if none specified
|
280
|
+
push @dbfiles, { %defaults, %$opts };
|
281
|
+
unless (-e $dbfiles[0]{file}) {
|
282
|
+
# also look in script directory
|
283
|
+
if ($0 =~ m{(.*)/} and -e "$1/$dbfiles[0]{file}") {
|
284
|
+
$dbfiles[0]{file} = "$1/$dbfiles[0]{file}";
|
285
|
+
} else {
|
286
|
+
die qq(Database "$dbfiles[0]{file}" not found. Use -h option for help.\n);
|
287
|
+
}
|
288
|
+
}
|
289
|
+
}
|
290
|
+
|
291
|
+
# determine our working directory
|
292
|
+
my $dbdir = $dbfiles[0]{file};
|
293
|
+
$dbdir = '.' unless $dbdir =~ s(/[^/]*$)();
|
294
|
+
|
295
|
+
# add default feature code lookups if necessary
|
296
|
+
foreach $dbfile (@dbfiles) {
|
297
|
+
my $p;
|
298
|
+
foreach $p ('', 'p') {
|
299
|
+
next if $$dbfile{"keep$p"}{'??'};
|
300
|
+
my %codes = map { $_ => 1 } split /,/, $defaults{"def_codes$p"};
|
301
|
+
$$dbfile{"keep$p"}{'??'} = \%codes;
|
302
|
+
}
|
303
|
+
}
|
304
|
+
|
305
|
+
# pre-read region file if necessary
|
306
|
+
if (%needRgn) {
|
307
|
+
open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
|
308
|
+
while (<REGION>) {
|
309
|
+
my @items = split /\t/;
|
310
|
+
my $rgn = $items[0];
|
311
|
+
my ($cc) = split /\./, $rgn;
|
312
|
+
next unless $needRgn{$cc};
|
313
|
+
unless ($needRgn{$rgn}) { # allow region code to be used
|
314
|
+
$rgn = $cc . '.' . uc$items[1]; # also support full region name
|
315
|
+
unless ($needRgn{$rgn}) {
|
316
|
+
$rgn =~ tr/ //d;
|
317
|
+
next unless $needRgn{$rgn}; # also allow no spaces
|
318
|
+
}
|
319
|
+
}
|
320
|
+
$needRgn{$rgn} = [$items[0], "$cc.$items[1]"];
|
321
|
+
}
|
322
|
+
close REGION;
|
323
|
+
foreach (sort keys %needRgn) {
|
324
|
+
next if length == 2;
|
325
|
+
die "No matching region for $_\n" unless ref $needRgn{$_};
|
326
|
+
}
|
327
|
+
}
|
328
|
+
|
329
|
+
if ($verbose) {
|
330
|
+
my $langs = join ',', @languages;
|
331
|
+
$langs or $langs = '<none>';
|
332
|
+
print "Languages to read from input database(s):\n $langs\n";
|
333
|
+
foreach $dbfile (@dbfiles) {
|
334
|
+
print "Parameters for reading $$dbfile{file}:\n";
|
335
|
+
print " Minimum populations (??=any country):\n";
|
336
|
+
print " ??=$$dbfile{minpop}\n";
|
337
|
+
foreach (reverse sort keys %{$$dbfile{cc_minpop}}) {
|
338
|
+
my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
|
339
|
+
print " $cc=$$dbfile{cc_minpop}{$_}\n";
|
340
|
+
}
|
341
|
+
print " Features to keep regardless of population:\n";
|
342
|
+
foreach (reverse sort keys %{$$dbfile{keep}}) {
|
343
|
+
my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
|
344
|
+
print " $cc=",join(',', sort keys %{$$dbfile{keep}{$_}}), "\n";
|
345
|
+
}
|
346
|
+
print " Features to keep for population >= minimum:\n";
|
347
|
+
foreach (reverse sort keys %{$$dbfile{keepp}}) {
|
348
|
+
my $cc = ref $needRgn{$_} ? $needRgn{$_}[1] : $_;
|
349
|
+
print " $_=",join(',', sort keys %{$$dbfile{keepp}{$_}}), "\n";
|
350
|
+
}
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
354
|
+
# translate option region arguments to region codes
|
355
|
+
foreach $dbfile (@dbfiles) {
|
356
|
+
my ($type, $cc);
|
357
|
+
foreach $type (qw(cc_minpop keep keepp)) {
|
358
|
+
my @cc = keys %{$$dbfile{$type}};
|
359
|
+
foreach $cc (@cc) {
|
360
|
+
next unless ref $needRgn{$cc};
|
361
|
+
my $tmp = $$dbfile{$type}{$cc};
|
362
|
+
delete $$dbfile{$type}{$cc};
|
363
|
+
$$dbfile{$type}{$needRgn{$cc}[0]} = $tmp;
|
364
|
+
}
|
365
|
+
}
|
366
|
+
}
|
367
|
+
|
368
|
+
$outDir = "$dbdir/$outDirName" unless defined $outDir;
|
369
|
+
-d $outDir or mkdir $outDir, 0777 or die "Error creating output directory '$outDir'\n";
|
370
|
+
-e "$dbdir/$_" or die "Missing input file $dbdir/$_\n" foreach $countryFile, $regionFile, $admin2File;
|
371
|
+
|
372
|
+
# order of country codes, region names and subregions in database
|
373
|
+
my (%orderCC, %orderRgn, %orderSub);
|
374
|
+
|
375
|
+
# languages to read from geonames database (converted to lower case)
|
376
|
+
my %languages = map { $_ => 1 } @languages;
|
377
|
+
|
378
|
+
# language codes supported by ExifTool
|
379
|
+
my @supportedLangs = qw(cs de en-ca en-gb es fi fr it ja ko nl pl ru sk sv tr zh-cn zh-tw);
|
380
|
+
|
381
|
+
# supported country-specific languages
|
382
|
+
my %ccLang = ( TW => 'zh', CN => 'zh', CA => 'en', GB => 'en' );
|
383
|
+
my (%lang, %featureLang, %haveCountry, %cityFlags, %rgnFlags, %subFlags, %ccFlags, %flags);
|
384
|
+
my (%haveRegion, %haveSubRgn, $filesize, $percent);
|
385
|
+
|
386
|
+
sub GetFileSize($)
|
387
|
+
{
|
388
|
+
my $file = shift;
|
389
|
+
seek $file, 0, 2 or die "Seek error\n";
|
390
|
+
my $size = tell $file;
|
391
|
+
seek $file, 0, 0 or die "Seek error\n";
|
392
|
+
return $size;
|
393
|
+
}
|
394
|
+
|
395
|
+
# pre-scan database to determine which countries, regions subregions and
|
396
|
+
# feature codes we will be using
|
397
|
+
foreach $dbfile (@dbfiles) {
|
398
|
+
my $database = $$dbfile{file};
|
399
|
+
my $upgraded;
|
400
|
+
|
401
|
+
print "Reading $database... 0%";
|
402
|
+
flush STDOUT;
|
403
|
+
|
404
|
+
# pre-read the files to initialize necessary variables
|
405
|
+
open INFILE, '<', $database or die "Error opening $database\n";
|
406
|
+
$filesize = GetFileSize(\*INFILE);
|
407
|
+
|
408
|
+
open OUTFILE, '>', "$outDir/$outFile" or die "Error creating $outFile in $outDir\n";
|
409
|
+
binmode(OUTFILE);
|
410
|
+
|
411
|
+
$$dbfile{kept} = [ ];
|
412
|
+
$percent = -1;
|
413
|
+
while (<INFILE>) {
|
414
|
+
my $p = int(100 * tell(INFILE) / $filesize + 0.5);
|
415
|
+
if ($percent != $p) {
|
416
|
+
printf("\b\b\b\b%3d%%", $percent = $p);
|
417
|
+
flush STDOUT;
|
418
|
+
}
|
419
|
+
my @items = split /\t/;
|
420
|
+
my ($dbnum, $code, $cc, $rgn, $sub, $pop) = @items[0,7,8,10,11,14];
|
421
|
+
next unless @items > 17 and $cc =~ /^[A-Z]{2}$/;
|
422
|
+
my ($minpop, $keep);
|
423
|
+
if ($needRgn{$cc} and defined $$dbfile{cc_minpop}{"$cc$rgn"}) {
|
424
|
+
$minpop = $$dbfile{cc_minpop}{"$cc$rgn"};
|
425
|
+
} elsif (defined $$dbfile{cc_minpop}{$cc}) {
|
426
|
+
$minpop = $$dbfile{cc_minpop}{$cc};
|
427
|
+
} else {
|
428
|
+
$minpop = $$dbfile{minpop};
|
429
|
+
}
|
430
|
+
# keep regardless of population
|
431
|
+
if ($needRgn{$cc} and $$dbfile{keep}{"$cc$rgn"}) {
|
432
|
+
$keep = $$dbfile{keep}{"$cc$rgn"}{$code};
|
433
|
+
} elsif ($$dbfile{keep}{$cc}) {
|
434
|
+
$keep = $$dbfile{keep}{$cc}{$code};
|
435
|
+
} else {
|
436
|
+
$keep = $$dbfile{keep}{'??'}{$code};
|
437
|
+
}
|
438
|
+
if ($pop < $minpop) {
|
439
|
+
next unless $keep;
|
440
|
+
} elsif ($needRgn{$cc} and $$dbfile{keepp}{"$cc$rgn"}) {
|
441
|
+
next unless $$dbfile{keepp}{"$cc$rgn"}{$code};
|
442
|
+
} elsif ($$dbfile{keepp}{$cc}) {
|
443
|
+
next unless $$dbfile{keepp}{$cc}{$code};
|
444
|
+
} else {
|
445
|
+
next unless $$dbfile{keepp}{'??'}{$code};
|
446
|
+
}
|
447
|
+
push @{$$dbfile{kept}}, $_;
|
448
|
+
$lang{$dbnum} = { alt => [ ] };
|
449
|
+
$haveCountry{$cc} = 1;
|
450
|
+
$haveRegion{"$cc$rgn"} = 1;
|
451
|
+
$haveSubRgn{"$cc$rgn.$sub"} = 1;
|
452
|
+
# add new feature codes (up to maximum index of 0x3f)
|
453
|
+
unless ($featureCodes{$code} or @featureCodes > 0x3f) {
|
454
|
+
if ($dbVer eq '1.02') {
|
455
|
+
next if $code =~ /^(PPLH|PPLQ|PPLW)$/; # (stored as "Other" in v1.02)
|
456
|
+
$dbVer = '1.03';
|
457
|
+
$upgraded = 1; # print upgrade warning
|
458
|
+
@featureCodes = @fc103;
|
459
|
+
my $i = 0;
|
460
|
+
%featureCodes = map { $_ => $i++ } @featureCodes;
|
461
|
+
next if $featureCodes{$code};
|
462
|
+
}
|
463
|
+
push @featureCodes, $code;
|
464
|
+
$featureCodes{$code} = $#featureCodes;
|
465
|
+
}
|
466
|
+
}
|
467
|
+
close INFILE;
|
468
|
+
print "\b\b\b\bDone.\n";
|
469
|
+
warn "Some feature codes not supported by version 1.02, writing as 1.03 instead.\n" if $upgraded;
|
470
|
+
}
|
471
|
+
|
472
|
+
# read feature names
|
473
|
+
if (open INFILE, '<', "$dbdir/$featureFile") {
|
474
|
+
print "Reading $dbdir/$featureFile\n";
|
475
|
+
while (<INFILE>) {
|
476
|
+
my @items = split /\t/;
|
477
|
+
$items[0] =~ s/^.\.//; # remove feature group and "." separator
|
478
|
+
next unless $featureCodes{$items[0]};
|
479
|
+
my $name = ucfirst $items[1];
|
480
|
+
$name =~ s/ ([a-z])/ \U$1/g;
|
481
|
+
$featureNames{$items[0]} = $name;
|
482
|
+
}
|
483
|
+
close INFILE;
|
484
|
+
} else {
|
485
|
+
print "Not found: $dbdir/$featureFile\n";
|
486
|
+
print "--> Not storing feature type names\n";
|
487
|
+
}
|
488
|
+
|
489
|
+
# read country names
|
490
|
+
$i = 0;
|
491
|
+
open INFILE, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
|
492
|
+
print "Reading $dbdir/$countryFile\n";
|
493
|
+
while (<INFILE>) {
|
494
|
+
next if /^#/;
|
495
|
+
my @items = split /\t/;
|
496
|
+
next unless $haveCountry{$items[0]};
|
497
|
+
$lang{$items[16]} = { alt => [ ] }; # reference lookup by db number
|
498
|
+
$orderCC{$items[0]} = $i++; # (entry 0 is the first country)
|
499
|
+
}
|
500
|
+
close INFILE;
|
501
|
+
printf " %.6d countries (0x%.4x)\n",$i,$i if $verbose;
|
502
|
+
die "Too many countries!\n" if $i > 0x100; # (no default 0 entry)
|
503
|
+
|
504
|
+
# read region (admin1) names
|
505
|
+
$i = 0;
|
506
|
+
open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
|
507
|
+
print "Reading $dbdir/$regionFile\n";
|
508
|
+
while (<REGION>) {
|
509
|
+
chomp;
|
510
|
+
my @items = split /\t/;
|
511
|
+
$items[0] =~ tr/.//d; # (remove "." separator)
|
512
|
+
next unless $haveRegion{$items[0]};
|
513
|
+
$lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
|
514
|
+
$orderRgn{$items[0]} = ++$i; # (entry 0 is default "" region)
|
515
|
+
}
|
516
|
+
close REGION;
|
517
|
+
printf " %.6d regions (0x%.4x)\n",$i,$i if $verbose;
|
518
|
+
die "Too many regions!\n" if $i > 0x0fff; # (account for default 0 entry)
|
519
|
+
|
520
|
+
# read subregion (admin2) names
|
521
|
+
$i = 0;
|
522
|
+
open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
|
523
|
+
print "Reading $dbdir/$admin2File\n";
|
524
|
+
while (<ADMIN2>) {
|
525
|
+
chomp;
|
526
|
+
my @items = split /\t/;
|
527
|
+
$items[0] =~ s/\.//; # (remove first "." separator)
|
528
|
+
next unless $haveSubRgn{$items[0]};
|
529
|
+
$lang{$items[3]} = { alt => [ ] }; # reference lookup by db number
|
530
|
+
$orderSub{$items[0]} = ++$i; # (entry 0 is default "" subregion)
|
531
|
+
}
|
532
|
+
close ADMIN2;
|
533
|
+
printf " %.6d subregions (0x%.4x)\n",$i,$i if $verbose;
|
534
|
+
if ($i > ($dbVer eq '1.02' ? 0x7fff : 0xffff)) {
|
535
|
+
die "Too many subregions!\n" if $i > 0xffff;
|
536
|
+
$dbVer = '1.03';
|
537
|
+
warn "Too many subregions for version 1.02, writing as 1.03 instead.\n";
|
538
|
+
}
|
539
|
+
|
540
|
+
# read alternate names file if available
|
541
|
+
if (not $noLang and open INFILE, '<', "$dbdir/$altNamesFile") {
|
542
|
+
$filesize = GetFileSize(\*INFILE);
|
543
|
+
print "Reading $dbdir/$altNamesFile... 0%";
|
544
|
+
my %bestPri;
|
545
|
+
while (<INFILE>) {
|
546
|
+
my $p = int(100 * tell(INFILE) / $filesize + 0.5);
|
547
|
+
if ($percent != $p) {
|
548
|
+
printf("\b\b\b\b%3d%%", $percent = $p);
|
549
|
+
flush STDOUT;
|
550
|
+
}
|
551
|
+
# items: 0=altID,1=geoID,2=lang,3=alt name,4=preferred,5=short,6=colloquial,7=historic
|
552
|
+
my @items = split /\t/;
|
553
|
+
my $lkup = $lang{$items[1]} or next;
|
554
|
+
my $altList = $lang{$items[1]}{alt};
|
555
|
+
my $lng = lc $items[2];
|
556
|
+
next if $lng and not $languages{$lng};
|
557
|
+
push @$altList, $items[3] unless grep /^\Q$items[3]\E$/i, @$altList;
|
558
|
+
next unless $lng;
|
559
|
+
my $flags = 0;
|
560
|
+
# keep only the best translation for this name for each language
|
561
|
+
$items[$_] and $flags |= (1<<($_-4)) foreach 4,5,6,7;
|
562
|
+
$flags{$items[1]} = ( $flags{$items[1]} || 0 ) | $flags;
|
563
|
+
next if $items[6] or $items[7]; # ignore colloquial and historic names
|
564
|
+
my $pri = $items[5] ? 0 : ($items[4] ? 1 : 2); # priority for best type of name
|
565
|
+
my $langPri = $bestPri{$lng};
|
566
|
+
$langPri or $langPri = $bestPri{$lng} = { };
|
567
|
+
next if $$langPri{$items[1]} and $$langPri{$items[1]} > $pri;
|
568
|
+
$$langPri{$items[1]} = $pri;
|
569
|
+
# save language-specific name for this feature, removing commas
|
570
|
+
($$lkup{$lng} = $items[3]) =~ tr/,//d;
|
571
|
+
}
|
572
|
+
print "\b\b\b\bDone.\n";
|
573
|
+
close INFILE;
|
574
|
+
# read alternate feature names
|
575
|
+
if (%featureNames) {
|
576
|
+
my $lng;
|
577
|
+
foreach $lng (@languages) {
|
578
|
+
next if $lng eq 'en' or not $languages{$lng};
|
579
|
+
my $file = "$dbdir/$featureFile";
|
580
|
+
$file =~ s/_en\./_$lng./ or next;
|
581
|
+
next unless open INFILE, '<', $file;
|
582
|
+
print "Reading $file\n";
|
583
|
+
while (<INFILE>) {
|
584
|
+
my @items = split /\t/;
|
585
|
+
$items[0] =~ s/^.\.//; # remove feature group and "." separator
|
586
|
+
next unless $featureNames{$items[0]};
|
587
|
+
utf8::decode($items[1]);
|
588
|
+
my $name = ucfirst $items[1];
|
589
|
+
$name =~ s/ (.)/ \U$1/g;
|
590
|
+
# change $name back to byte string
|
591
|
+
if ($] >= 5.006 and (eval { require Encode; Encode::is_utf8($name) } or $@)) {
|
592
|
+
$name = $@ ? pack('C*',unpack($] < 5.010000 ? 'U0C*' : 'C0C*',$name)) : Encode::encode('utf8',$name);
|
593
|
+
}
|
594
|
+
next if $name eq $featureNames{$items[0]};
|
595
|
+
$featureLang{$lng}{$items[0]} = $name;
|
596
|
+
}
|
597
|
+
close INFILE;
|
598
|
+
}
|
599
|
+
}
|
600
|
+
} else {
|
601
|
+
print "Not found: $dbdir/$altNamesFile\n--> " unless $noLang;
|
602
|
+
print "Not writing alternate languages\n";
|
603
|
+
$noLang = 1;
|
604
|
+
}
|
605
|
+
|
606
|
+
my (%coords, %langLookups);
|
607
|
+
|
608
|
+
foreach $dbfile (@dbfiles) {
|
609
|
+
my $database = $$dbfile{file};
|
610
|
+
|
611
|
+
print "Processing database entries... 0%";
|
612
|
+
my $i = 0;
|
613
|
+
my $num = scalar @{$$dbfile{kept}};
|
614
|
+
|
615
|
+
foreach (@{$$dbfile{kept}}) {
|
616
|
+
my @items = split /\t/;
|
617
|
+
my ($lat, $lon) = @items[4,5];
|
618
|
+
$lat = int(($lat + 90) / 180 * 0x100000 + 0.5) & 0xfffff;
|
619
|
+
$lon = int(($lon + 180) / 360 * 0x100000 + 0.5) & 0xfffff;
|
620
|
+
my $coord = pack('nCn',$lat>>4,(($lat&0x0f)<<4)|($lon&0x0f),$lon>>4);;
|
621
|
+
# take the city with the highest population if there are
|
622
|
+
# multiple cities with the same reduced coordinates
|
623
|
+
if ($coords{$coord} and $coords{$coord}[6] >= $items[14]) {
|
624
|
+
next;
|
625
|
+
}
|
626
|
+
# coords=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
|
627
|
+
my ($altList, $alt);
|
628
|
+
die "Internal error\n" unless $lang{$items[0]} and $altList = $lang{$items[0]}{alt};
|
629
|
+
if (@$altList) {
|
630
|
+
tr/,//d foreach @$altList;
|
631
|
+
$alt = join ',', sort @$altList;
|
632
|
+
} else {
|
633
|
+
$alt = '';
|
634
|
+
}
|
635
|
+
$coords{$coord} = [ @items[4,5,1,8,10,11,14,17,7] ];
|
636
|
+
$coords{$coord}[9] = $alt;
|
637
|
+
my $lkup = $lang{$items[0]}; # 0=geoID
|
638
|
+
my $key = $items[1]; # 1=city
|
639
|
+
$lkup or die "Missing language for geoID $items[0]\n";
|
640
|
+
$cityFlags{$flags{$items[0]}} = ($cityFlags{$flags{$items[0]}} || 0) + 1 if defined $flags{$items[0]};
|
641
|
+
my $ccLang = $ccLang{$items[8]}; # get country-specific language
|
642
|
+
if ($ccLang and $$lkup{$ccLang}) {
|
643
|
+
my $lc = $ccLang . '-' . lc($items[8]); # eg. zh-cn
|
644
|
+
# add country suffix for this language in this country
|
645
|
+
$$lkup{$lc} = $$lkup{$ccLang} unless $$lkup{$lc};
|
646
|
+
}
|
647
|
+
foreach (@supportedLangs) {
|
648
|
+
next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
|
649
|
+
$langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
|
650
|
+
push @{$langLookups{$_}{$key}}, "$items[8]$items[10].$items[11],$$lkup{$_}";
|
651
|
+
}
|
652
|
+
my $p = int(100 * ++$i / $num + 0.5);
|
653
|
+
next if $percent == $p;
|
654
|
+
printf("\b\b\b\b%3d%%", $percent = $p);
|
655
|
+
flush STDOUT;
|
656
|
+
}
|
657
|
+
print "\b\b\b\bDone.\n";
|
658
|
+
}
|
659
|
+
|
660
|
+
# write city database
|
661
|
+
my $str = $noLang ? '' : " and $outAltNames";
|
662
|
+
my @t = localtime;
|
663
|
+
my $date = sprintf('%.4d-%.2d-%.2d', $t[5]+1900, $t[4]+1, $t[3]);
|
664
|
+
print "Writing $outDir/$outFile (version $dbVer)$str...\n";
|
665
|
+
print OUTFILE "Geolocation$dbVer\t",scalar(keys %coords),"\n";
|
666
|
+
print OUTFILE "# $date Cities with population $dbfiles[0]{minpop} or greater from geonames.org with a Creative Commons license\n";
|
667
|
+
|
668
|
+
if ($noLang) {
|
669
|
+
unlink "$outDir/$outAltNames";
|
670
|
+
} else {
|
671
|
+
open ALTOUT, ">$outDir/$outAltNames";
|
672
|
+
binmode ALTOUT;
|
673
|
+
}
|
674
|
+
my (%tz, @tz, %fcodes);
|
675
|
+
my $tzNum = 0;
|
676
|
+
foreach (sort { $a cmp $b } keys %coords) {
|
677
|
+
my $items = $coords{$_};
|
678
|
+
# @$items=(0.lat,1.lon,2.city,3.cc,4.rgn,5.admin2,6.population,7.timezone,8.feature code,9.alt names)
|
679
|
+
my $iCC = $orderCC{$$items[3]};
|
680
|
+
defined $iCC or warn("Unknown country code $$items[3]\n"), next;
|
681
|
+
my $iRgn = $orderRgn{"$$items[3]$$items[4]"} || 0;
|
682
|
+
my $iSub = $orderSub{"$$items[3]$$items[4].$$items[5]"} || 0;
|
683
|
+
my $tn = $tz{$$items[7]};
|
684
|
+
unless ($tn) {
|
685
|
+
push @tz, $$items[7];
|
686
|
+
$tn = $tz{$$items[7]} = $tzNum++;
|
687
|
+
}
|
688
|
+
# convert population to our binary format
|
689
|
+
# Note: format in ActivePerl is "3.1e+004", but "3.1+04" in other Perls,
|
690
|
+
# but other Perls will round 34500 to "3.4e+04", so add 1 to get "3.5e+04"
|
691
|
+
$$items[6] += 1 if $$items[6] > 100 and not $$items[6] % 10;
|
692
|
+
my $pop = sprintf('%.1e',$$items[6]);
|
693
|
+
# pack CC, population and region index into a 32-bit integer
|
694
|
+
my $code = ($iCC << 24) | (substr($pop,-1,1)<<20) | (substr($pop,0,1)<<16) | (substr($pop,2,1)<<12) | $iRgn;
|
695
|
+
$fcodes{$$items[8]} = ($fcodes{$$items[8]} || 0) + 1;
|
696
|
+
my $fc = $featureCodes{$$items[8]} || 0;
|
697
|
+
# store high bit of timezone index
|
698
|
+
if ($tn > 255) {
|
699
|
+
if ($dbVer eq '1.02') {
|
700
|
+
$iSub |= 0x8000;
|
701
|
+
$tn -= 256;
|
702
|
+
} else {
|
703
|
+
$fc |= 0x80;
|
704
|
+
$tn -= 256;
|
705
|
+
}
|
706
|
+
}
|
707
|
+
my $pt = pack('NnCC', $code, $iSub, $tn, $fc);
|
708
|
+
$$items[2] =~ tr/,//d; # remove any commas
|
709
|
+
print OUTFILE "$_$pt$$items[2]\n";
|
710
|
+
next if $noLang;
|
711
|
+
$$items[9] =~ tr/,/\n/;
|
712
|
+
print ALTOUT $$items[9],"\0";
|
713
|
+
}
|
714
|
+
my $altSize = 0;
|
715
|
+
unless ($noLang) {
|
716
|
+
$altSize = tell ALTOUT;
|
717
|
+
close ALTOUT;
|
718
|
+
}
|
719
|
+
print OUTFILE "\0\0\0\0\x01\n"; # section terminator
|
720
|
+
|
721
|
+
die "Too many time zones!\n" if $tzNum > 0x01ff;
|
722
|
+
|
723
|
+
if ($verbose) {
|
724
|
+
$i = 0;
|
725
|
+
print "Features kept:\n";
|
726
|
+
foreach (sort keys %fcodes) {
|
727
|
+
my $fc = $featureCodes{$_} || 0;
|
728
|
+
printf "%6d (%2d) %s\n", $fcodes{$_}, $fc, $_;
|
729
|
+
}
|
730
|
+
}
|
731
|
+
|
732
|
+
# write country codes
|
733
|
+
open COUNTRY, '<', "$dbdir/$countryFile" or die "Error opening $dbdir/$countryFile\n";
|
734
|
+
my %cc;
|
735
|
+
while (<COUNTRY>) {
|
736
|
+
next if /^#/;
|
737
|
+
my @items = split /\t/;
|
738
|
+
next unless $haveCountry{$items[0]};
|
739
|
+
$cc{$items[4]} = $items[0];
|
740
|
+
die "country code error\n" if length $items[0] != 2;
|
741
|
+
$items[4] =~ tr/,//d; # remove any commas
|
742
|
+
print OUTFILE "$items[0]$items[4]\n";
|
743
|
+
if ($lang{$items[16]}) { # (16=geoID)
|
744
|
+
my $lkup = $lang{$items[16]};
|
745
|
+
my $key = $items[4]; # country name
|
746
|
+
$ccFlags{$flags{$items[16]}} = ($ccFlags{$flags{$items[16]}} || 0) + 1 if defined $flags{$items[16]};
|
747
|
+
foreach (@supportedLangs) {
|
748
|
+
next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
|
749
|
+
$langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
|
750
|
+
push @{$langLookups{$_}{$key}}, ",$$lkup{$_}";
|
751
|
+
}
|
752
|
+
}
|
753
|
+
}
|
754
|
+
close COUNTRY;
|
755
|
+
|
756
|
+
print OUTFILE "\0\0\0\0\x02\n"; # section terminator
|
757
|
+
|
758
|
+
# write regions
|
759
|
+
print OUTFILE "\n"; # (null region)
|
760
|
+
open REGION, '<', "$dbdir/$regionFile" or die "Error opening $dbdir/$regionFile\n";
|
761
|
+
my %region;
|
762
|
+
while (<REGION>) {
|
763
|
+
chomp;
|
764
|
+
my @items = split /\t/;
|
765
|
+
#items: 0=region code, 1=name, 2=ascii, 3=geoID
|
766
|
+
$items[0] =~ tr/.//d; # (remove "." separator)
|
767
|
+
next unless $haveRegion{$items[0]};
|
768
|
+
$region{$items[0]} = $items[1];
|
769
|
+
$items[1] =~ tr/,//d; # remove any commas
|
770
|
+
print OUTFILE "$items[1]\n";
|
771
|
+
if ($lang{$items[3]}) { # (3=geoID)
|
772
|
+
my $lkup = $lang{$items[3]};
|
773
|
+
my $key = $items[1]; # region name
|
774
|
+
my $cc = substr($items[0], 0, 2);
|
775
|
+
$rgnFlags{$flags{$items[3]}} = ($rgnFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
|
776
|
+
foreach (@supportedLangs) {
|
777
|
+
next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
|
778
|
+
$langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
|
779
|
+
push @{$langLookups{$_}{$key}}, "$cc,$$lkup{$_}";
|
780
|
+
}
|
781
|
+
}
|
782
|
+
}
|
783
|
+
close REGION;
|
784
|
+
|
785
|
+
print OUTFILE "\0\0\0\0\x03\n"; # section terminator
|
786
|
+
|
787
|
+
# write subregions
|
788
|
+
print OUTFILE "\n"; # (null admin2)
|
789
|
+
open ADMIN2, '<', "$dbdir/$admin2File" or die "Error opening $dbdir/$admin2File\n";
|
790
|
+
my %subregion;
|
791
|
+
while (<ADMIN2>) {
|
792
|
+
chomp;
|
793
|
+
my @items = split /\t/;
|
794
|
+
#items: 0=admin2 code, 1=name, 2=ascii, 3=geoID
|
795
|
+
$items[0] =~ s/\.//; # (remove first "." separator)
|
796
|
+
next unless $haveSubRgn{$items[0]};
|
797
|
+
$subregion{$items[0]} = $items[1];
|
798
|
+
$items[1] =~ tr/,//d; # remove any commas
|
799
|
+
print OUTFILE "$items[1]\n";
|
800
|
+
if ($lang{$items[3]}) { # (3=geoID)
|
801
|
+
my $lkup = $lang{$items[3]};
|
802
|
+
my $key = $items[1]; # region name
|
803
|
+
$subFlags{$flags{$items[3]}} = ($subFlags{$flags{$items[3]}} || 0) + 1 if defined $flags{$items[3]};
|
804
|
+
my $rc = $items[0];
|
805
|
+
$rc =~ s/\..*//; # (remove subregion code)
|
806
|
+
foreach (@supportedLangs) {
|
807
|
+
next unless $$lkup{$_} and $$lkup{$_} ne $key; # (ignore if same)
|
808
|
+
$langLookups{$_}{$key} or $langLookups{$_}{$key} = [ ];
|
809
|
+
push @{$langLookups{$_}{$key}}, "$rc,$$lkup{$_}";
|
810
|
+
}
|
811
|
+
}
|
812
|
+
}
|
813
|
+
close ADMIN2;
|
814
|
+
|
815
|
+
print OUTFILE "\0\0\0\0\x04\n"; # section terminator
|
816
|
+
|
817
|
+
# write timezones
|
818
|
+
print OUTFILE $_,"\n" foreach @tz;
|
819
|
+
|
820
|
+
print OUTFILE "\0\0\0\0\x05\n"; # section terminator
|
821
|
+
|
822
|
+
# write feature codes and optional names
|
823
|
+
foreach (@featureCodes) {
|
824
|
+
print OUTFILE $_;
|
825
|
+
print OUTFILE ' ', $featureNames{$_} if $featureNames{$_};
|
826
|
+
print OUTFILE "\n";
|
827
|
+
}
|
828
|
+
|
829
|
+
# write terminator and close Geolocation.dat
|
830
|
+
print OUTFILE "\0\0\0\0\0\n"; # file terminator
|
831
|
+
my $outSize = tell OUTFILE;
|
832
|
+
close OUTFILE;
|
833
|
+
|
834
|
+
# write language lookups
|
835
|
+
my $langSize = 0;
|
836
|
+
my $langDir = "$outDir/$geoLang";
|
837
|
+
# delete existing languages
|
838
|
+
unlink <"$langDir/*.pm">;
|
839
|
+
if ($noLang) {
|
840
|
+
rmdir $langDir;
|
841
|
+
} else {
|
842
|
+
my $n = scalar(keys %langLookups);
|
843
|
+
print "Writing $n language files to $outDir/$geoLang...\n";
|
844
|
+
mkdir $langDir, 0777;
|
845
|
+
my ($lng, $key, $str, $nm, $alt);
|
846
|
+
foreach $lng (sort keys %langLookups) { # ($lng = language code)
|
847
|
+
my $myLng = $lng;
|
848
|
+
$myLng =~ tr/-/_/;
|
849
|
+
my $lkup = $langLookups{$lng};
|
850
|
+
my $file = "$myLng.pm";
|
851
|
+
open OUT, ">$langDir/$file" or die "Error creating $file\n";
|
852
|
+
binmode OUT;
|
853
|
+
print OUT "# Geolocation language translations for $myLng\n";
|
854
|
+
print OUT "#\n# Based on Creative Commons database from geonames.org\n\n";
|
855
|
+
print OUT "%Image::ExifTool::GeoLang::${myLng}::Translate = (\n";
|
856
|
+
foreach $key (sort keys %$lkup) {
|
857
|
+
($nm = $key) =~ s/'/\\'/g;
|
858
|
+
# count entries and use the most common one, then add others with country+region ID's
|
859
|
+
# (entries in @$li are of the form: City:"CCRc,Sc,Alt", Sub:"CCRc,Alt", Rgn:"CC,Alt", Country:",Alt")
|
860
|
+
# (Rc = region code, Sc = subregion code)
|
861
|
+
my $li = $$lkup{$key};
|
862
|
+
my %count;
|
863
|
+
# sort by popularity of alternate name
|
864
|
+
foreach (@$li) {
|
865
|
+
my $val = $_;
|
866
|
+
$val =~ s/.*?,//;
|
867
|
+
$count{$val} = ($count{$val} || 0) + 1;
|
868
|
+
}
|
869
|
+
my @order = sort { $count{$b} <=> $count{$a} or length($a) <=> length($b) or $a cmp $b } keys %count;
|
870
|
+
my $first = 1;
|
871
|
+
foreach $alt (@order) {
|
872
|
+
foreach (sort @$li) {
|
873
|
+
my ($code,$val) = split ',', $_, 2;
|
874
|
+
# ($code will be empty for a country name, and 2 characters for a region name,
|
875
|
+
# and contain a "." for a city name)
|
876
|
+
next unless $val eq $alt; # don't add if alternate name is the same
|
877
|
+
die "Backslash in translated name" if $val =~ /\\/;
|
878
|
+
$val =~ s/'/\\'/g; # escape single quotes
|
879
|
+
if ($first and $val !~ /\(/) { # (don't add general translation if name is qualified with brackets)
|
880
|
+
print OUT "\t'$nm' => '$val',\n";
|
881
|
+
undef $first;
|
882
|
+
last;
|
883
|
+
}
|
884
|
+
# format for keys in language table
|
885
|
+
# City: "CCRgn,Subregion,City", "CCRgn,,City", "CC,City", ",City"
|
886
|
+
# Subregion: "CCRgn,Subregion,", "CCRgn,,"
|
887
|
+
# Region: "CCRgn,"
|
888
|
+
# Country: "CC,"
|
889
|
+
# Any: "Name"
|
890
|
+
if (not $code) {
|
891
|
+
# this is a country
|
892
|
+
$code = $cc{$key};
|
893
|
+
printf OUT "\t'$code,' => '$val',\n";
|
894
|
+
} elsif ($code !~ /\./) {
|
895
|
+
# this is a region or subregion
|
896
|
+
print OUT "\t'$code$nm,' => '$val',\n";
|
897
|
+
} else {
|
898
|
+
# this is a city
|
899
|
+
# use region/subregions name instead of code
|
900
|
+
my $sub = $subregion{$code} || '';
|
901
|
+
$sub =~ s/'/\\'/g;
|
902
|
+
$code =~ s/\..*//;
|
903
|
+
$code = substr($code,0,2) . $region{$code} if $region{$code};
|
904
|
+
$code =~ s/'/\\'/g;
|
905
|
+
print OUT "\t'$code,$sub,$nm' => '$val',\n";
|
906
|
+
}
|
907
|
+
}
|
908
|
+
}
|
909
|
+
}
|
910
|
+
if ($featureLang{$lng}) {
|
911
|
+
print OUT "\n\t# feature types\n";
|
912
|
+
foreach (sort keys %{$featureLang{$lng}}) {
|
913
|
+
my $ftype = $featureLang{$lng}{$_};
|
914
|
+
$ftype =~ s/'/\\'/g;
|
915
|
+
print OUT "\t$_ => '$ftype',\n"
|
916
|
+
}
|
917
|
+
}
|
918
|
+
print OUT ");\n\n1; #end\n";
|
919
|
+
$langSize += tell OUT;
|
920
|
+
close OUT;
|
921
|
+
}
|
922
|
+
if ($verbose) {
|
923
|
+
my @type = ( City => \%cityFlags, Region => \%rgnFlags, Subregion => \%subFlags, Country => \%ccFlags );
|
924
|
+
for (;;) {
|
925
|
+
my $type = shift @type or last;
|
926
|
+
my $flags = shift @type;
|
927
|
+
print "$type flags:\n";
|
928
|
+
printf(" 0x%.2x - %d\n", 0, $$flags{0} || 0);
|
929
|
+
my @label = qw(preferred short colloquial historic);
|
930
|
+
foreach my $bit (0..5) {
|
931
|
+
my $n = 0;
|
932
|
+
$_ & (1<<$bit) and ++$n foreach keys %$flags;
|
933
|
+
printf(" 0x%.2x - %d (%s)\n", (1<<$bit), $n, shift(@label)) if $n;
|
934
|
+
}
|
935
|
+
}
|
936
|
+
}
|
937
|
+
}
|
938
|
+
|
939
|
+
print "Output file size(s):\n";
|
940
|
+
printf "%8.2f MB %s (%d entries)\n", $outSize / 1e6, $outFile, scalar(keys %coords);
|
941
|
+
printf "%8.2f MB %s\n", $altSize / 1e6, $outAltNames if $altSize;
|
942
|
+
printf "%8.2f MB %s/*.pm\n", $langSize / 1e6, $geoLang if $langSize;
|
943
|
+
printf "%8.2f MB Total\n", ($outSize + $altSize + $langSize) / 1e6 if $altSize or $langSize;
|
944
|
+
|
945
|
+
# end
|