langtable 0.0.66__py3-none-any.whl → 0.0.68__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- langtable/data/languages.xml.gz +0 -0
- langtable/data/territories.xml.gz +0 -0
- langtable/data/timezoneidparts.xml.gz +0 -0
- langtable/langtable.py +279 -278
- {langtable-0.0.66.dist-info → langtable-0.0.68.dist-info}/METADATA +1 -1
- {langtable-0.0.66.dist-info → langtable-0.0.68.dist-info}/RECORD +9 -9
- {langtable-0.0.66.dist-info → langtable-0.0.68.dist-info}/COPYING +0 -0
- {langtable-0.0.66.dist-info → langtable-0.0.68.dist-info}/WHEEL +0 -0
- {langtable-0.0.66.dist-info → langtable-0.0.68.dist-info}/top_level.txt +0 -0
langtable/data/languages.xml.gz
CHANGED
|
Binary file
|
|
Binary file
|
|
Binary file
|
langtable/langtable.py
CHANGED
|
@@ -1,5 +1,3 @@
|
|
|
1
|
-
# vim:fileencoding=utf-8:sw=4:et -*- coding: utf-8 -*-
|
|
2
|
-
|
|
3
1
|
# Copyright (c) 2013 Mike FABIAN <mfabian@redhat.com>
|
|
4
2
|
#
|
|
5
3
|
# This program is free software: you can redistribute it and/or modify
|
|
@@ -15,96 +13,104 @@
|
|
|
15
13
|
# You should have received a copy of the GNU General Public License
|
|
16
14
|
# along with this program. If not, see <http://www.gnu.org/licenses/>
|
|
17
15
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
# list_consolefonts()
|
|
28
|
-
# list_inputmethods()
|
|
29
|
-
# list_timezones()
|
|
30
|
-
# list_scripts()
|
|
31
|
-
# language_name()
|
|
32
|
-
# territory_name()
|
|
33
|
-
# timezone_name()
|
|
34
|
-
# languageId()
|
|
35
|
-
# territoryId()
|
|
36
|
-
# supports_ascii()
|
|
37
|
-
# list_all_languages()
|
|
38
|
-
# list_all_locales()
|
|
39
|
-
# list_all_keyboards()
|
|
40
|
-
# list_all_territories()
|
|
41
|
-
# list_all_timezones()
|
|
42
|
-
# list_all_scripts()
|
|
43
|
-
# list_all_input_methods()
|
|
44
|
-
# list_all_console_fonts()
|
|
45
|
-
#
|
|
46
|
-
# These are the functions which do not start with an “_” in their name.
|
|
47
|
-
# All global functions and global variables whose name starts with an
|
|
48
|
-
# “_” are internal and should not be used by a user of langtable.py.
|
|
49
|
-
#
|
|
50
|
-
# Many of the above public functions have named parameters like
|
|
51
|
-
#
|
|
52
|
-
# languageId
|
|
53
|
-
# scriptId
|
|
54
|
-
# territoryId
|
|
55
|
-
#
|
|
56
|
-
# and
|
|
57
|
-
#
|
|
58
|
-
# languageIdQuery
|
|
59
|
-
# scriptIdQuery
|
|
60
|
-
# territoryIdQuery
|
|
61
|
-
#
|
|
62
|
-
# languageId and languageIdQuery may contain a full locale name,
|
|
63
|
-
# specifying the script and the territory as well.
|
|
64
|
-
#
|
|
65
|
-
# For example:
|
|
66
|
-
#
|
|
67
|
-
# language_name(languageId="sr_Latn_RS")
|
|
68
|
-
#
|
|
69
|
-
# behaves the same as
|
|
70
|
-
#
|
|
71
|
-
# language_name(languageId="sr", scriptId="Latn", territoryId="RS")
|
|
72
|
-
#
|
|
73
|
-
# If languageId contains a script or a territory, the values found there
|
|
74
|
-
# are preferred over those given as extra parameters. For example:
|
|
75
|
-
#
|
|
76
|
-
# language_name(languageId="sr_Latn_RS", scriptId="Cyrl", territoryId="ME")
|
|
77
|
-
#
|
|
78
|
-
# behaves the same as
|
|
79
|
-
#
|
|
80
|
-
# language_name(languageId="sr", scriptId="Latn", territoryId="RS")
|
|
81
|
-
#
|
|
82
|
-
# scriptId="Cyrl" and territoryId="ME" are overridden by the values found
|
|
83
|
-
# in languageId.
|
|
84
|
-
#
|
|
85
|
-
# It is also possible to put a full locale name in the spelling used by glibc
|
|
86
|
-
# into languageId. For example:
|
|
87
|
-
#
|
|
88
|
-
# language_name(languageId="sr_RS.utf8@latin")
|
|
89
|
-
#
|
|
90
|
-
# or
|
|
91
|
-
#
|
|
92
|
-
# language_name(languageId="sr_RS.UTF-8@latin")
|
|
93
|
-
#
|
|
94
|
-
# also behave the same as:
|
|
95
|
-
#
|
|
96
|
-
# language_name(languageId="sr_Latn_RS")
|
|
97
|
-
#
|
|
98
|
-
# which is the same as:
|
|
99
|
-
#
|
|
100
|
-
# language_name(languageId="sr", scriptId="Latn", territoryId="RS")
|
|
101
|
-
#
|
|
102
|
-
# langtable always parses languageId, cuts out the encoding and translates
|
|
103
|
-
# script names in glibc spelling like "latin" to the official
|
|
104
|
-
# ISO 15924 script codes, see: https://en.wikipedia.org/wiki/ISO_15924
|
|
105
|
-
#
|
|
106
|
-
######################################################################
|
|
16
|
+
'''
|
|
17
|
+
Guessing reasonable defaults for locale, keyboard layout, territory, and language.
|
|
18
|
+
|
|
19
|
+
langtable is used to guess reasonable defaults for locale, keyboard,
|
|
20
|
+
territory, …, if part of that information is already known. For example,
|
|
21
|
+
guess the territory and the keyboard layout if the language is known or guess
|
|
22
|
+
the language and keyboard layout if the territory is already known.
|
|
23
|
+
|
|
24
|
+
Public API:
|
|
107
25
|
|
|
26
|
+
parse_locale()
|
|
27
|
+
list_locales()
|
|
28
|
+
list_keyboards()
|
|
29
|
+
list_common_languages()
|
|
30
|
+
list_common_locales()
|
|
31
|
+
list_common_keyboards()
|
|
32
|
+
list_consolefonts()
|
|
33
|
+
list_inputmethods()
|
|
34
|
+
list_timezones()
|
|
35
|
+
list_scripts()
|
|
36
|
+
language_name()
|
|
37
|
+
territory_name()
|
|
38
|
+
timezone_name()
|
|
39
|
+
languageId()
|
|
40
|
+
territoryId()
|
|
41
|
+
supports_ascii()
|
|
42
|
+
list_all_languages()
|
|
43
|
+
list_all_locales()
|
|
44
|
+
list_all_keyboards()
|
|
45
|
+
list_all_territories()
|
|
46
|
+
list_all_timezones()
|
|
47
|
+
list_all_scripts()
|
|
48
|
+
list_all_input_methods()
|
|
49
|
+
list_all_console_fonts()
|
|
50
|
+
|
|
51
|
+
These are the functions which do not start with an “_” in their name.
|
|
52
|
+
All global functions and global variables whose name starts with an
|
|
53
|
+
“_” are internal and should not be used by a user of langtable.py.
|
|
54
|
+
|
|
55
|
+
Many of the above public functions have named parameters like
|
|
56
|
+
|
|
57
|
+
languageId
|
|
58
|
+
scriptId
|
|
59
|
+
territoryId
|
|
60
|
+
|
|
61
|
+
and
|
|
62
|
+
|
|
63
|
+
languageIdQuery
|
|
64
|
+
scriptIdQuery
|
|
65
|
+
territoryIdQuery
|
|
66
|
+
|
|
67
|
+
languageId and languageIdQuery may contain a full locale name,
|
|
68
|
+
specifying the script and the territory as well.
|
|
69
|
+
|
|
70
|
+
For example:
|
|
71
|
+
|
|
72
|
+
language_name(languageId="sr_Latn_RS")
|
|
73
|
+
|
|
74
|
+
behaves the same as
|
|
75
|
+
|
|
76
|
+
language_name(languageId="sr", scriptId="Latn", territoryId="RS")
|
|
77
|
+
|
|
78
|
+
If languageId contains a script or a territory, the values found there
|
|
79
|
+
are preferred over those given as extra parameters. For example:
|
|
80
|
+
|
|
81
|
+
language_name(languageId="sr_Latn_RS", scriptId="Cyrl", territoryId="ME")
|
|
82
|
+
|
|
83
|
+
behaves the same as
|
|
84
|
+
|
|
85
|
+
language_name(languageId="sr", scriptId="Latn", territoryId="RS")
|
|
86
|
+
|
|
87
|
+
scriptId="Cyrl" and territoryId="ME" are overridden by the values found
|
|
88
|
+
in languageId.
|
|
89
|
+
|
|
90
|
+
It is also possible to put a full locale name in the spelling used by glibc
|
|
91
|
+
into languageId. For example:
|
|
92
|
+
|
|
93
|
+
language_name(languageId="sr_RS.utf8@latin")
|
|
94
|
+
|
|
95
|
+
or
|
|
96
|
+
|
|
97
|
+
language_name(languageId="sr_RS.UTF-8@latin")
|
|
98
|
+
|
|
99
|
+
also behave the same as:
|
|
100
|
+
|
|
101
|
+
language_name(languageId="sr_Latn_RS")
|
|
102
|
+
|
|
103
|
+
which is the same as:
|
|
104
|
+
|
|
105
|
+
language_name(languageId="sr", scriptId="Latn", territoryId="RS")
|
|
106
|
+
|
|
107
|
+
langtable always parses languageId, cuts out the encoding and translates
|
|
108
|
+
script names in glibc spelling like "latin" to the official
|
|
109
|
+
ISO 15924 script codes, see: https://en.wikipedia.org/wiki/ISO_15924
|
|
110
|
+
'''
|
|
111
|
+
|
|
112
|
+
# pylint: disable=invalid-name
|
|
113
|
+
# pylint: disable=redefined-outer-name
|
|
108
114
|
from typing import List
|
|
109
115
|
from typing import Dict
|
|
110
116
|
import os
|
|
@@ -170,7 +176,8 @@ _keyboards_db = {}
|
|
|
170
176
|
_timezones_db = {}
|
|
171
177
|
_timezoneIdParts_db = {}
|
|
172
178
|
|
|
173
|
-
class territory_db_item:
|
|
179
|
+
class territory_db_item: # pylint: disable=too-few-public-methods
|
|
180
|
+
'''Holds information for one territory'''
|
|
174
181
|
def __init__(self, names = None, scripts=None, locales=None, languages=None, keyboards=None, inputmethods=None, consolefonts=None, timezones=None):
|
|
175
182
|
self.names = names
|
|
176
183
|
self.scripts = scripts
|
|
@@ -181,7 +188,8 @@ class territory_db_item:
|
|
|
181
188
|
self.consolefonts = consolefonts
|
|
182
189
|
self.timezones = timezones
|
|
183
190
|
|
|
184
|
-
class language_db_item:
|
|
191
|
+
class language_db_item: # pylint: disable=too-few-public-methods
|
|
192
|
+
'''Holds information for one language'''
|
|
185
193
|
def __init__(self, iso639_1=None, iso639_2_t=None, iso639_2_b=None, names=None, scripts=None, locales=None, territories=None, keyboards=None, inputmethods=None, consolefonts=None, timezones=None):
|
|
186
194
|
self.iso639_1 = iso639_1
|
|
187
195
|
self.iso639_2_t = iso639_2_t
|
|
@@ -195,7 +203,8 @@ class language_db_item:
|
|
|
195
203
|
self.consolefonts = consolefonts
|
|
196
204
|
self.timezones = timezones
|
|
197
205
|
|
|
198
|
-
class keyboard_db_item:
|
|
206
|
+
class keyboard_db_item: # pylint: disable=too-few-public-methods
|
|
207
|
+
'''Holds information for one keyboard layout'''
|
|
199
208
|
def __init__(self, description=None, ascii=True, languages=None, territories = None, comment=None):
|
|
200
209
|
self.description = description
|
|
201
210
|
self.ascii = ascii
|
|
@@ -203,17 +212,19 @@ class keyboard_db_item:
|
|
|
203
212
|
self.languages = languages
|
|
204
213
|
self.territories = territories
|
|
205
214
|
|
|
206
|
-
class timezone_db_item:
|
|
215
|
+
class timezone_db_item: # pylint: disable=too-few-public-methods
|
|
216
|
+
'''Holds information for one timezone'''
|
|
207
217
|
def __init__(self, names=None):
|
|
208
218
|
self.names = names
|
|
209
219
|
|
|
210
|
-
class timezoneIdPart_db_item:
|
|
220
|
+
class timezoneIdPart_db_item: # pylint: disable=too-few-public-methods
|
|
221
|
+
'''Holds information for one timezone part'''
|
|
211
222
|
def __init__(self, names=None):
|
|
212
223
|
self.names = names
|
|
213
224
|
|
|
214
225
|
# xml.sax.handler.ContentHandler is not inherited from the 'object' class,
|
|
215
226
|
# 'super' keyword wouldn't work, we need to inherit it on our own
|
|
216
|
-
class LangtableContentHandler(ContentHandler
|
|
227
|
+
class LangtableContentHandler(ContentHandler):
|
|
217
228
|
"""
|
|
218
229
|
A base class inherited from the xml.sax.handler.ContentHandler class
|
|
219
230
|
providing handling for SAX events produced when parsing the langtable data
|
|
@@ -222,6 +233,7 @@ class LangtableContentHandler(ContentHandler, object):
|
|
|
222
233
|
"""
|
|
223
234
|
|
|
224
235
|
def __init__(self):
|
|
236
|
+
super().__init__()
|
|
225
237
|
# internal attribute used to set where the upcoming text data should be
|
|
226
238
|
# stored
|
|
227
239
|
self._save_to = None
|
|
@@ -246,7 +258,7 @@ class TerritoriesContentHandler(LangtableContentHandler):
|
|
|
246
258
|
"""Handler for SAX events produced when parsing the territories.xml file."""
|
|
247
259
|
|
|
248
260
|
def __init__(self):
|
|
249
|
-
super(
|
|
261
|
+
super().__init__()
|
|
250
262
|
|
|
251
263
|
# simple values
|
|
252
264
|
self._territoryId = None
|
|
@@ -267,27 +279,27 @@ class TerritoriesContentHandler(LangtableContentHandler):
|
|
|
267
279
|
self._timezones = None
|
|
268
280
|
|
|
269
281
|
def startElement(self, name, attrs):
|
|
270
|
-
if name ==
|
|
271
|
-
self._names =
|
|
272
|
-
self._scripts =
|
|
273
|
-
self._locales =
|
|
274
|
-
self._languages =
|
|
275
|
-
self._keyboards =
|
|
276
|
-
self._inputmethods =
|
|
277
|
-
self._consolefonts =
|
|
278
|
-
self._timezones =
|
|
282
|
+
if name == "territory":
|
|
283
|
+
self._names = {}
|
|
284
|
+
self._scripts = {}
|
|
285
|
+
self._locales = {}
|
|
286
|
+
self._languages = {}
|
|
287
|
+
self._keyboards = {}
|
|
288
|
+
self._inputmethods = {}
|
|
289
|
+
self._consolefonts = {}
|
|
290
|
+
self._timezones = {}
|
|
279
291
|
|
|
280
292
|
# non-dict values
|
|
281
|
-
elif name ==
|
|
293
|
+
elif name == "territoryId":
|
|
282
294
|
self._save_to = "_territoryId"
|
|
283
295
|
|
|
284
296
|
# dict items
|
|
285
|
-
elif name in (
|
|
286
|
-
|
|
297
|
+
elif name in ("languageId", "scriptId", "localeId", "keyboardId", "inputmethodId",
|
|
298
|
+
"consolefontId", "timezoneId"):
|
|
287
299
|
self._save_to = "_item_id"
|
|
288
|
-
elif name ==
|
|
300
|
+
elif name == "trName":
|
|
289
301
|
self._save_to = "_item_name"
|
|
290
|
-
elif name ==
|
|
302
|
+
elif name == "rank":
|
|
291
303
|
self._save_to = "_item_rank"
|
|
292
304
|
|
|
293
305
|
def endElement(self, name):
|
|
@@ -295,7 +307,7 @@ class TerritoriesContentHandler(LangtableContentHandler):
|
|
|
295
307
|
# of an element no text should appear
|
|
296
308
|
self._save_to = None
|
|
297
309
|
|
|
298
|
-
if name ==
|
|
310
|
+
if name == "territory":
|
|
299
311
|
_territories_db[str(self._territoryId)] = territory_db_item(
|
|
300
312
|
names = self._names,
|
|
301
313
|
scripts = self._scripts,
|
|
@@ -318,28 +330,28 @@ class TerritoriesContentHandler(LangtableContentHandler):
|
|
|
318
330
|
self._timezones = None
|
|
319
331
|
|
|
320
332
|
# populating dictionaries
|
|
321
|
-
elif name ==
|
|
333
|
+
elif name == "name":
|
|
322
334
|
self._names[str(self._item_id)] = self._item_name
|
|
323
335
|
self._clear_item()
|
|
324
|
-
elif name ==
|
|
336
|
+
elif name == "script":
|
|
325
337
|
self._scripts[str(self._item_id)] = int(self._item_rank)
|
|
326
338
|
self._clear_item()
|
|
327
|
-
elif name ==
|
|
339
|
+
elif name == "locale":
|
|
328
340
|
self._locales[str(self._item_id)] = int(self._item_rank)
|
|
329
341
|
self._clear_item()
|
|
330
|
-
elif name ==
|
|
342
|
+
elif name == "language":
|
|
331
343
|
self._languages[str(self._item_id)] = int(self._item_rank)
|
|
332
344
|
self._clear_item()
|
|
333
|
-
elif name ==
|
|
345
|
+
elif name == "keyboard":
|
|
334
346
|
self._keyboards[str(self._item_id)] = int(self._item_rank)
|
|
335
347
|
self._clear_item()
|
|
336
|
-
elif name ==
|
|
348
|
+
elif name == "inputmethod":
|
|
337
349
|
self._inputmethods[str(self._item_id)] = int(self._item_rank)
|
|
338
350
|
self._clear_item()
|
|
339
|
-
elif name ==
|
|
351
|
+
elif name == "consolefont":
|
|
340
352
|
self._consolefonts[str(self._item_id)] = int(self._item_rank)
|
|
341
353
|
self._clear_item()
|
|
342
|
-
elif name ==
|
|
354
|
+
elif name == "timezone":
|
|
343
355
|
self._timezones[str(self._item_id)] = int(self._item_rank)
|
|
344
356
|
self._clear_item()
|
|
345
357
|
|
|
@@ -352,7 +364,7 @@ class KeyboardsContentHandler(LangtableContentHandler):
|
|
|
352
364
|
"""Handler for SAX events produced when parsing the keyboards.xml file."""
|
|
353
365
|
|
|
354
366
|
def __init__(self):
|
|
355
|
-
super(
|
|
367
|
+
super().__init__()
|
|
356
368
|
|
|
357
369
|
# simple values
|
|
358
370
|
self._keyboardId = None
|
|
@@ -369,24 +381,24 @@ class KeyboardsContentHandler(LangtableContentHandler):
|
|
|
369
381
|
self._territories = None
|
|
370
382
|
|
|
371
383
|
def startElement(self, name, attrs):
|
|
372
|
-
if name ==
|
|
373
|
-
self._languages =
|
|
374
|
-
self._territories =
|
|
384
|
+
if name == "keyboard":
|
|
385
|
+
self._languages = {}
|
|
386
|
+
self._territories = {}
|
|
375
387
|
|
|
376
388
|
# non-dict values
|
|
377
|
-
elif name ==
|
|
389
|
+
elif name == "keyboardId":
|
|
378
390
|
self._save_to = "_keyboardId"
|
|
379
|
-
elif name ==
|
|
391
|
+
elif name == "description":
|
|
380
392
|
self._save_to = "_description"
|
|
381
|
-
elif name ==
|
|
393
|
+
elif name == "ascii":
|
|
382
394
|
self._save_to = "_ascii"
|
|
383
|
-
elif name ==
|
|
395
|
+
elif name == "comment":
|
|
384
396
|
self._save_to = "_comment"
|
|
385
397
|
|
|
386
398
|
# dict items
|
|
387
|
-
elif name in (
|
|
399
|
+
elif name in ("languageId", "territoryId"):
|
|
388
400
|
self._save_to = "_item_id"
|
|
389
|
-
elif name ==
|
|
401
|
+
elif name == "rank":
|
|
390
402
|
self._save_to = "_item_rank"
|
|
391
403
|
|
|
392
404
|
def endElement(self, name):
|
|
@@ -394,10 +406,10 @@ class KeyboardsContentHandler(LangtableContentHandler):
|
|
|
394
406
|
# of an element no text should appear
|
|
395
407
|
self._save_to = None
|
|
396
408
|
|
|
397
|
-
if name ==
|
|
409
|
+
if name == "keyboard":
|
|
398
410
|
_keyboards_db[str(self._keyboardId)] = keyboard_db_item(
|
|
399
411
|
description = self._description,
|
|
400
|
-
ascii = self._ascii ==
|
|
412
|
+
ascii = self._ascii == "True",
|
|
401
413
|
comment = self._comment,
|
|
402
414
|
languages = self._languages,
|
|
403
415
|
territories = self._territories)
|
|
@@ -411,10 +423,10 @@ class KeyboardsContentHandler(LangtableContentHandler):
|
|
|
411
423
|
self._territories = None
|
|
412
424
|
|
|
413
425
|
# populating dictionaries
|
|
414
|
-
elif name ==
|
|
426
|
+
elif name == "language":
|
|
415
427
|
self._languages[str(self._item_id)] = int(self._item_rank)
|
|
416
428
|
self._clear_item()
|
|
417
|
-
elif name ==
|
|
429
|
+
elif name == "territory":
|
|
418
430
|
self._territories[str(self._item_id)] = int(self._item_rank)
|
|
419
431
|
self._clear_item()
|
|
420
432
|
|
|
@@ -426,7 +438,7 @@ class LanguagesContentHandler(LangtableContentHandler):
|
|
|
426
438
|
"""Handler for SAX events produced when parsing the languages.xml file."""
|
|
427
439
|
|
|
428
440
|
def __init__(self):
|
|
429
|
-
super(
|
|
441
|
+
super().__init__()
|
|
430
442
|
# simple values
|
|
431
443
|
self._languageId = None
|
|
432
444
|
self._iso639_1 = None
|
|
@@ -453,39 +465,39 @@ class LanguagesContentHandler(LangtableContentHandler):
|
|
|
453
465
|
self._timezones = None
|
|
454
466
|
|
|
455
467
|
def startElement(self, name, attrs):
|
|
456
|
-
if name ==
|
|
457
|
-
self._names =
|
|
458
|
-
self._scripts =
|
|
459
|
-
self._locales =
|
|
460
|
-
self._territories =
|
|
461
|
-
self._keyboards =
|
|
462
|
-
self._inputmethods =
|
|
463
|
-
self._consolefonts =
|
|
464
|
-
self._timezones =
|
|
468
|
+
if name == "language":
|
|
469
|
+
self._names = {}
|
|
470
|
+
self._scripts = {}
|
|
471
|
+
self._locales = {}
|
|
472
|
+
self._territories = {}
|
|
473
|
+
self._keyboards = {}
|
|
474
|
+
self._inputmethods = {}
|
|
475
|
+
self._consolefonts = {}
|
|
476
|
+
self._timezones = {}
|
|
465
477
|
|
|
466
478
|
# non-dict values
|
|
467
|
-
elif name ==
|
|
479
|
+
elif name == "languageId" and not self._in_names:
|
|
468
480
|
# ID of the language
|
|
469
481
|
self._save_to = "_languageId"
|
|
470
|
-
elif name ==
|
|
482
|
+
elif name == "iso639-1":
|
|
471
483
|
self._save_to = "_iso639_1"
|
|
472
|
-
elif name ==
|
|
484
|
+
elif name == "iso639-2-t":
|
|
473
485
|
self._save_to = "_iso639_2_t"
|
|
474
|
-
elif name ==
|
|
486
|
+
elif name == "iso639-2-b":
|
|
475
487
|
self._save_to = "_iso639_2_b"
|
|
476
|
-
elif name ==
|
|
488
|
+
elif name == "names":
|
|
477
489
|
self._in_names = True
|
|
478
490
|
|
|
479
491
|
# dict items
|
|
480
|
-
elif name in (
|
|
481
|
-
|
|
492
|
+
elif name in ("scriptId", "localeId", "territoryId", "keyboardId", "inputmethodId",
|
|
493
|
+
"consolefontId", "timezoneId"):
|
|
482
494
|
self._save_to = "_item_id"
|
|
483
|
-
elif name ==
|
|
495
|
+
elif name == "languageId" and self._in_names:
|
|
484
496
|
# ID of the translated name's language
|
|
485
497
|
self._save_to = "_item_id"
|
|
486
|
-
elif name ==
|
|
498
|
+
elif name == "trName":
|
|
487
499
|
self._save_to = "_item_name"
|
|
488
|
-
elif name ==
|
|
500
|
+
elif name == "rank":
|
|
489
501
|
self._save_to = "_item_rank"
|
|
490
502
|
|
|
491
503
|
def endElement(self, name):
|
|
@@ -493,7 +505,7 @@ class LanguagesContentHandler(LangtableContentHandler):
|
|
|
493
505
|
# of an element no text should appear
|
|
494
506
|
self._save_to = None
|
|
495
507
|
|
|
496
|
-
if name ==
|
|
508
|
+
if name == "language":
|
|
497
509
|
_languages_db[str(self._languageId)] = language_db_item(
|
|
498
510
|
iso639_1 = self._iso639_1,
|
|
499
511
|
iso639_2_t = self._iso639_2_t,
|
|
@@ -522,32 +534,32 @@ class LanguagesContentHandler(LangtableContentHandler):
|
|
|
522
534
|
self._timezones = None
|
|
523
535
|
|
|
524
536
|
# leaving the "names" element
|
|
525
|
-
elif name ==
|
|
537
|
+
elif name == "names":
|
|
526
538
|
self._in_names = False
|
|
527
539
|
|
|
528
540
|
# populating dictionaries
|
|
529
|
-
elif name ==
|
|
541
|
+
elif name == "name":
|
|
530
542
|
self._names[str(self._item_id)] = self._item_name
|
|
531
543
|
self._clear_item()
|
|
532
|
-
elif name ==
|
|
544
|
+
elif name == "script":
|
|
533
545
|
self._scripts[str(self._item_id)] = int(self._item_rank)
|
|
534
546
|
self._clear_item()
|
|
535
|
-
elif name ==
|
|
547
|
+
elif name == "locale":
|
|
536
548
|
self._locales[str(self._item_id)] = int(self._item_rank)
|
|
537
549
|
self._clear_item()
|
|
538
|
-
elif name ==
|
|
550
|
+
elif name == "territory":
|
|
539
551
|
self._territories[str(self._item_id)] = int(self._item_rank)
|
|
540
552
|
self._clear_item()
|
|
541
|
-
elif name ==
|
|
553
|
+
elif name == "keyboard":
|
|
542
554
|
self._keyboards[str(self._item_id)] = int(self._item_rank)
|
|
543
555
|
self._clear_item()
|
|
544
|
-
elif name ==
|
|
556
|
+
elif name == "inputmethod":
|
|
545
557
|
self._inputmethods[str(self._item_id)] = int(self._item_rank)
|
|
546
558
|
self._clear_item()
|
|
547
|
-
elif name ==
|
|
559
|
+
elif name == "consolefont":
|
|
548
560
|
self._consolefonts[str(self._item_id)] = int(self._item_rank)
|
|
549
561
|
self._clear_item()
|
|
550
|
-
elif name ==
|
|
562
|
+
elif name == "timezone":
|
|
551
563
|
self._timezones[str(self._item_id)] = int(self._item_rank)
|
|
552
564
|
self._clear_item()
|
|
553
565
|
|
|
@@ -560,7 +572,7 @@ class TimezonesContentHandler(LangtableContentHandler):
|
|
|
560
572
|
"""Handler for SAX events produced when parsing the timezones.xml file."""
|
|
561
573
|
|
|
562
574
|
def __init__(self):
|
|
563
|
-
super(
|
|
575
|
+
super().__init__()
|
|
564
576
|
# simple values
|
|
565
577
|
self._timezoneId = None
|
|
566
578
|
|
|
@@ -572,19 +584,19 @@ class TimezonesContentHandler(LangtableContentHandler):
|
|
|
572
584
|
self._names = None
|
|
573
585
|
|
|
574
586
|
def startElement(self, name, attrs):
|
|
575
|
-
if name ==
|
|
576
|
-
self._names =
|
|
587
|
+
if name == "timezone":
|
|
588
|
+
self._names = {}
|
|
577
589
|
|
|
578
590
|
# non-dict values
|
|
579
|
-
elif name ==
|
|
591
|
+
elif name == "timezoneId":
|
|
580
592
|
# ID of the timezone
|
|
581
593
|
self._save_to = "_timezoneId"
|
|
582
594
|
|
|
583
595
|
# dict items
|
|
584
|
-
elif name ==
|
|
596
|
+
elif name == "languageId":
|
|
585
597
|
# ID of the translated timezone's language
|
|
586
598
|
self._save_to = "_item_id"
|
|
587
|
-
elif name ==
|
|
599
|
+
elif name == "trName":
|
|
588
600
|
self._save_to = "_item_name"
|
|
589
601
|
|
|
590
602
|
def endElement(self, name):
|
|
@@ -592,7 +604,7 @@ class TimezonesContentHandler(LangtableContentHandler):
|
|
|
592
604
|
# of an element no text should appear
|
|
593
605
|
self._save_to = None
|
|
594
606
|
|
|
595
|
-
if name ==
|
|
607
|
+
if name == "timezone":
|
|
596
608
|
_timezones_db[str(self._timezoneId)] = timezone_db_item(
|
|
597
609
|
names = self._names)
|
|
598
610
|
|
|
@@ -601,7 +613,7 @@ class TimezonesContentHandler(LangtableContentHandler):
|
|
|
601
613
|
self._names = None
|
|
602
614
|
|
|
603
615
|
# populating dictionaries
|
|
604
|
-
elif name ==
|
|
616
|
+
elif name == "name":
|
|
605
617
|
self._names[str(self._item_id)] = self._item_name
|
|
606
618
|
self._clear_item()
|
|
607
619
|
|
|
@@ -613,7 +625,7 @@ class TimezoneIdPartsContentHandler(LangtableContentHandler):
|
|
|
613
625
|
"""Handler for SAX events produced when parsing the timezoneidparts.xml file."""
|
|
614
626
|
|
|
615
627
|
def __init__(self):
|
|
616
|
-
super(
|
|
628
|
+
super().__init__()
|
|
617
629
|
# simple values
|
|
618
630
|
self._timezoneIdPartId = None
|
|
619
631
|
|
|
@@ -625,19 +637,19 @@ class TimezoneIdPartsContentHandler(LangtableContentHandler):
|
|
|
625
637
|
self._names = None
|
|
626
638
|
|
|
627
639
|
def startElement(self, name, attrs):
|
|
628
|
-
if name ==
|
|
629
|
-
self._names =
|
|
640
|
+
if name == "timezoneIdPart":
|
|
641
|
+
self._names = {}
|
|
630
642
|
|
|
631
643
|
# non-dict values
|
|
632
|
-
elif name ==
|
|
644
|
+
elif name == "timezoneIdPartId":
|
|
633
645
|
# partial timezone ID
|
|
634
646
|
self._save_to = "_timezoneIdPartId"
|
|
635
647
|
|
|
636
648
|
# dict items
|
|
637
|
-
elif name ==
|
|
649
|
+
elif name == "languageId":
|
|
638
650
|
# ID of the translated partial timezone ID's language
|
|
639
651
|
self._save_to = "_item_id"
|
|
640
|
-
elif name ==
|
|
652
|
+
elif name == "trName":
|
|
641
653
|
self._save_to = "_item_name"
|
|
642
654
|
|
|
643
655
|
def endElement(self, name):
|
|
@@ -645,7 +657,7 @@ class TimezoneIdPartsContentHandler(LangtableContentHandler):
|
|
|
645
657
|
# of an element no text should appear
|
|
646
658
|
self._save_to = None
|
|
647
659
|
|
|
648
|
-
if name ==
|
|
660
|
+
if name == "timezoneIdPart":
|
|
649
661
|
_timezoneIdParts_db[str(self._timezoneIdPartId)] = timezoneIdPart_db_item(
|
|
650
662
|
names = self._names)
|
|
651
663
|
|
|
@@ -654,7 +666,7 @@ class TimezoneIdPartsContentHandler(LangtableContentHandler):
|
|
|
654
666
|
self._names = None
|
|
655
667
|
|
|
656
668
|
# populating dictionaries
|
|
657
|
-
elif name ==
|
|
669
|
+
elif name == "name":
|
|
658
670
|
self._names[str(self._item_id)] = self._item_name
|
|
659
671
|
self._clear_item()
|
|
660
672
|
|
|
@@ -745,7 +757,6 @@ def _write_territories_file(file):
|
|
|
745
757
|
file.write(' </timezones>\n')
|
|
746
758
|
file.write(' </territory>\n')
|
|
747
759
|
file.write('</territories>\n')
|
|
748
|
-
return
|
|
749
760
|
|
|
750
761
|
def _write_languages_file(file):
|
|
751
762
|
'''
|
|
@@ -833,7 +844,6 @@ def _write_languages_file(file):
|
|
|
833
844
|
file.write(' </timezones>\n')
|
|
834
845
|
file.write(' </language>\n')
|
|
835
846
|
file.write('</languages>\n')
|
|
836
|
-
return
|
|
837
847
|
|
|
838
848
|
def _write_keyboards_file(file):
|
|
839
849
|
'''
|
|
@@ -846,7 +856,7 @@ def _write_keyboards_file(file):
|
|
|
846
856
|
file.write(' <keyboardId>'+keyboardId+'</keyboardId>\n')
|
|
847
857
|
file.write(' <description>'+_keyboards_db[keyboardId].description+'</description>\n')
|
|
848
858
|
file.write(' <ascii>'+str(_keyboards_db[keyboardId].ascii)+'</ascii>\n')
|
|
849
|
-
if _keyboards_db[keyboardId].comment
|
|
859
|
+
if _keyboards_db[keyboardId].comment is not None:
|
|
850
860
|
file.write(' <comment>'+_keyboards_db[keyboardId].comment+'</comment>\n')
|
|
851
861
|
languages = _keyboards_db[keyboardId].languages
|
|
852
862
|
file.write(' <languages>\n')
|
|
@@ -868,7 +878,6 @@ def _write_keyboards_file(file):
|
|
|
868
878
|
file.write(' </territories>\n')
|
|
869
879
|
file.write(' </keyboard>\n')
|
|
870
880
|
file.write('</keyboards>\n')
|
|
871
|
-
return
|
|
872
881
|
|
|
873
882
|
def _write_timezones_file(file):
|
|
874
883
|
'''
|
|
@@ -890,7 +899,6 @@ def _write_timezones_file(file):
|
|
|
890
899
|
file.write(' </names>\n')
|
|
891
900
|
file.write(' </timezone>\n')
|
|
892
901
|
file.write('</timezones>\n')
|
|
893
|
-
return
|
|
894
902
|
|
|
895
903
|
def _write_timezoneIdParts_file(file):
|
|
896
904
|
'''
|
|
@@ -912,7 +920,6 @@ def _write_timezoneIdParts_file(file):
|
|
|
912
920
|
file.write(' </names>\n')
|
|
913
921
|
file.write(' </timezoneIdPart>\n')
|
|
914
922
|
file.write('</timezoneIdParts>\n')
|
|
915
|
-
return
|
|
916
923
|
|
|
917
924
|
def _expat_parse(file, sax_handler):
|
|
918
925
|
"""
|
|
@@ -930,20 +937,20 @@ def _read_file(filename, sax_handler):
|
|
|
930
937
|
'''
|
|
931
938
|
Only for internal use
|
|
932
939
|
'''
|
|
933
|
-
for
|
|
940
|
+
for directory in (
|
|
934
941
|
os.path.join(os.path.dirname(os.path.realpath(__file__)), 'data'),
|
|
935
942
|
os.path.join(_DATADIR, 'data')):
|
|
936
|
-
path = os.path.join(
|
|
943
|
+
path = os.path.join(directory, filename)
|
|
937
944
|
if os.path.isfile(path):
|
|
938
945
|
with open(path, mode='rb') as file:
|
|
939
|
-
logging.info('reading file=%s'
|
|
946
|
+
logging.info('reading file=%s', file)
|
|
940
947
|
_expat_parse(file, sax_handler)
|
|
941
948
|
_INFO['data_files_read'].append(path)
|
|
942
949
|
return
|
|
943
|
-
path = os.path.join(
|
|
950
|
+
path = os.path.join(directory, filename+'.gz')
|
|
944
951
|
if os.path.isfile(path):
|
|
945
952
|
with gzip.open(path, mode='rb') as file:
|
|
946
|
-
logging.info('reading file=%s'
|
|
953
|
+
logging.info('reading file=%s', file)
|
|
947
954
|
_expat_parse(file, sax_handler)
|
|
948
955
|
_INFO['data_files_read'].append(path)
|
|
949
956
|
return
|
|
@@ -953,31 +960,30 @@ def _write_files(territoriesfilename, languagesfilename, keyboardsfilename, time
|
|
|
953
960
|
'''
|
|
954
961
|
Only for internal use
|
|
955
962
|
'''
|
|
956
|
-
with open(territoriesfilename, 'w') as territoriesfile:
|
|
957
|
-
logging.info(
|
|
963
|
+
with open(territoriesfilename, 'w', encoding='UTF-8') as territoriesfile:
|
|
964
|
+
logging.info('writing territories file=%s', territoriesfile)
|
|
958
965
|
_write_territories_file(territoriesfile)
|
|
959
|
-
with open(languagesfilename, 'w') as languagesfile:
|
|
960
|
-
logging.info(
|
|
966
|
+
with open(languagesfilename, 'w', encoding='UTF-8') as languagesfile:
|
|
967
|
+
logging.info('writing languages file=%s', languagesfile)
|
|
961
968
|
_write_languages_file(languagesfile)
|
|
962
|
-
with open(keyboardsfilename, 'w') as keyboardsfile:
|
|
963
|
-
logging.info(
|
|
969
|
+
with open(keyboardsfilename, 'w', encoding='UTF-8') as keyboardsfile:
|
|
970
|
+
logging.info('writing keyboards file=%s', keyboardsfile)
|
|
964
971
|
_write_keyboards_file(keyboardsfile)
|
|
965
|
-
with open(keyboardsfilename, 'w') as keyboardsfile:
|
|
966
|
-
logging.info(
|
|
972
|
+
with open(keyboardsfilename, 'w', encoding='UTF-8') as keyboardsfile:
|
|
973
|
+
logging.info('writing keyboards file=%s', keyboardsfile)
|
|
967
974
|
_write_keyboards_file(keyboardsfile)
|
|
968
|
-
with open(timezonesfilename, 'w') as timezonesfile:
|
|
969
|
-
logging.info(
|
|
975
|
+
with open(timezonesfilename, 'w', encoding='UTF-8') as timezonesfile:
|
|
976
|
+
logging.info('writing timezones file=%s', timezonesfile)
|
|
970
977
|
_write_timezones_file(timezonesfile)
|
|
971
|
-
with open(timezoneidpartsfilename, 'w') as timezoneidpartsfile:
|
|
972
|
-
logging.info(
|
|
978
|
+
with open(timezoneidpartsfilename, 'w', encoding='UTF-8') as timezoneidpartsfile:
|
|
979
|
+
logging.info('writing timezoneidparts file=%s', timezoneidpartsfile)
|
|
973
980
|
_write_timezoneIdParts_file(timezoneidpartsfile)
|
|
974
|
-
return
|
|
975
981
|
|
|
976
|
-
def _dictionary_to_ranked_list(
|
|
982
|
+
def _dictionary_to_ranked_list(dictionary, reverse=True):
|
|
977
983
|
sorted_list = []
|
|
978
|
-
for item in sorted(
|
|
979
|
-
if
|
|
980
|
-
sorted_list.append([item,
|
|
984
|
+
for item in sorted(dictionary, key=lambda x: (dictionary.get(x), x), reverse=reverse):
|
|
985
|
+
if dictionary[item] != 0:
|
|
986
|
+
sorted_list.append([item, dictionary[item]])
|
|
981
987
|
return sorted_list
|
|
982
988
|
|
|
983
989
|
def _ranked_list_to_list(ranked_list):
|
|
@@ -992,7 +998,7 @@ def _make_ranked_list_concise(ranked_list, cut_off_factor=1000):
|
|
|
992
998
|
break
|
|
993
999
|
return ranked_list
|
|
994
1000
|
|
|
995
|
-
def _capitalize_name(text, languageId='', scriptId='', territoryId='', languageIdQuery='', scriptIdQuery='', territoryIdQuery=''):
|
|
1001
|
+
def _capitalize_name(text, languageId='', scriptId='', territoryId='', languageIdQuery='', scriptIdQuery='', territoryIdQuery=''): # pylint: disable=unused-argument
|
|
996
1002
|
'''
|
|
997
1003
|
Title cases the first letter of “text”
|
|
998
1004
|
|
|
@@ -1023,7 +1029,7 @@ def _capitalize_name(text, languageId='', scriptId='', territoryId='', languageI
|
|
|
1023
1029
|
if not languageIdQuery:
|
|
1024
1030
|
languageIdQuery = 'en'
|
|
1025
1031
|
for lang in ('ka', 'nr', 'ss', 'xh', 'yo', 'zu'):
|
|
1026
|
-
if re.match(
|
|
1032
|
+
if re.match(rf'^{lang}', languageIdQuery):
|
|
1027
1033
|
return text
|
|
1028
1034
|
return text[0].capitalize() + text[1:]
|
|
1029
1035
|
|
|
@@ -1185,7 +1191,7 @@ def parse_locale(localeId):
|
|
|
1185
1191
|
if match.group('territory'):
|
|
1186
1192
|
territory = match.group('territory')
|
|
1187
1193
|
else:
|
|
1188
|
-
logging.info("localeId contains invalid locale id=%s"
|
|
1194
|
+
logging.info("localeId contains invalid locale id=%s", localeId)
|
|
1189
1195
|
return Locale(language=language,
|
|
1190
1196
|
script=script,
|
|
1191
1197
|
territory=territory,
|
|
@@ -1295,8 +1301,7 @@ def _parse_and_split_languageId(languageId='', scriptId='', territoryId=''):
|
|
|
1295
1301
|
variant=locale.variant,
|
|
1296
1302
|
encoding=locale.encoding)
|
|
1297
1303
|
if not locale.script and scriptId:
|
|
1298
|
-
|
|
1299
|
-
scriptId = _glibc_script_ids[scriptId]
|
|
1304
|
+
scriptId = _glibc_script_ids.get(scriptId, scriptId)
|
|
1300
1305
|
locale = Locale(language=locale.language,
|
|
1301
1306
|
script=scriptId,
|
|
1302
1307
|
territory=locale.territory,
|
|
@@ -1326,7 +1331,7 @@ def _parse_and_split_languageId(languageId='', scriptId='', territoryId=''):
|
|
|
1326
1331
|
return locale
|
|
1327
1332
|
|
|
1328
1333
|
def territory_name(territoryId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None, fallback=True):
|
|
1329
|
-
|
|
1334
|
+
'''Query translations of territory names
|
|
1330
1335
|
|
|
1331
1336
|
:param territoryId: identifier for the territory
|
|
1332
1337
|
:type territoryId: string
|
|
@@ -1370,7 +1375,7 @@ def territory_name(territoryId = None, languageIdQuery = None, scriptIdQuery = N
|
|
|
1370
1375
|
territoryIdQuery=territoryIdQuery)
|
|
1371
1376
|
|
|
1372
1377
|
def _territory_name(territoryId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None, fallback=True):
|
|
1373
|
-
|
|
1378
|
+
'''Internal function to query translations of territory names
|
|
1374
1379
|
|
|
1375
1380
|
:param territoryId: identifier for the territory
|
|
1376
1381
|
:type territoryId: string
|
|
@@ -1421,7 +1426,7 @@ def _territory_name(territoryId = None, languageIdQuery = None, scriptIdQuery =
|
|
|
1421
1426
|
return ''
|
|
1422
1427
|
|
|
1423
1428
|
def language_name(languageId = None, scriptId = None, territoryId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None, fallback=True):
|
|
1424
|
-
|
|
1429
|
+
'''Query translations of language names
|
|
1425
1430
|
|
|
1426
1431
|
:param languageId: identifier for the language
|
|
1427
1432
|
:type languageId: string
|
|
@@ -1515,7 +1520,7 @@ def language_name(languageId = None, scriptId = None, territoryId = None, langua
|
|
|
1515
1520
|
territoryIdQuery=territoryIdQuery)
|
|
1516
1521
|
|
|
1517
1522
|
def _language_name(languageId = None, scriptId = None, territoryId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None, fallback=True):
|
|
1518
|
-
|
|
1523
|
+
'''Internal function to query translations of language names
|
|
1519
1524
|
|
|
1520
1525
|
:param languageId: identifier for the language
|
|
1521
1526
|
:type languageId: string
|
|
@@ -1627,7 +1632,7 @@ def _language_name(languageId = None, scriptId = None, territoryId = None, langu
|
|
|
1627
1632
|
icuLocaleIdQuery = languageIdQuery
|
|
1628
1633
|
if icuLocaleIdQuery in _languages_db[icuLocaleId].names:
|
|
1629
1634
|
return _languages_db[icuLocaleId].names[icuLocaleIdQuery]
|
|
1630
|
-
if
|
|
1635
|
+
if locale.variant not in ('VALENCIA',):
|
|
1631
1636
|
# Don’t do this if locale variant is VALENCIA
|
|
1632
1637
|
# because then this will run into endless recursion:
|
|
1633
1638
|
lname = language_name(languageId=languageId,
|
|
@@ -1693,7 +1698,7 @@ def _timezone_name_from_id_parts(timezoneId = None, icuLocaleIdQuery = None):
|
|
|
1693
1698
|
name = timezoneId_part.replace('_', ' ')
|
|
1694
1699
|
part_names.append(name)
|
|
1695
1700
|
if len(part_names) == len(timezoneId_parts):
|
|
1696
|
-
return
|
|
1701
|
+
return '/'.join(part_names)
|
|
1697
1702
|
return ''
|
|
1698
1703
|
|
|
1699
1704
|
def _timezone_name(timezoneId = None, icuLocaleIdQuery = None):
|
|
@@ -1712,7 +1717,7 @@ def _timezone_name(timezoneId = None, icuLocaleIdQuery = None):
|
|
|
1712
1717
|
return ''
|
|
1713
1718
|
|
|
1714
1719
|
def timezone_name(timezoneId = None, languageIdQuery = None, scriptIdQuery = None, territoryIdQuery = None):
|
|
1715
|
-
|
|
1720
|
+
'''Query translations of timezone IDs
|
|
1716
1721
|
|
|
1717
1722
|
:param timezoneId: identifier for the time zone
|
|
1718
1723
|
:type timezoneId: string
|
|
@@ -1767,7 +1772,7 @@ def timezone_name(timezoneId = None, languageIdQuery = None, scriptIdQuery = Non
|
|
|
1767
1772
|
return name
|
|
1768
1773
|
return timezoneId
|
|
1769
1774
|
|
|
1770
|
-
def territoryId(territoryName =
|
|
1775
|
+
def territoryId(territoryName = ''):
|
|
1771
1776
|
'''Query the territoryId from a translated name of a territory.
|
|
1772
1777
|
|
|
1773
1778
|
:param territoryName: the translated name of a language
|
|
@@ -1800,15 +1805,15 @@ def territoryId(territoryName = u''):
|
|
|
1800
1805
|
'''
|
|
1801
1806
|
if not territoryName:
|
|
1802
1807
|
return ''
|
|
1803
|
-
if
|
|
1808
|
+
if not isinstance(territoryName, str):
|
|
1804
1809
|
territoryName = territoryName.decode('UTF-8')
|
|
1805
|
-
for territoryId in _territories_db:
|
|
1806
|
-
for
|
|
1807
|
-
if territoryName ==
|
|
1810
|
+
for territoryId, territory_item in _territories_db.items():
|
|
1811
|
+
for name in territory_item.names.values():
|
|
1812
|
+
if territoryName == name:
|
|
1808
1813
|
return territoryId
|
|
1809
1814
|
return ''
|
|
1810
1815
|
|
|
1811
|
-
def languageId(languageName =
|
|
1816
|
+
def languageId(languageName = ''):
|
|
1812
1817
|
'''Query the languageId from a translated name of a language.
|
|
1813
1818
|
|
|
1814
1819
|
:param languageName: the translated name of a language
|
|
@@ -1835,11 +1840,11 @@ def languageId(languageName = u''):
|
|
|
1835
1840
|
'''
|
|
1836
1841
|
if not languageName:
|
|
1837
1842
|
return ''
|
|
1838
|
-
if
|
|
1843
|
+
if not isinstance(languageName, str):
|
|
1839
1844
|
languageName = languageName.decode('UTF-8')
|
|
1840
|
-
for languageId in _languages_db:
|
|
1841
|
-
for
|
|
1842
|
-
if languageName.lower() ==
|
|
1845
|
+
for languageId, language_item in _languages_db.items():
|
|
1846
|
+
for name in language_item.names.values():
|
|
1847
|
+
if languageName.lower() == name.lower():
|
|
1843
1848
|
return languageId
|
|
1844
1849
|
language_territory_pattern = re.compile(
|
|
1845
1850
|
r'^(?P<language_name>[^()]+)[\s]+[(](?P<territory_name>[^()]+)[)]',
|
|
@@ -1848,12 +1853,12 @@ def languageId(languageName = u''):
|
|
|
1848
1853
|
if match:
|
|
1849
1854
|
language_name = match.group('language_name')
|
|
1850
1855
|
territory_name = match.group('territory_name')
|
|
1851
|
-
for languageId in _languages_db:
|
|
1852
|
-
for
|
|
1853
|
-
if language_name.lower() ==
|
|
1854
|
-
for territoryId in _territories_db:
|
|
1855
|
-
for
|
|
1856
|
-
if territory_name.lower() ==
|
|
1856
|
+
for languageId, language_item in _languages_db.items():
|
|
1857
|
+
for language_item_name in language_item.names.values():
|
|
1858
|
+
if language_name.lower() == language_item_name.lower():
|
|
1859
|
+
for territoryId, territory_item in _territories_db.items():
|
|
1860
|
+
for territory_item_name in territory_item.names.values():
|
|
1861
|
+
if territory_name.lower() == territory_item_name.lower():
|
|
1857
1862
|
return languageId+'_'+territoryId
|
|
1858
1863
|
|
|
1859
1864
|
return ''
|
|
@@ -1940,8 +1945,7 @@ def list_locales(concise=True, show_weights=False, languageId = None, scriptId =
|
|
|
1940
1945
|
ranked_list = _make_ranked_list_concise(ranked_list)
|
|
1941
1946
|
if show_weights:
|
|
1942
1947
|
return ranked_list
|
|
1943
|
-
|
|
1944
|
-
return _ranked_list_to_list(ranked_list)
|
|
1948
|
+
return _ranked_list_to_list(ranked_list)
|
|
1945
1949
|
|
|
1946
1950
|
def list_common_languages():
|
|
1947
1951
|
'''List common languages
|
|
@@ -1959,7 +1963,7 @@ def list_common_languages():
|
|
|
1959
1963
|
|
|
1960
1964
|
'''
|
|
1961
1965
|
|
|
1962
|
-
common_locales =
|
|
1966
|
+
common_locales = []
|
|
1963
1967
|
common_locales.append("ar_EG.UTF-8")
|
|
1964
1968
|
common_locales.append("en_US.UTF-8")
|
|
1965
1969
|
common_locales.append("fr_FR.UTF-8")
|
|
@@ -2067,8 +2071,7 @@ def list_scripts(concise=True, show_weights=False, languageId = None, scriptId =
|
|
|
2067
2071
|
ranked_list = _make_ranked_list_concise(ranked_list)
|
|
2068
2072
|
if show_weights:
|
|
2069
2073
|
return ranked_list
|
|
2070
|
-
|
|
2071
|
-
return _ranked_list_to_list(ranked_list)
|
|
2074
|
+
return _ranked_list_to_list(ranked_list)
|
|
2072
2075
|
|
|
2073
2076
|
def list_inputmethods(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
|
|
2074
2077
|
'''List suitable input methods
|
|
@@ -2143,8 +2146,7 @@ def list_inputmethods(concise=True, show_weights=False, languageId = None, scrip
|
|
|
2143
2146
|
ranked_list = _make_ranked_list_concise(ranked_list)
|
|
2144
2147
|
if show_weights:
|
|
2145
2148
|
return ranked_list
|
|
2146
|
-
|
|
2147
|
-
return _ranked_list_to_list(ranked_list)
|
|
2149
|
+
return _ranked_list_to_list(ranked_list)
|
|
2148
2150
|
|
|
2149
2151
|
def list_keyboards(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
|
|
2150
2152
|
'''List likely X11 keyboard layouts
|
|
@@ -2220,10 +2222,10 @@ def list_keyboards(concise=True, show_weights=False, languageId = None, scriptId
|
|
|
2220
2222
|
ranked_list = _make_ranked_list_concise(ranked_list)
|
|
2221
2223
|
if show_weights:
|
|
2222
2224
|
return ranked_list
|
|
2223
|
-
|
|
2224
|
-
return _ranked_list_to_list(ranked_list)
|
|
2225
|
+
return _ranked_list_to_list(ranked_list)
|
|
2225
2226
|
|
|
2226
2227
|
def list_common_keyboards(languageId = None, scriptId = None, territoryId = None):
|
|
2228
|
+
# pylint: disable=line-too-long
|
|
2227
2229
|
'''Returns highest ranked keyboard layout(s)
|
|
2228
2230
|
2
|
|
2229
2231
|
:param languageId: identifier for the language
|
|
@@ -2238,7 +2240,7 @@ def list_common_keyboards(languageId = None, scriptId = None, territoryId = None
|
|
|
2238
2240
|
**Examples:**
|
|
2239
2241
|
|
|
2240
2242
|
>>> list_common_keyboards()
|
|
2241
|
-
['af(ps)', 'al', 'am', 'ara', 'az', 'ba', 'be(oss)', 'bg', 'br', 'bt', 'by', 'ca(eng)', 'ca(ike)', 'ch', 'cn', 'cn(ug)', 'cz', 'de(nodeadkeys)', 'dk', 'ee', 'es', 'es(ast)', 'es(cat)', 'et', 'fi', 'fo', 'fr(bre)', 'fr(oss)', 'gb', 'ge', 'gr', 'hr', 'hu', 'ie(CloGaelach)', 'il', 'in(eng)', 'ir', 'is', 'it', 'jp', 'ke', 'kg', 'kh', 'kr', 'kz', 'la', 'latam', 'lt', 'lv', 'ma(tifinagh)', 'mk', 'mm', 'mn', 'mt', 'mv', 'ng(hausa)', 'ng(igbo)', 'ng(yoruba)', 'no', 'np', 'ph', 'pk', 'pl', 'ro', 'rs', 'rs(latin)', 'ru', 'ru(bak)', 'ru(chm)', 'ru(cv)', 'ru(kom)', 'ru(os_winkeys)', 'ru(sah)', 'ru(tt)', 'ru(udm)', 'ru(xal)', 'se', 'si', 'sk', 'sn', 'syc', 'th', 'tj', 'tm', 'tr', 'tr(crh)', 'tr(ku)', 'tw', 'ua', 'us', 'us(altgr-intl)', 'us(euro)', 'us(intl)', 'uz', 'vn', 'za']
|
|
2243
|
+
['af(ps)', 'al', 'am', 'ara', 'au', 'az', 'ba', 'be(oss)', 'bg', 'br', 'bt', 'by', 'ca(eng)', 'ca(ike)', 'ch', 'cn', 'cn(ug)', 'cz', 'de(nodeadkeys)', 'dk', 'ee', 'es', 'es(ast)', 'es(cat)', 'et', 'fi', 'fo', 'fr(bre)', 'fr(oss)', 'gb', 'ge', 'gr', 'hr', 'hu', 'ie(CloGaelach)', 'il', 'in(eng)', 'ir', 'is', 'it', 'jp', 'ke', 'kg', 'kh', 'kr', 'kz', 'la', 'latam', 'lt', 'lv', 'ma(tifinagh)', 'mk', 'mm', 'mn', 'mt', 'mv', 'ng', 'ng(hausa)', 'ng(igbo)', 'ng(yoruba)', 'no', 'np', 'ph', 'pk', 'pl', 'pt', 'ro', 'rs', 'rs(latin)', 'ru', 'ru(bak)', 'ru(chm)', 'ru(cv)', 'ru(kom)', 'ru(os_winkeys)', 'ru(sah)', 'ru(tt)', 'ru(udm)', 'ru(xal)', 'se', 'si', 'sk', 'sn', 'syc', 'th', 'tj', 'tm', 'tr', 'tr(crh)', 'tr(ku)', 'tw', 'ua', 'us', 'us(altgr-intl)', 'us(euro)', 'us(intl)', 'uz', 'vn', 'za']
|
|
2242
2244
|
>>> list_common_keyboards(languageId='fr')
|
|
2243
2245
|
['fr(oss)']
|
|
2244
2246
|
>>> list_common_keyboards(territoryId='CA')
|
|
@@ -2256,7 +2258,8 @@ def list_common_keyboards(languageId = None, scriptId = None, territoryId = None
|
|
|
2256
2258
|
>>> list_common_keyboards(languageId='zh', scriptId='Hans', territoryId='TW')
|
|
2257
2259
|
['tw']
|
|
2258
2260
|
'''
|
|
2259
|
-
|
|
2261
|
+
# pylint: enable=line-too-long
|
|
2262
|
+
high_ranked_keyboards = []
|
|
2260
2263
|
if not languageId and not scriptId and not territoryId:
|
|
2261
2264
|
for _, language in _languages_db.items():
|
|
2262
2265
|
keyboard_layouts = language.keyboards
|
|
@@ -2266,16 +2269,16 @@ def list_common_keyboards(languageId = None, scriptId = None, territoryId = None
|
|
|
2266
2269
|
high_ranked_keyboards.extend(selected_layouts)
|
|
2267
2270
|
high_ranked_keyboards = list(set(high_ranked_keyboards))
|
|
2268
2271
|
|
|
2269
|
-
kwargs =
|
|
2272
|
+
kwargs = {}
|
|
2270
2273
|
locale = _parse_and_split_languageId(
|
|
2271
2274
|
languageId=languageId, scriptId=scriptId, territoryId=territoryId
|
|
2272
2275
|
)
|
|
2273
2276
|
if locale.language:
|
|
2274
|
-
kwargs.update(
|
|
2277
|
+
kwargs.update({'languageId': locale.language})
|
|
2275
2278
|
if locale.script:
|
|
2276
|
-
kwargs.update(
|
|
2279
|
+
kwargs.update({'scriptId': locale.script})
|
|
2277
2280
|
if locale.territory:
|
|
2278
|
-
kwargs.update(
|
|
2281
|
+
kwargs.update({'territoryId': locale.territory})
|
|
2279
2282
|
common_layouts = list_keyboards(**kwargs)
|
|
2280
2283
|
if common_layouts:
|
|
2281
2284
|
# Picking up first layout from the list
|
|
@@ -2330,7 +2333,7 @@ def list_common_locales(languageId = None, scriptId = None, territoryId = None):
|
|
|
2330
2333
|
>>> list_common_locales(languageId='zh', territoryId='TW')
|
|
2331
2334
|
['zh_TW.UTF-8']
|
|
2332
2335
|
'''
|
|
2333
|
-
high_ranked_locales =
|
|
2336
|
+
high_ranked_locales = []
|
|
2334
2337
|
if not languageId and not scriptId and not territoryId:
|
|
2335
2338
|
for language in list_common_languages():
|
|
2336
2339
|
locales = _languages_db[language].locales
|
|
@@ -2342,16 +2345,16 @@ def list_common_locales(languageId = None, scriptId = None, territoryId = None):
|
|
|
2342
2345
|
high_ranked_locales.extend(selected_locales)
|
|
2343
2346
|
return high_ranked_locales
|
|
2344
2347
|
|
|
2345
|
-
kwargs =
|
|
2348
|
+
kwargs = {}
|
|
2346
2349
|
locale = _parse_and_split_languageId(
|
|
2347
2350
|
languageId=languageId, scriptId=scriptId, territoryId=territoryId
|
|
2348
2351
|
)
|
|
2349
2352
|
if locale.language:
|
|
2350
|
-
kwargs.update(
|
|
2353
|
+
kwargs.update({'languageId': locale.language})
|
|
2351
2354
|
if locale.script:
|
|
2352
|
-
kwargs.update(
|
|
2355
|
+
kwargs.update({'scriptId': locale.script})
|
|
2353
2356
|
if locale.territory:
|
|
2354
|
-
kwargs.update(
|
|
2357
|
+
kwargs.update({'territoryId': locale.territory})
|
|
2355
2358
|
common_locales = list_locales(**kwargs)
|
|
2356
2359
|
if common_locales:
|
|
2357
2360
|
# Picking up first locale from the list
|
|
@@ -2359,7 +2362,7 @@ def list_common_locales(languageId = None, scriptId = None, territoryId = None):
|
|
|
2359
2362
|
return high_ranked_locales
|
|
2360
2363
|
|
|
2361
2364
|
def list_consolefonts(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
|
|
2362
|
-
|
|
2365
|
+
'''List likely Linux Console fonts
|
|
2363
2366
|
|
|
2364
2367
|
:param concise: if True, return only to highly ranked results
|
|
2365
2368
|
:type concise: boolean
|
|
@@ -2454,8 +2457,7 @@ def list_consolefonts(concise=True, show_weights=False, languageId = None, scrip
|
|
|
2454
2457
|
ranked_list = _make_ranked_list_concise(ranked_list)
|
|
2455
2458
|
if show_weights:
|
|
2456
2459
|
return ranked_list
|
|
2457
|
-
|
|
2458
|
-
return _ranked_list_to_list(ranked_list)
|
|
2460
|
+
return _ranked_list_to_list(ranked_list)
|
|
2459
2461
|
|
|
2460
2462
|
def list_timezones(concise=True, show_weights=False, languageId = None, scriptId = None, territoryId = None):
|
|
2461
2463
|
'''List likely timezones
|
|
@@ -2535,8 +2537,7 @@ def list_timezones(concise=True, show_weights=False, languageId = None, scriptId
|
|
|
2535
2537
|
ranked_list = _make_ranked_list_concise(ranked_list)
|
|
2536
2538
|
if show_weights:
|
|
2537
2539
|
return ranked_list
|
|
2538
|
-
|
|
2539
|
-
return _ranked_list_to_list(ranked_list)
|
|
2540
|
+
return _ranked_list_to_list(ranked_list)
|
|
2540
2541
|
|
|
2541
2542
|
def list_all_languages() -> List[str]:
|
|
2542
2543
|
'''
|
|
@@ -2640,22 +2641,23 @@ def version():
|
|
|
2640
2641
|
Return version of langtable
|
|
2641
2642
|
'''
|
|
2642
2643
|
# pkg_resources is part of setuptools
|
|
2643
|
-
import pkg_resources # type: ignore
|
|
2644
|
+
import pkg_resources # type: ignore pylint: disable=import-outside-toplevel
|
|
2644
2645
|
return pkg_resources.require("langtable")[0].version
|
|
2645
2646
|
|
|
2646
2647
|
def info():
|
|
2647
2648
|
'''
|
|
2648
2649
|
Print some info about langtable
|
|
2649
2650
|
'''
|
|
2650
|
-
|
|
2651
|
+
# pkg_resources is part of setuptools
|
|
2652
|
+
import pkg_resources # type: ignore pylint: disable=import-outside-toplevel
|
|
2651
2653
|
project_name = pkg_resources.require("langtable")[0].project_name
|
|
2652
2654
|
version = pkg_resources.require("langtable")[0].version
|
|
2653
2655
|
module_path = pkg_resources.require("langtable")[0].module_path
|
|
2654
|
-
print('Project name: =
|
|
2655
|
-
print('Version: =
|
|
2656
|
-
print('Module path: =
|
|
2657
|
-
print('Loaded from:
|
|
2658
|
-
print('Data files read:
|
|
2656
|
+
print(f'Project name: = {project_name}')
|
|
2657
|
+
print(f'Version: = {version}')
|
|
2658
|
+
print(f'Module path: = {module_path}')
|
|
2659
|
+
print(f'Loaded from: {os.path.realpath(__file__)}')
|
|
2660
|
+
print(f'Data files read: {_INFO["data_files_read"]}')
|
|
2659
2661
|
|
|
2660
2662
|
def _test_cldr_locale_pattern(localeId):
|
|
2661
2663
|
'''
|
|
@@ -2664,8 +2666,7 @@ def _test_cldr_locale_pattern(localeId):
|
|
|
2664
2666
|
match = _cldr_locale_pattern.match(localeId)
|
|
2665
2667
|
if match:
|
|
2666
2668
|
return [('language', match.group('language')), ('script', match.group('script')), ('territory', match.group('territory'))]
|
|
2667
|
-
|
|
2668
|
-
return []
|
|
2669
|
+
return []
|
|
2669
2670
|
|
|
2670
2671
|
def _test_language_territory(show_weights=False, languageId=None, scriptId=None, territoryId=None):
|
|
2671
2672
|
'''
|
|
@@ -2689,9 +2690,8 @@ def _test_language_territory(show_weights=False, languageId=None, scriptId=None,
|
|
|
2689
2690
|
+" +: "
|
|
2690
2691
|
+repr(list_keyboards(show_weights=show_weights,languageId=languageId,scriptId=scriptId,territoryId=territoryId))
|
|
2691
2692
|
)
|
|
2692
|
-
return
|
|
2693
2693
|
|
|
2694
|
-
def _init(debug=False, logfilename='/dev/null'):
|
|
2694
|
+
def _init(debug=False, logfilename='/dev/null') -> None:
|
|
2695
2695
|
|
|
2696
2696
|
log_level = logging.INFO
|
|
2697
2697
|
if debug:
|
|
@@ -2707,12 +2707,13 @@ def _init(debug=False, logfilename='/dev/null'):
|
|
|
2707
2707
|
_read_file('timezones.xml', TimezonesContentHandler())
|
|
2708
2708
|
_read_file('timezoneidparts.xml', TimezoneIdPartsContentHandler())
|
|
2709
2709
|
|
|
2710
|
-
|
|
2711
|
-
|
|
2710
|
+
# pylint: enable=invalid-name
|
|
2711
|
+
|
|
2712
|
+
class __ModuleInitializer: # pylint: disable=too-few-public-methods,invalid-name
|
|
2713
|
+
def __init__(self) -> None:
|
|
2712
2714
|
_init()
|
|
2713
|
-
return
|
|
2714
2715
|
|
|
2715
|
-
def __del__(self):
|
|
2716
|
+
def __del__(self) -> None:
|
|
2716
2717
|
return
|
|
2717
2718
|
|
|
2718
2719
|
__module_init = __ModuleInitializer()
|
|
@@ -2725,5 +2726,5 @@ if __name__ == "__main__":
|
|
|
2725
2726
|
print(f'{ATTEMPTED} tests run. {ATTEMPTED - FAILED} passed and {FAILED} failed.')
|
|
2726
2727
|
if FAILED:
|
|
2727
2728
|
sys.exit(FAILED)
|
|
2728
|
-
print(
|
|
2729
|
+
print('All tests passed.')
|
|
2729
2730
|
sys.exit(0)
|
|
@@ -1,17 +1,17 @@
|
|
|
1
1
|
langtable/__init__.py,sha256=09oRQxG7NrxWL_v5bKHHCt-DFuK-WIIKmyLp0RtS84M,526
|
|
2
|
-
langtable/langtable.py,sha256=
|
|
2
|
+
langtable/langtable.py,sha256=cItWXxnagK7FohpHYf3Gd4FxsWvKjDLVrb3_NfbyJJI,107991
|
|
3
3
|
langtable/data/keyboards.xml.gz,sha256=SiBNgt_lJ6qiXM_Ni1fElNRSqnaCRBhqhKSn2FEAoWg,5888
|
|
4
|
-
langtable/data/languages.xml.gz,sha256=
|
|
5
|
-
langtable/data/territories.xml.gz,sha256=
|
|
6
|
-
langtable/data/timezoneidparts.xml.gz,sha256=
|
|
4
|
+
langtable/data/languages.xml.gz,sha256=DvwZphkDEgLQW_DlEZErDSA99O0Gw2Iu9vFAwTQnqu8,466823
|
|
5
|
+
langtable/data/territories.xml.gz,sha256=oySCvcwY02ouT-_AN4BchDEH_RijetCzeA2Ss0UXIQA,522833
|
|
6
|
+
langtable/data/timezoneidparts.xml.gz,sha256=hmBEL8dbYVqna710eqe6eBNTIP9JOTSVWlJ36dRUGno,401034
|
|
7
7
|
langtable/data/timezones.xml.gz,sha256=UJ6YNPOKG5yp2hEgbEK_4yWsnmrbDhHZAr6ZL-Wr0Fw,3542
|
|
8
8
|
langtable/schemas/keyboards.rng,sha256=Gy8zXngR6VZ6QXGrsp4u7Aolpk-VkWTJ_8kTlT-a9ss,1301
|
|
9
9
|
langtable/schemas/languages.rng,sha256=rDTnJ-yPK7CgwQCjVMhLMCf0xzGpG2cnx0iIn85f0Po,4153
|
|
10
10
|
langtable/schemas/territories.rng,sha256=KnDf45rlNpmsDTmzND0WDDXGNy08N1fx0reRjKUS_SQ,3482
|
|
11
11
|
langtable/schemas/timezoneidparts.rng,sha256=--LNbran9DrHSDQSZMqxYjermlPRo39GZwLtR-zF_Ug,683
|
|
12
12
|
langtable/schemas/timezones.rng,sha256=l2sVJPyo48ESnNaTr1QHgv5fV26cIJZ_XxElumAc3F0,665
|
|
13
|
-
langtable-0.0.
|
|
14
|
-
langtable-0.0.
|
|
15
|
-
langtable-0.0.
|
|
16
|
-
langtable-0.0.
|
|
17
|
-
langtable-0.0.
|
|
13
|
+
langtable-0.0.68.dist-info/COPYING,sha256=jOtLnuWt7d5Hsx6XXB2QxzrSe2sWWh3NgMfFRetluQM,35147
|
|
14
|
+
langtable-0.0.68.dist-info/METADATA,sha256=MfJiUiOFbhLkjIqAJ1JFJH1DWu5gty1P54udCK6QJkE,2622
|
|
15
|
+
langtable-0.0.68.dist-info/WHEEL,sha256=yQN5g4mg4AybRjkgi-9yy4iQEFibGQmlz78Pik5Or-A,92
|
|
16
|
+
langtable-0.0.68.dist-info/top_level.txt,sha256=vFuD4S3tAk7ZEIOqrR5tqqm-ThvhpwXxYUMxZdXY1-0,10
|
|
17
|
+
langtable-0.0.68.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|