ethnidata 3.1.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
ethnidata/__init__.py ADDED
@@ -0,0 +1,61 @@
1
+ """
2
+ EthniData v3.0.1 - ULTRA MASSIVE EXPANSION! Global Demographics Prediction
3
+ Predict nationality, ethnicity, gender, region, language AND religion!
4
+
5
+ 🔥 NEW in v3.0.1 - COMPLETE RELIGIOUS COVERAGE:
6
+ - 📊 **5.9M+ records** (14x increase from v2.0.0 - 1,326% growth!)
7
+ - 🌍 **238 countries** - complete global coverage
8
+ - 🗣️ **72 languages**
9
+ - 🕌 **ALL 6 MAJOR WORLD RELIGIONS** - Complete coverage:
10
+ - Christianity: 3.9M+ records (65.2%)
11
+ - Buddhism: 1.3M+ records (22.1%)
12
+ - Islam: 504K+ records (8.5%)
13
+ - Judaism: 121K+ records (2.0%) ✡️
14
+ - Hinduism: 90K+ records (1.5%)
15
+ - Sikhism: 24K+ records (0.4%) 🪯 NEW!
16
+ - 🌎 **Regional distribution** (weighted toward Asia, the Americas and Africa):
17
+ - Asia: 33% • Americas: 32% • Africa: 30% • Europe: 3% • Oceania: 0.1%
18
+
19
+ Features:
20
+ - ✅ Nationality prediction (238 countries)
21
+ - ✅ Religion prediction (6 major world religions)
22
+ - ✅ Gender prediction
23
+ - ✅ Region prediction (5 continents)
24
+ - ✅ Language prediction (72 languages)
25
+ - ✅ Ethnicity prediction
26
+ - ✅ Full name analysis
27
+
28
+ Usage:
29
+ from ethnidata import EthniData
30
+
31
+ ed = EthniData()
32
+
33
+ # Nationality
34
+ result = ed.predict_nationality("Ahmet")
35
+
36
+ # Religion (NOW WITH 6 RELIGIONS!)
37
+ result = ed.predict_religion("Muhammad") # Islam
38
+ result = ed.predict_religion("Cohen") # Judaism
39
+ result = ed.predict_religion("Singh") # Sikhism
40
+
41
+ # Gender
42
+ result = ed.predict_gender("Emma")
43
+
44
+ # Region
45
+ result = ed.predict_region("Chen")
46
+
47
+ # Language
48
+ result = ed.predict_language("José")
49
+
50
+ # ALL at once
51
+ result = ed.predict_all("Maria")
52
+ # Returns: nationality, religion, gender, region, language, ethnicity
53
+ """
54
+
55
# Package metadata. __version__ is kept in sync with the version of the
# published distribution (3.1.5) so that runtime version checks agree with
# what the installer reports.
__version__ = "3.1.5"
__author__ = "Teyfik Oz"
__license__ = "MIT"
58
+
59
+ from .predictor import EthniData
60
+
61
+ __all__ = ["EthniData"]
@@ -0,0 +1,143 @@
1
+ """
2
+ Database downloader for EthniData
3
+ Downloads the full v3.0.0 database (5.8M records) on demand
4
+ """
5
+
6
+ import os
7
+ import urllib.request
8
+ import shutil
9
+ from pathlib import Path
10
+ from typing import Optional
11
+
12
# Database versions and URLs
DATABASES = {
    'v2.0.0': {
        'url': 'https://github.com/teyfikoz/ethnidata/releases/download/v2.0.0/ethnidata_v2.db',
        'size': '75 MB',
        'records': '415K',
        'filename': 'ethnidata.db',
    },
    'v3.0.0': {
        'url': 'https://github.com/teyfikoz/ethnidata/releases/download/v3.0.0/ethnidata_v3.db',
        'size': '1.1 GB',
        'records': '5.8M',
        'filename': 'ethnidata_v3.db',
    },
}

DEFAULT_VERSION = 'v2.0.0'  # Included in package
FULL_VERSION = 'v3.0.0'  # Downloaded on demand


class DatabaseDownloader:
    """Locate the database files in a package directory and download them.

    Two files are tracked inside ``package_dir``: ``ethnidata.db`` (v2.0.0)
    and ``ethnidata_v3.db`` (v3.0.0).
    """

    def __init__(self, package_dir: Path):
        """Remember ``package_dir`` and derive both database paths from it."""
        self.package_dir = package_dir
        self.db_path = package_dir / "ethnidata.db"
        self.v3_path = package_dir / "ethnidata_v3.db"

    def check_database(self, version: str = DEFAULT_VERSION) -> bool:
        """Return True if the file for ``version`` exists.

        Unknown version strings yield False rather than raising.
        """
        if version == 'v2.0.0':
            return self.db_path.exists()
        if version == 'v3.0.0':
            return self.v3_path.exists()
        return False

    @staticmethod
    def _progress_text(block_num: int, block_size: int, total_size: int) -> str:
        """Build the progress line for one ``urlretrieve`` report callback.

        ``total_size`` is not positive when the server does not announce a
        length; in that case the amount downloaded so far is shown instead
        of a percentage (which would otherwise divide by zero or go
        negative).
        """
        downloaded = block_num * block_size
        if total_size > 0:
            percent = min(downloaded * 100 / total_size, 100)
            return f"\r Progress: {percent:.1f}%"
        return f"\r Downloaded: {downloaded / (1024 * 1024):.1f} MB"

    def download_database(self, version: str = FULL_VERSION, force: bool = False) -> str:
        """
        Download database if not exists

        The data is first written to a ``.part`` file next to the target and
        only moved into place once the transfer has finished, so an
        interrupted download never leaves a truncated database that later
        calls would mistake for a complete one.

        Args:
            version: Database version to download ('v2.0.0' or 'v3.0.0')
            force: Force download even if exists

        Returns:
            Path to database file

        Raises:
            ValueError: If ``version`` is not a key of ``DATABASES``.
            Exception: Whatever the download raised, re-raised after the
                manual-download hint has been printed.
        """
        if version not in DATABASES:
            raise ValueError(f"Unknown version: {version}. Available: {list(DATABASES.keys())}")

        db_info = DATABASES[version]
        target_path = self.v3_path if version == 'v3.0.0' else self.db_path

        # Check if already exists
        if target_path.exists() and not force:
            print(f"✅ Database {version} already exists ({db_info['records']} records)")
            return str(target_path)

        print(f"\n📥 Downloading EthniData {version} database...")
        print(f" Records: {db_info['records']}")
        print(f" Size: {db_info['size']}")
        print(f" This may take a few minutes...")

        partial_path = target_path.with_name(target_path.name + ".part")

        def report_progress(block_num, block_size, total_size):
            print(self._progress_text(block_num, block_size, total_size), end='', flush=True)

        try:
            urllib.request.urlretrieve(
                db_info['url'],
                partial_path,
                reporthook=report_progress
            )
            # Atomic within one directory: the target is either the old
            # file or the complete new one, never a partial write.
            os.replace(partial_path, target_path)
            print(f"\n✅ Download complete: {target_path}")
            return str(target_path)

        except Exception as e:
            # Best-effort removal of the incomplete file; it may not exist
            # if the connection failed before anything was written.
            try:
                partial_path.unlink()
            except OSError:
                pass
            print(f"\n❌ Download failed: {e}")
            print(f"\n💡 You can manually download from:")
            print(f" {db_info['url']}")
            print(f" And save it as: {target_path}")
            raise

    def get_database_path(self, prefer_v3: bool = False) -> str:
        """
        Get database path, downloading if necessary

        Args:
            prefer_v3: If True, use v3.0.0 (5.8M records) instead of v2.0.0 (415K records).
                When the v3 file is missing the user is asked on stdin whether
                to download it; any answer other than 'y'/'yes' (including no
                stdin at all) falls back to the v2 path.

        Returns:
            Path to database file

        Raises:
            FileNotFoundError: If ``prefer_v3`` is False and the v2 file is missing.
        """
        if not prefer_v3:
            # Use v2 (included in package)
            if not self.db_path.exists():
                raise FileNotFoundError(
                    f"Database not found at {self.db_path}. "
                    f"Please reinstall: pip install --upgrade --force-reinstall ethnidata"
                )
            return str(self.db_path)

        if self.v3_path.exists():
            return str(self.v3_path)

        # Try to use v3, download if not exists
        print(f"\n🚀 EthniData v3.0.0 offers 14x more data (5.8M vs 415K records)!")
        print(f" Would you like to download it? ({DATABASES['v3.0.0']['size']})")
        try:
            response = input(" Download v3.0.0? [y/N]: ").strip().lower()
        except EOFError:
            # Non-interactive session (closed stdin): behave like the default "N".
            response = ''

        if response in ['y', 'yes']:
            return self.download_database('v3.0.0')

        print(f" Using v2.0.0 ({DATABASES['v2.0.0']['records']} records)")
        return str(self.db_path)
127
+
128
+
129
def download_v3_database(package_dir: Optional[Path] = None) -> str:
    """
    Fetch the v3.0.0 database through a DatabaseDownloader.

    Args:
        package_dir: Directory the database is stored in. When None, the
            directory containing this module is used.

    Returns:
        Path to downloaded database
    """
    base_dir = Path(__file__).parent if package_dir is None else package_dir
    return DatabaseDownloader(base_dir).download_database('v3.0.0')
ethnidata/ethnidata.db ADDED
Binary file
ethnidata/predictor.py ADDED
@@ -0,0 +1,560 @@
1
+ """
2
+ EthniData Predictor v2.0 - with advanced features
3
+ New features:
4
+ - Gender prediction
5
+ - Region prediction (Region: Europe, Asia, Americas, Africa, Oceania)
6
+ - Language prediction (most common language prediction)
7
+ """
8
+
9
+ import sqlite3
10
+ from pathlib import Path
11
+ from typing import Dict, List, Optional, Literal
12
+ from unidecode import unidecode
13
+ import pycountry
14
+
15
class EthniData:
    """Ethnicity, Nationality, Gender, Region and Language predictor

    Every prediction is a frequency lookup against the ``names`` table of a
    SQLite database. The instance owns one connection; release it with
    ``close()`` or by using the instance as a context manager.
    """

    def __init__(self, db_path: Optional[str] = None, use_v3: bool = False):
        """
        Initialize EthniData predictor

        Args:
            db_path: Path to SQLite database. If None, uses default location.
            use_v3: If True, attempts to use v3.0.0 database (5.8M records).
                   If False, uses v2.0.0 database (415K records, included in package).
                   Ignored when ``db_path`` is given.

        Raises:
            FileNotFoundError: If the resolved database file does not exist.
        """
        if db_path is None:
            package_dir = Path(__file__).parent
            db_path = package_dir / "ethnidata.db"

            if use_v3:
                # Try to use v3 database
                v3_path = package_dir / "ethnidata_v3.db"
                if v3_path.exists():
                    db_path = v3_path
                else:
                    print(f"\n💡 EthniData v3.0.0 (5.8M records) is not installed.")
                    print(f" To download: from ethnidata.downloader import download_v3_database")
                    print(f" download_v3_database()")
                    print(f"\n Using v2.0.0 (415K records) for now...")

        self.db_path = Path(db_path)

        if not self.db_path.exists():
            raise FileNotFoundError(
                f"Database not found: {self.db_path}\n"
                f"Please reinstall: pip install --upgrade --force-reinstall ethnidata"
            )

        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row

    def close(self) -> None:
        """Close the database connection; safe to call more than once."""
        # __init__ may have raised before ``conn`` was assigned.
        if hasattr(self, 'conn'):
            self.conn.close()

    def __enter__(self) -> "EthniData":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def __del__(self):
        """Close database connection"""
        self.close()

    @staticmethod
    def normalize_name(name: str) -> str:
        """Normalize name (lowercase, remove accents)"""
        return unidecode(name.strip().lower())

    @staticmethod
    def _take_top(rows: List, top_n: int) -> List:
        """Return the first ``top_n`` rows; a negative ``top_n`` means no limit.

        This mirrors the SQL ``LIMIT`` clause the queries used previously.
        """
        return rows if top_n < 0 else rows[:top_n]

    @staticmethod
    def _country_name(country_code):
        """Look up the display name for an alpha-3 code via pycountry.

        Falls back to the code itself when the lookup finds nothing or
        fails. ``Exception`` (not a bare ``except``) is caught so that
        KeyboardInterrupt / SystemExit still propagate.
        """
        try:
            country = pycountry.countries.get(alpha_3=country_code)
            return country.name if country else country_code
        except Exception:
            return country_code

    def predict_nationality(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first",
        top_n: int = 5
    ) -> Dict:
        """
        Predict nationality from name

        Probabilities are relative to ALL matching records, so they do not
        change with ``top_n``; ``top_n`` only limits how many entries are
        returned.

        Args:
            name: First or last name
            name_type: "first" or "last"
            top_n: Number of top predictions

        Returns:
            {
                'name': str,
                'country': str (ISO 3166-1 alpha-3),
                'country_name': str,
                'confidence': float (0-1),
                'region': str,
                'language': str,
                'top_countries': [...]
            }
        """
        normalized = self.normalize_name(name)

        query = """
            SELECT country_code, region, language, COUNT(*) as frequency
            FROM names
            WHERE name = ? AND name_type = ?
            GROUP BY country_code, region, language
            ORDER BY frequency DESC
        """
        rows = self.conn.execute(query, (normalized, name_type)).fetchall()
        top_rows = self._take_top(rows, top_n)

        if not top_rows:
            return {
                'name': normalized,
                'country': None,
                'country_name': None,
                'confidence': 0.0,
                'region': None,
                'language': None,
                'top_countries': []
            }

        # Normalise over every group, not only the ones being returned;
        # otherwise top_n=1 would always yield a probability of 1.0.
        total_freq = sum(row['frequency'] for row in rows)

        top_countries = [
            {
                'country': row['country_code'],
                'country_name': self._country_name(row['country_code']),
                'region': row['region'],
                'language': row['language'],
                'probability': round(row['frequency'] / total_freq, 4),
                'frequency': row['frequency']
            }
            for row in top_rows
        ]

        top = top_countries[0]

        return {
            'name': normalized,
            'country': top['country'],
            'country_name': top['country_name'],
            'confidence': top['probability'],
            'region': top['region'],
            'language': top['language'],
            'top_countries': top_countries
        }

    def predict_gender(
        self,
        name: str
    ) -> Dict:
        """
        Predict gender from first name

        Args:
            name: First name

        Returns:
            {
                'name': str,
                'gender': the most frequent stored gender value (may be None
                          when records without a gender dominate),
                'confidence': float,
                'distribution': {gender value: probability, ...}
            }
        """
        normalized = self.normalize_name(name)

        query = """
            SELECT gender, COUNT(*) as count
            FROM names
            WHERE name = ? AND name_type = 'first'
            GROUP BY gender
        """
        rows = self.conn.execute(query, (normalized,)).fetchall()

        if not rows:
            return {
                'name': normalized,
                'gender': None,
                'confidence': 0.0,
                'distribution': {}
            }

        gender_counts = {row['gender']: row['count'] for row in rows}
        total = sum(gender_counts.values())

        distribution = {g: round(c / total, 4) for g, c in gender_counts.items()}

        top_gender = max(gender_counts.items(), key=lambda x: x[1])[0]
        confidence = gender_counts[top_gender] / total

        return {
            'name': normalized,
            'gender': top_gender,
            'confidence': round(confidence, 4),
            'distribution': distribution
        }

    def predict_region(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first"
    ) -> Dict:
        """
        Predict geographic region from name

        Args:
            name: First or last name
            name_type: "first" or "last"

        Returns:
            {
                'name': str,
                'region': str (Europe, Asia, Americas, Africa, Oceania, Other),
                'confidence': float,
                'distribution': {region: probability, ...}
            }
        """
        normalized = self.normalize_name(name)

        query = """
            SELECT region, COUNT(*) as total_freq
            FROM names
            WHERE name = ? AND name_type = ?
            GROUP BY region
            ORDER BY total_freq DESC
        """
        rows = self.conn.execute(query, (normalized, name_type)).fetchall()

        if not rows:
            return {
                'name': normalized,
                'region': None,
                'confidence': 0.0,
                'distribution': {}
            }

        total = sum(row['total_freq'] for row in rows)
        distribution = {
            row['region']: round(row['total_freq'] / total, 4) for row in rows
        }

        # Rows are ordered by frequency, so the first one is the best match.
        best = rows[0]

        return {
            'name': normalized,
            'region': best['region'],
            'confidence': round(best['total_freq'] / total, 4),
            'distribution': distribution
        }

    def predict_language(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first",
        top_n: int = 5
    ) -> Dict:
        """
        Predict most likely language from name

        Probabilities are relative to all matching records that have a
        language, independent of ``top_n``.

        Args:
            name: First or last name
            name_type: "first" or "last"
            top_n: Number of top predictions

        Returns:
            {
                'name': str,
                'language': str,
                'confidence': float,
                'top_languages': [{language, probability}, ...]
            }
        """
        normalized = self.normalize_name(name)

        query = """
            SELECT language, COUNT(*) as total_freq
            FROM names
            WHERE name = ? AND name_type = ? AND language IS NOT NULL
            GROUP BY language
            ORDER BY total_freq DESC
        """
        rows = self.conn.execute(query, (normalized, name_type)).fetchall()
        top_rows = self._take_top(rows, top_n)

        if not top_rows:
            return {
                'name': normalized,
                'language': None,
                'confidence': 0.0,
                'top_languages': []
            }

        total = sum(row['total_freq'] for row in rows)

        top_languages = [
            {
                'language': row['language'],
                'probability': round(row['total_freq'] / total, 4)
            }
            for row in top_rows
        ]

        return {
            'name': normalized,
            'language': top_languages[0]['language'],
            'confidence': top_languages[0]['probability'],
            'top_languages': top_languages
        }

    def predict_religion(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first",
        top_n: int = 5
    ) -> Dict:
        """
        Predict religion from name

        Probabilities are relative to all matching records that have a
        religion, independent of ``top_n``.

        Args:
            name: First or last name
            name_type: "first" or "last"
            top_n: Number of top predictions

        Returns:
            {
                'name': str,
                'religion': str (whatever value the database stores, e.g.
                            Christianity, Islam, Hinduism, Buddhism, Judaism,
                            Sikhism),
                'confidence': float,
                'top_religions': [{religion, probability}, ...]
            }
        """
        normalized = self.normalize_name(name)

        query = """
            SELECT religion, COUNT(*) as total_freq
            FROM names
            WHERE name = ? AND name_type = ? AND religion IS NOT NULL
            GROUP BY religion
            ORDER BY total_freq DESC
        """
        rows = self.conn.execute(query, (normalized, name_type)).fetchall()
        top_rows = self._take_top(rows, top_n)

        if not top_rows:
            return {
                'name': normalized,
                'religion': None,
                'confidence': 0.0,
                'top_religions': []
            }

        total = sum(row['total_freq'] for row in rows)

        top_religions = [
            {
                'religion': row['religion'],
                'probability': round(row['total_freq'] / total, 4)
            }
            for row in top_rows
        ]

        return {
            'name': normalized,
            'religion': top_religions[0]['religion'],
            'confidence': top_religions[0]['probability'],
            'top_religions': top_religions
        }

    def predict_ethnicity(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first"
    ) -> Dict:
        """Predict ethnicity from name (uses nationality as proxy)

        There is no separate ethnicity data, so 'ethnicity' is simply the
        name of the most likely country from ``predict_nationality``.
        """
        nationality = self.predict_nationality(name, name_type, top_n=1)

        return {
            'name': nationality['name'],
            'ethnicity': nationality['country_name'],  # Use country as ethnicity
            'country': nationality['country'],
            'country_name': nationality['country_name'],
            'region': nationality.get('region'),
            'language': nationality.get('language'),
            'confidence': nationality['confidence']
        }

    def predict_full_name(
        self,
        first_name: str,
        last_name: str,
        top_n: int = 5
    ) -> Dict:
        """
        Predict from full name (first + last)

        Country probabilities of the first name (weight 0.4) and the last
        name (weight 0.6) are summed per country; the region and language of
        the highest-frequency entry seen for a country are kept.

        Args:
            first_name: First name
            last_name: Last name
            top_n: Number of top predictions

        Returns:
            {
                'first_name': str,
                'last_name': str,
                'country': str or None,
                'country_name': str or None,
                'region': str or None,
                'language': str or None,
                'confidence': float,
                'top_countries': [...]
            }
        """
        first_pred = self.predict_nationality(first_name, "first", top_n=top_n)
        last_pred = self.predict_nationality(last_name, "last", top_n=top_n)

        # Combine scores. A country can occur several times in one
        # prediction (once per region/language group), so always accumulate
        # instead of overwriting.
        combined_scores = {}
        for prediction, weight in ((first_pred, 0.4), (last_pred, 0.6)):
            for item in prediction['top_countries']:
                entry = combined_scores.get(item['country'])
                if entry is None:
                    combined_scores[item['country']] = {
                        'score': item['probability'] * weight,
                        'region': item['region'],
                        'language': item['language']
                    }
                else:
                    entry['score'] += item['probability'] * weight

        sorted_countries = sorted(
            combined_scores.items(),
            key=lambda x: x[1]['score'],
            reverse=True
        )[:top_n]

        top_countries = [
            {
                'country': country_code,
                'country_name': self._country_name(country_code),
                'region': data['region'],
                'language': data['language'],
                'probability': round(data['score'], 4)
            }
            for country_code, data in sorted_countries
        ]

        top = top_countries[0] if top_countries else {}

        return {
            'first_name': self.normalize_name(first_name),
            'last_name': self.normalize_name(last_name),
            'country': top.get('country'),
            'country_name': top.get('country_name'),
            'region': top.get('region'),
            'language': top.get('language'),
            'confidence': top.get('probability', 0.0),
            'top_countries': top_countries
        }

    def predict_all(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first"
    ) -> Dict:
        """
        Predict ALL attributes at once

        Args:
            name: First or last name
            name_type: "first" or "last"

        Returns:
            {
                'name': str,
                'nationality': {...},
                'region': {...},
                'language': {...},
                'religion': {...},
                'ethnicity': {...},
                'gender': {...}   # Only present when name_type == "first"
            }
        """
        result = {
            'name': self.normalize_name(name),
            'nationality': self.predict_nationality(name, name_type),
            'region': self.predict_region(name, name_type),
            'language': self.predict_language(name, name_type),
            'religion': self.predict_religion(name, name_type),
            'ethnicity': self.predict_ethnicity(name, name_type)
        }

        # Gender only for first names
        if name_type == "first":
            result['gender'] = self.predict_gender(name)

        return result

    def get_stats(self) -> Dict:
        """Get database statistics

        Returns:
            Counts of first-name rows, last-name rows, distinct countries,
            distinct non-NULL regions and distinct non-NULL languages in the
            ``names`` table.
        """
        stat_queries = (
            ('total_first_names',
             "SELECT COUNT(*) as count FROM names WHERE name_type = 'first'"),
            ('total_last_names',
             "SELECT COUNT(*) as count FROM names WHERE name_type = 'last'"),
            ('countries',
             "SELECT COUNT(DISTINCT country_code) as count FROM names"),
            ('regions',
             "SELECT COUNT(DISTINCT region) as count FROM names WHERE region IS NOT NULL"),
            ('languages',
             "SELECT COUNT(DISTINCT language) as count FROM names WHERE language IS NOT NULL"),
        )

        cursor = self.conn.cursor()
        stats = {}
        for key, sql in stat_queries:
            cursor.execute(sql)
            stats[key] = cursor.fetchone()['count']

        return stats
@@ -0,0 +1,277 @@
1
+ """
2
+ EthniData Predictor - Main prediction module
3
+ """
4
+
5
+ import sqlite3
6
+ from pathlib import Path
7
+ from typing import Dict, List, Optional, Literal
8
+ from unidecode import unidecode
9
+ import pycountry
10
+
11
class EthniData:
    """Ethnicity and Nationality Data predictor

    Predictions are frequency lookups against the ``first_names`` and
    ``last_names`` tables of a SQLite database. The instance owns one
    connection; release it with ``close()`` or by using the instance as a
    context manager.
    """

    def __init__(self, db_path: Optional[str] = None):
        """
        Initialize EthniData predictor

        Args:
            db_path: Path to SQLite database. If None, uses default location.

        Raises:
            FileNotFoundError: If the database file does not exist.
        """
        if db_path is None:
            db_path = Path(__file__).parent / "ethnidata.db"

        self.db_path = Path(db_path)

        if not self.db_path.exists():
            raise FileNotFoundError(
                f"Database not found: {self.db_path}\n"
                f"Please run scripts/6_create_database.py first"
            )

        self.conn = sqlite3.connect(self.db_path)
        self.conn.row_factory = sqlite3.Row

    def close(self) -> None:
        """Close the database connection; safe to call more than once."""
        # __init__ may have raised before ``conn`` was assigned.
        if hasattr(self, 'conn'):
            self.conn.close()

    def __enter__(self) -> "EthniData":
        return self

    def __exit__(self, exc_type, exc_value, traceback) -> None:
        self.close()

    def __del__(self):
        """Close database connection"""
        self.close()

    @staticmethod
    def normalize_name(name: str) -> str:
        """Normalize name (lowercase, remove accents)"""
        return unidecode(name.strip().lower())

    @staticmethod
    def _table_for(name_type: str) -> str:
        """Map ``name_type`` to its table; anything but "first" means last names.

        Only these two fixed identifiers are ever interpolated into SQL.
        """
        return "first_names" if name_type == "first" else "last_names"

    @staticmethod
    def _take_top(rows: List, top_n: int) -> List:
        """Return the first ``top_n`` rows; a negative ``top_n`` means no limit.

        This mirrors the SQL ``LIMIT`` clause the query used previously.
        """
        return rows if top_n < 0 else rows[:top_n]

    @staticmethod
    def _country_name(country_code):
        """Look up the display name for an alpha-3 code via pycountry.

        Falls back to the code itself when the lookup finds nothing or
        fails. ``Exception`` (not a bare ``except``) is caught so that
        KeyboardInterrupt / SystemExit still propagate.
        """
        try:
            country = pycountry.countries.get(alpha_3=country_code)
            return country.name if country else country_code
        except Exception:
            return country_code

    def predict_nationality(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first",
        top_n: int = 5
    ) -> Dict:
        """
        Predict nationality from name

        Probabilities are relative to the summed frequency of ALL rows for
        the name, so they do not change with ``top_n``; ``top_n`` only limits
        how many entries are returned.

        Args:
            name: First or last name
            name_type: "first" or "last"
            top_n: Number of top predictions to return

        Returns:
            {
                'name': normalized name,
                'country': top country code (ISO 3166-1 alpha-3),
                'country_name': display name of the top country,
                'confidence': confidence score (0-1),
                'top_countries': [{country, country_name, probability, frequency}, ...]
            }
            When nothing matches, 'country' is None, 'confidence' is 0.0,
            'top_countries' is empty and 'country_name' is absent.
        """
        normalized = self.normalize_name(name)
        table = self._table_for(name_type)

        query = f"""
            SELECT country_code, frequency
            FROM {table}
            WHERE name = ?
            ORDER BY frequency DESC
        """
        rows = self.conn.execute(query, (normalized,)).fetchall()
        top_rows = self._take_top(rows, top_n)

        if not top_rows:
            return {
                'name': normalized,
                'country': None,
                'confidence': 0.0,
                'top_countries': []
            }

        # Normalise over every row, not only the ones being returned;
        # otherwise top_n=1 would always yield a probability of 1.0.
        total_frequency = sum(row['frequency'] for row in rows)

        top_countries = [
            {
                'country': row['country_code'],
                'country_name': self._country_name(row['country_code']),
                'probability': round(row['frequency'] / total_frequency, 4),
                'frequency': row['frequency']
            }
            for row in top_rows
        ]

        # Top prediction
        top = top_countries[0]

        return {
            'name': normalized,
            'country': top['country'],
            'country_name': top['country_name'],
            'confidence': top['probability'],
            'top_countries': top_countries
        }

    def predict_ethnicity(
        self,
        name: str,
        name_type: Literal["first", "last"] = "first"
    ) -> Dict:
        """
        Predict ethnicity from name

        Args:
            name: First or last name
            name_type: "first" or "last"

        Returns:
            When a row with a non-NULL ethnicity exists, the most frequent one:
            {
                'name': normalized name,
                'ethnicity': predicted ethnicity,
                'country': most likely country,
                'country_name': display name of that country,
                'frequency': frequency of the matched row
            }
            Otherwise the nationality prediction is used as a fallback:
            {
                'name': normalized name,
                'ethnicity': None,
                'country': most likely country or None,
                'country_name': display name or None,
                'confidence': confidence score
            }
        """
        normalized = self.normalize_name(name)
        table = self._table_for(name_type)

        # Query with ethnicity
        query = f"""
            SELECT country_code, ethnicity, frequency
            FROM {table}
            WHERE name = ? AND ethnicity IS NOT NULL
            ORDER BY frequency DESC
            LIMIT 1
        """
        match = self.conn.execute(query, (normalized,)).fetchone()

        if match:
            return {
                'name': normalized,
                'ethnicity': match['ethnicity'],
                'country': match['country_code'],
                'country_name': self._country_name(match['country_code']),
                'frequency': match['frequency']
            }

        # Fallback to nationality prediction
        nationality = self.predict_nationality(name, name_type, top_n=1)

        return {
            'name': normalized,
            'ethnicity': None,
            'country': nationality['country'],
            'country_name': nationality.get('country_name'),
            'confidence': nationality['confidence']
        }

    def predict_full_name(
        self,
        first_name: str,
        last_name: str,
        top_n: int = 5
    ) -> Dict:
        """
        Predict nationality from full name (first + last)

        Combines predictions from both first and last names: per country,
        the first-name probability is weighted 0.4 and the last-name
        probability 0.6, and the two are summed.

        Args:
            first_name: First name
            last_name: Last name
            top_n: Number of top predictions

        Returns:
            Combined prediction with country probabilities
        """
        first_pred = self.predict_nationality(first_name, "first", top_n=top_n)
        last_pred = self.predict_nationality(last_name, "last", top_n=top_n)

        # Combine probabilities; accumulate so a country code that occurs
        # more than once in a prediction is not overwritten.
        combined_scores = {}
        for prediction, weight in ((first_pred, 0.4), (last_pred, 0.6)):
            for item in prediction['top_countries']:
                code = item['country']
                weighted = item['probability'] * weight
                if code in combined_scores:
                    combined_scores[code] += weighted
                else:
                    combined_scores[code] = weighted

        # Sort by combined score
        sorted_countries = sorted(
            combined_scores.items(),
            key=lambda x: x[1],
            reverse=True
        )[:top_n]

        top_countries = [
            {
                'country': country_code,
                'country_name': self._country_name(country_code),
                'probability': round(score, 4)
            }
            for country_code, score in sorted_countries
        ]

        best = top_countries[0] if top_countries else None

        return {
            'first_name': self.normalize_name(first_name),
            'last_name': self.normalize_name(last_name),
            'country': best['country'] if best else None,
            'country_name': best['country_name'] if best else None,
            'confidence': best['probability'] if best else 0.0,
            'top_countries': top_countries
        }

    def get_stats(self) -> Dict:
        """Get database statistics

        Returns:
            Row counts and distinct-country counts for the ``first_names``
            and ``last_names`` tables.
        """
        stat_queries = (
            ('total_first_names',
             "SELECT COUNT(*) as count FROM first_names"),
            ('total_last_names',
             "SELECT COUNT(*) as count FROM last_names"),
            ('countries_first',
             "SELECT COUNT(DISTINCT country_code) as count FROM first_names"),
            ('countries_last',
             "SELECT COUNT(DISTINCT country_code) as count FROM last_names"),
        )

        cursor = self.conn.cursor()
        stats = {}
        for key, sql in stat_queries:
            cursor.execute(sql)
            stats[key] = cursor.fetchone()['count']

        return stats
@@ -0,0 +1,257 @@
1
+ Metadata-Version: 2.4
2
+ Name: ethnidata
3
+ Version: 3.1.5
4
+ Summary: Predict nationality, ethnicity, gender, region, language and religion from names - 238 countries, 6 major religions, 5.9M+ names, complete religious coverage
5
+ Author-email: Teyfik OZ <teyfikoz@example.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/teyfikoz/ethnidata
8
+ Project-URL: Documentation, https://github.com/teyfikoz/ethnidata#readme
9
+ Project-URL: Repository, https://github.com/teyfikoz/ethnidata.git
10
+ Project-URL: Issues, https://github.com/teyfikoz/ethnidata/issues
11
+ Keywords: names,nationality,ethnicity,demographics,prediction,NLP
12
+ Classifier: Development Status :: 4 - Beta
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.8
18
+ Classifier: Programming Language :: Python :: 3.9
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.11
21
+ Classifier: Programming Language :: Python :: 3.12
22
+ Requires-Python: >=3.8
23
+ Description-Content-Type: text/markdown
24
+ License-File: LICENSE
25
+ Requires-Dist: pycountry>=22.3.5
26
+ Requires-Dist: unidecode>=1.3.6
27
+ Provides-Extra: dev
28
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
29
+ Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
30
+ Provides-Extra: build
31
+ Requires-Dist: requests>=2.31.0; extra == "build"
32
+ Requires-Dist: pandas>=2.0.0; extra == "build"
33
+ Requires-Dist: numpy>=1.24.0; extra == "build"
34
+ Requires-Dist: beautifulsoup4>=4.12.0; extra == "build"
35
+ Requires-Dist: lxml>=4.9.0; extra == "build"
36
+ Requires-Dist: tqdm>=4.65.0; extra == "build"
37
+ Requires-Dist: wikipedia-api>=0.6.0; extra == "build"
38
+ Requires-Dist: sqlalchemy>=2.0.0; extra == "build"
39
+ Dynamic: license-file
40
+
41
+ # EthniData - Ethnicity and Nationality Prediction
42
+
43
+ [![Python](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
44
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
45
+ [![PyPI version](https://badge.fury.io/py/ethnidata.svg)](https://badge.fury.io/py/ethnidata)
46
+
47
+ Predict **nationality**, **ethnicity**, and **demographics** from names using a comprehensive global database built from multiple authoritative sources.
48
+
49
+ ## 🌟 Features
50
+
51
+ - **190+ Countries** - Comprehensive coverage from Wikipedia/Wikidata
52
+ - **106 Countries** - Enhanced with names-dataset
53
+ - **120 Years** of Olympic athlete names
54
+ - **Multiple Sources** - Phone directories, census data, public records
55
+ - **Fast Predictions** - SQLite-based for instant lookups
56
+ - **Normalized Data** - Unicode-aware, case-insensitive matching
57
+ - **Ethnicity Support** - Where available in source data
58
+ - **Simple API** - Easy to use Python interface
59
+
60
+ ## 📊 Data Sources
61
+
62
+ 1. **Wikipedia/Wikidata** - 190+ countries, biographical data with ethnicity
63
+ 2. **names-dataset** - 106 countries, curated name lists
64
+ 3. **Olympics Dataset** - 120 years of athlete names (271,116 records)
65
+ 4. **Phone Directories** - Public domain name lists from multiple countries
66
+ 5. **Census Data** - US Census and other government open data
67
+
68
+ ## 🚀 Installation
69
+
70
+ ```bash
71
+ pip install ethnidata
72
+ ```
73
+
74
+ ## 📖 Usage
75
+
76
+ ### Basic Usage
77
+
78
+ ```python
79
+ from ethnidata import EthniData
80
+
81
+ # Initialize
82
+ ed = EthniData()
83
+
84
+ # Predict nationality from first name
85
+ result = ed.predict_nationality("Ahmet", name_type="first")
86
+ print(result)
87
+ # {
88
+ # 'name': 'ahmet',
89
+ # 'country': 'TUR',
90
+ # 'country_name': 'Turkey',
91
+ # 'confidence': 0.89,
92
+ # 'top_countries': [
93
+ # {'country': 'TUR', 'country_name': 'Turkey', 'probability': 0.89},
94
+ # {'country': 'DEU', 'country_name': 'Germany', 'probability': 0.07},
95
+ # ...
96
+ # ]
97
+ # }
98
+
99
+ # Predict from last name
100
+ result = ed.predict_nationality("Tanaka", name_type="last")
101
+ print(result['country']) # 'JPN'
102
+
103
+ # Predict from full name (combines both)
104
+ result = ed.predict_full_name("Wei", "Chen")
105
+ print(result['country']) # 'CHN'
106
+
107
+ # Predict ethnicity (when available)
108
+ result = ed.predict_ethnicity("Muhammad", name_type="first")
109
+ print(result)
110
+ # {
111
+ # 'name': 'muhammad',
112
+ # 'ethnicity': 'Arab',
113
+ # 'country': 'SAU',
114
+ # 'country_name': 'Saudi Arabia'
115
+ # }
116
+ ```
117
+
118
+ ### Advanced Usage
119
+
120
+ ```python
121
+ # Get top 10 predictions
122
+ result = ed.predict_nationality("Maria", name_type="first", top_n=10)
123
+
124
+ for country in result['top_countries']:
125
+ print(f"{country['country_name']}: {country['probability']:.1%}")
126
+ # Spain: 35.4%
127
+ # Italy: 28.2%
128
+ # Portugal: 15.1%
129
+ # ...
130
+
131
+ # Database statistics
132
+ stats = ed.get_stats()
133
+ print(stats)
134
+ # {
135
+ # 'total_first_names': 123456,
136
+ # 'total_last_names': 234567,
137
+ # 'countries_first': 195,
138
+ # 'countries_last': 198
139
+ # }
140
+ ```
141
+
142
+ ## 🏗️ Project Structure
143
+
144
+ ```
145
+ ethnidata/
146
+ ├── ethnidata/ # Main package
147
+ │ ├── __init__.py
148
+ │ ├── predictor.py # Core prediction logic
149
+ │ └── ethnidata.db # SQLite database
150
+ ├── scripts/ # Data collection scripts
151
+ │ ├── 1_fetch_names_dataset.py
152
+ │ ├── 2_fetch_wikipedia.py
153
+ │ ├── 3_fetch_olympics.py
154
+ │ ├── 4_fetch_phone_directories.py
155
+ │ ├── 5_merge_all_data.py
156
+ │ └── 6_create_database.py
157
+ ├── tests/ # Unit tests
158
+ ├── examples/ # Example scripts
159
+ ├── docs/ # Documentation
160
+ ├── setup.py
161
+ ├── pyproject.toml
162
+ └── README.md
163
+ ```
164
+
165
+ ## 🔬 Accuracy & Methodology
166
+
167
+ ### How it works
168
+
169
+ 1. **Name Normalization**: Names are lowercased and Unicode-normalized (e.g., "José" → "jose")
170
+ 2. **Database Lookup**: Queries SQLite database for matching names
171
+ 3. **Frequency-Based Scoring**: Countries are ranked by how often the name appears
172
+ 4. **Probability Calculation**: Frequencies are converted to probabilities
173
+ 5. **Full Name Combination**: First-name and last-name probabilities are combined as a weighted sum (40% first name, 60% last name)
174
+
175
+ ### Limitations
176
+
177
+ - **Bias**: The database reflects historical Olympic participation and Wikipedia coverage, so some countries and eras are over-represented
178
+ - **Missing Names**: Rare or new names may not be in database
179
+ - **Ethnicity**: Only available where source data included it
180
+ - **Migration**: Doesn't account for diaspora or modern migration patterns
181
+ - **Multiple Origins**: Common names (e.g., "Ali", "Maria") exist in many cultures
182
+
183
+ ## 🛠️ Development
184
+
185
+ ### Build Database from Scratch
186
+
187
+ ```bash
188
+ git clone https://github.com/teyfikoz/ethnidata.git
189
+ cd ethnidata
190
+
191
+ # Install dependencies
192
+ pip install -r requirements.txt
193
+
194
+ # Fetch all data (takes 10-30 minutes)
195
+ cd scripts
196
+ python 1_fetch_names_dataset.py
197
+ python 2_fetch_wikipedia.py
198
+ python 3_fetch_olympics.py
199
+ python 4_fetch_phone_directories.py
200
+ python 5_merge_all_data.py
201
+ python 6_create_database.py
202
+ ```
203
+
204
+ ### Run Tests
205
+
206
+ ```bash
207
+ pip install -e ".[dev]"
208
+ pytest tests/ -v
209
+ ```
210
+
211
+ ## 📜 License
212
+
213
+ MIT License - see [LICENSE](LICENSE) file for details
214
+
215
+ ## 🤝 Contributing
216
+
217
+ Contributions welcome! Please:
218
+
219
+ 1. Fork the repository
220
+ 2. Create a feature branch
221
+ 3. Commit your changes
222
+ 4. Push to the branch
223
+ 5. Open a Pull Request
224
+
225
+ ## 📚 Citations
226
+
227
+ If you use this database in research, please cite:
228
+
229
+ ```bibtex
230
+ @software{ethnidata_2024,
231
+ title = {EthniData: Ethnicity and Nationality Prediction from Names},
232
+ author = {Oz, Teyfik},
233
+ year = {2024},
234
+ url = {https://github.com/teyfikoz/ethnidata}
235
+ }
236
+ ```
237
+
238
+ ### Data Source Citations
239
+
240
+ - **Olympics Data**: Randi Griffin (2018). 120 years of Olympic history. [Kaggle](https://www.kaggle.com/datasets/heesoo37/120-years-of-olympic-history-athletes-and-results)
241
+ - **names-dataset**: Philippe Remy (2021). [name-dataset](https://github.com/philipperemy/name-dataset)
242
+ - **Wikidata**: Wikimedia Foundation. [Wikidata](https://www.wikidata.org)
243
+
244
+ ## 🔗 Related Projects
245
+
246
+ - [ethnicolr](https://github.com/appeler/ethnicolr) - Ethnicity prediction using LSTM
247
+ - [name-dataset](https://github.com/philipperemy/name-dataset) - Name database (106 countries)
248
+ - [gender-guesser](https://github.com/lead-ratings/gender-guesser) - Gender prediction
249
+
250
+ ## 📧 Contact
251
+
252
+ - GitHub Issues: [Report bugs or request features](https://github.com/teyfikoz/ethnidata/issues)
253
+ - GitHub: [@teyfikoz](https://github.com/teyfikoz)
254
+
255
+ ---
256
+
257
+ **Built with ❤️ using open data**
@@ -0,0 +1,10 @@
1
+ ethnidata/__init__.py,sha256=xeg_6_eiFa05LNQKzUtQ1F1EHhQbGRrBSkq6lLuZ1CY,1858
2
+ ethnidata/downloader.py,sha256=GNohBtHyn_14TuWPhRUMxGNHy0UieXzwFCC5z-oiVQs,5057
3
+ ethnidata/ethnidata.db,sha256=edDXYMOoNNtVprbYGqQ7qPgdn2U7e6Y1PPyocBADVOs,78405632
4
+ ethnidata/predictor.py,sha256=fmmLSVpluMpBeKxABAwT7OVIaY4c_H5TksiZu7dETEQ,17687
5
+ ethnidata/predictor_old.py,sha256=dGmfYWTO2BRYxQUzzE7foZMEEDaOd6VWPZk4ib5Gp9E,8696
6
+ ethnidata-3.1.5.dist-info/licenses/LICENSE,sha256=p5pRNvuSoG_JxH4Xy11FK2iXc3hyAnzOKUx9gBltulk,1095
7
+ ethnidata-3.1.5.dist-info/METADATA,sha256=WEsx8FKprwzqhA2GmYGN7i52FzTCpqs-lTZqMV2040M,8182
8
+ ethnidata-3.1.5.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ ethnidata-3.1.5.dist-info/top_level.txt,sha256=V5Cuyv_Ib3mDSp2KL8MocXzLyp3o3r2FG01rFA7Iatk,10
10
+ ethnidata-3.1.5.dist-info/RECORD,,
@@ -0,0 +1,5 @@
1
+ Wheel-Version: 1.0
2
+ Generator: setuptools (80.9.0)
3
+ Root-Is-Purelib: true
4
+ Tag: py3-none-any
5
+
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2024 NBD Database Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1 @@
1
+ ethnidata