nomenklatura-mpt 4.1.12__tar.gz → 4.1.14__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/PKG-INFO +3 -23
  2. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/names/match.py +63 -4
  3. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v4/model.py +7 -14
  4. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/pyproject.toml +4 -4
  5. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/.gitignore +0 -0
  6. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/LICENSE +0 -0
  7. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/README.md +0 -0
  8. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/__init__.py +0 -0
  9. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/cache.py +0 -0
  10. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/cli.py +0 -0
  11. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/conflicting_match.py +0 -0
  12. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/data/er-unstable.pkl +0 -0
  13. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/data/regression-v1.pkl +0 -0
  14. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/data/svm-v1.pkl +0 -0
  15. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/db.py +0 -0
  16. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/delta.py +0 -0
  17. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/__init__.py +0 -0
  18. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/aleph.py +0 -0
  19. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/common.py +0 -0
  20. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/nominatim.py +0 -0
  21. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/opencorporates.py +0 -0
  22. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/openfigi.py +0 -0
  23. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/permid.py +0 -0
  24. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/wikidata.py +0 -0
  25. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/enrich/yente.py +0 -0
  26. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/exceptions.py +0 -0
  27. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/index/__init__.py +0 -0
  28. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/index/common.py +0 -0
  29. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/index/entry.py +0 -0
  30. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/index/index.py +0 -0
  31. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/index/tokenizer.py +0 -0
  32. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/judgement.py +0 -0
  33. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/kv.py +0 -0
  34. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/__init__.py +0 -0
  35. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/bench.py +0 -0
  36. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/__init__.py +0 -0
  37. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/addresses.py +0 -0
  38. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/countries.py +0 -0
  39. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/dates.py +0 -0
  40. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/gender.py +0 -0
  41. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/identifiers.py +0 -0
  42. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/names.py +0 -0
  43. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compare/util.py +0 -0
  44. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/compat.py +0 -0
  45. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/__init__.py +0 -0
  46. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/countries.py +0 -0
  47. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/identifiers.py +0 -0
  48. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/misc.py +0 -0
  49. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/model.py +0 -0
  50. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/names.py +0 -0
  51. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/train.py +0 -0
  52. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/erun/util.py +0 -0
  53. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v1/__init__.py +0 -0
  54. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v1/identifiers.py +0 -0
  55. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v1/model.py +0 -0
  56. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v1/multi.py +0 -0
  57. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v1/phonetic.py +0 -0
  58. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/__init__.py +0 -0
  59. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/identifiers.py +0 -0
  60. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/model.py +0 -0
  61. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/names/__init__.py +0 -0
  62. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/names/analysis.py +0 -0
  63. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/names/distance.py +0 -0
  64. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/names/magic.py +0 -0
  65. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/names/pairing.py +0 -0
  66. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v2/names/util.py +0 -0
  67. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v3/__init__.py +0 -0
  68. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v3/identifiers.py +0 -0
  69. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v3/model.py +0 -0
  70. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v3/multi.py +0 -0
  71. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v3/phonetic.py +0 -0
  72. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v4/__init__.py +0 -0
  73. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v4/identifiers.py +0 -0
  74. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v4/multi.py +0 -0
  75. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/logic_v4/phonetic.py +0 -0
  76. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/name_based/__init__.py +0 -0
  77. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/name_based/misc.py +0 -0
  78. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/name_based/model.py +0 -0
  79. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/name_based/names.py +0 -0
  80. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/pairs.py +0 -0
  81. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/regression_v1/__init__.py +0 -0
  82. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/regression_v1/misc.py +0 -0
  83. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/regression_v1/model.py +0 -0
  84. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/regression_v1/names.py +0 -0
  85. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/regression_v1/train.py +0 -0
  86. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/regression_v1/util.py +0 -0
  87. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/svm_v1/__init__.py +0 -0
  88. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/svm_v1/misc.py +0 -0
  89. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/svm_v1/model.py +0 -0
  90. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/svm_v1/names.py +0 -0
  91. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/svm_v1/train.py +0 -0
  92. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/svm_v1/util.py +0 -0
  93. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/types.py +0 -0
  94. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/matching/util.py +0 -0
  95. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/publish/__init__.py +0 -0
  96. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/publish/dates.py +0 -0
  97. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/publish/edges.py +0 -0
  98. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/py.typed +0 -0
  99. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/resolver/__init__.py +0 -0
  100. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/resolver/common.py +0 -0
  101. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/resolver/edge.py +0 -0
  102. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/resolver/identifier.py +0 -0
  103. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/resolver/linker.py +0 -0
  104. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/resolver/resolver.py +0 -0
  105. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/settings.py +0 -0
  106. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/__init__.py +0 -0
  107. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/base.py +0 -0
  108. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/level.py +0 -0
  109. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/memory.py +0 -0
  110. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/redis_.py +0 -0
  111. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/sql.py +0 -0
  112. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/util.py +0 -0
  113. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/store/versioned.py +0 -0
  114. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/tui/__init__.py +0 -0
  115. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/tui/app.py +0 -0
  116. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/tui/app.tcss +0 -0
  117. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/tui/comparison.py +0 -0
  118. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/tui/util.py +0 -0
  119. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/util.py +0 -0
  120. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/versions.py +0 -0
  121. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/__init__.py +0 -0
  122. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/client.py +0 -0
  123. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/lang.py +0 -0
  124. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/model.py +0 -0
  125. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/props.py +0 -0
  126. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/qualified.py +0 -0
  127. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/query.py +0 -0
  128. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/wikidata/value.py +0 -0
  129. {nomenklatura_mpt-4.1.12 → nomenklatura_mpt-4.1.14}/nomenklatura/xref.py +0 -0
@@ -1,39 +1,19 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: nomenklatura_mpt
3
- Version: 4.1.12
3
+ Version: 4.1.14
4
4
  Summary: Make record linkages in followthemoney data.
5
5
  Project-URL: Documentation, https://github.com/opensanctions/nomenklatura/
6
6
  Project-URL: Repository, https://github.com/opensanctions/nomenklatura.git
7
7
  Project-URL: Issues, https://github.com/opensanctions/nomenklatura/issues
8
8
  Author-email: OpenSanctions <info@opensanctions.org>
9
- License: Copyright (c) 2013-2022, Friedrich Lindenberg
10
- Copyright (c) 2023-2025, OpenSanctions Datenbanken GmbH
11
-
12
- Permission is hereby granted, free of charge, to any person obtaining a
13
- copy of this software and associated documentation files (the
14
- "Software"), to deal in the Software without restriction, including
15
- without limitation the rights to use, copy, modify, merge, publish,
16
- distribute, sublicense, and/or sell copies of the Software, and to
17
- permit persons to whom the Software is furnished to do so, subject to
18
- the following conditions:
19
-
20
- The above copyright notice and this permission notice shall be included
21
- in all copies or substantial portions of the Software.
22
-
23
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
24
- OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25
- MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
26
- IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
27
- CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
28
- TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
29
- SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
9
+ License: MIT
30
10
  License-File: LICENSE
31
11
  Classifier: Intended Audience :: Developers
32
12
  Classifier: License :: OSI Approved :: MIT License
33
13
  Classifier: Operating System :: OS Independent
34
14
  Classifier: Programming Language :: Python :: 3.11
35
15
  Classifier: Programming Language :: Python :: 3.12
36
- Requires-Python: >=3.9
16
+ Requires-Python: <3.14,>=3.11
37
17
  Requires-Dist: click
38
18
  Requires-Dist: fingerprints
39
19
  Requires-Dist: followthemoney
@@ -5,7 +5,7 @@ from rigour.names import remove_obj_prefixes
5
5
  from followthemoney.proxy import E, EntityProxy
6
6
  from followthemoney import model
7
7
  from followthemoney.types import registry
8
- from followthemoney.names import schema_type_tag
8
+ from rigour.names import schema_type_tag
9
9
 
10
10
  from nomenklatura.matching.logic_v2.names.analysis import entity_names
11
11
  from nomenklatura.matching.logic_v2.names.magic import weight_extra_match
@@ -198,15 +198,74 @@ def name_match(query: E, result: E, config: ScoringConfig) -> FtResult:
198
198
 
199
199
 
200
200
  def name_match_levenshtein(query: E, result: E, config: ScoringConfig) -> FtResult:
201
- """Match two entities by analyzing and comparing their names."""
201
+ """Match two entities using Levenshtein distance on their normalized names.
202
+
203
+ This function compares entities by computing the Levenshtein distance between
204
+ all pairs of their names and returns the best match score.
205
+
206
+ Args:
207
+ query: The query entity to match
208
+ result: The result entity to match against
209
+ config: Scoring configuration parameters
210
+
211
+ Returns:
212
+ FtResult with score (0.0-1.0) and detail string explaining the match
213
+ """
202
214
  schema = model.common_schema(query.schema, result.schema)
203
215
  type_tag = schema_type_tag(schema)
204
216
  best = FtResult(score=0.0, detail=None)
217
+
205
218
  if type_tag == NameTypeTag.UNK:
206
219
  # Name matching is not supported for entities that are not listed
207
220
  # as a person, organization, or a thing.
208
221
  best.detail = "Unsuited for name matching: %s" % schema.name
209
222
  return best
210
-
211
- return match_object_names(query, result, config)
223
+
224
+ # For object types (vessels, assets, etc.), use the existing object name matching
225
+ if type_tag == NameTypeTag.OBJ:
226
+ return match_object_names(query, result, config)
227
+
228
+ # Get all names for both entities
229
+ query_names = entity_names(type_tag, query, is_query=True)
230
+ result_names = entity_names(type_tag, result)
231
+
232
+ if not query_names or not result_names:
233
+ best.detail = "No names available for matching"
234
+ return best
235
+
236
+ # Check for literal matches first (early return for efficiency)
237
+ query_comparable = {name.comparable: name for name in query_names}
238
+ result_comparable = {name.comparable: name for name in result_names}
239
+ common = set(query_comparable).intersection(result_comparable)
240
+ if len(common) > 0:
241
+ longest = max(common, key=len)
242
+ best.detail = f"[{longest!r} literalMatch]"
243
+ return FtResult(score=1.0, detail=best.detail)
244
+
245
+ # Consolidate names (remove short names contained in longer names)
246
+ query_names = Name.consolidate_names(query_names)
247
+ result_names = Name.consolidate_names(result_names)
248
+
249
+ # Compare all pairs of names using Levenshtein distance
250
+ for query_name in query_names:
251
+ for result_name in result_names:
252
+ # Get the comparable forms (normalized strings)
253
+ query_str = query_name.comparable
254
+ result_str = result_name.comparable
255
+
256
+ # Use strict_levenshtein which already implements a good scoring mechanism
257
+ # max_rate=4 means we allow up to len/4 edits
258
+ score = strict_levenshtein(query_str, result_str, max_rate=4)
259
+
260
+ if score > best.score:
261
+ best.score = score
262
+ if score == 1.0:
263
+ best.detail = f"[{query_str!r}≡{result_str!r}, exactMatch]"
264
+ else:
265
+ best.detail = f"[{query_str!r}≈{result_str!r}, levenshteinScore: {score:.4f}]"
266
+
267
+ if best.detail is None:
268
+ best.detail = "No sufficient Levenshtein match found"
269
+
270
+ return best
212
271
 
@@ -1,7 +1,7 @@
1
1
  from typing import Dict, List
2
2
 
3
- from nomenklatura.matching.logic_v1.phonetic import name_soundex_match, person_name_phonetic_match
4
- from nomenklatura.matching.logic_v4.phonetic import name_metaphone_match
3
+ from nomenklatura.matching.logic_v1.identifiers import orgid_disjoint
4
+ from nomenklatura.matching.logic_v3.multi import numbers_mismatch
5
5
  from nomenklatura.matching.types import Feature, FtResult, HeuristicAlgorithm
6
6
  from nomenklatura.matching.types import ConfigVar, ConfigVarType
7
7
  from nomenklatura.matching.compare.countries import country_mismatch
@@ -9,17 +9,14 @@ from nomenklatura.matching.compare.gender import gender_mismatch
9
9
  from nomenklatura.matching.compare.identifiers import crypto_wallet_address
10
10
  from nomenklatura.matching.compare.identifiers import identifier_match
11
11
  from nomenklatura.matching.compare.dates import dob_day_disjoint, dob_year_disjoint
12
- from nomenklatura.matching.compare.names import weak_alias_match
12
+ from nomenklatura.matching.compare.names import last_name_mismatch, weak_alias_match
13
13
  from nomenklatura.matching.compare.addresses import address_entity_match
14
- from nomenklatura.matching.compare.addresses import address_prop_match
15
14
  from nomenklatura.matching.logic_v2.names.match import name_match_levenshtein
16
15
  from nomenklatura.matching.logic_v2.identifiers import bic_code_match
17
16
  from nomenklatura.matching.logic_v2.identifiers import inn_code_match, ogrn_code_match
18
17
  from nomenklatura.matching.logic_v2.identifiers import isin_security_match
19
18
  from nomenklatura.matching.logic_v2.identifiers import lei_code_match
20
19
  from nomenklatura.matching.logic_v2.identifiers import vessel_imo_mmsi_match
21
- from nomenklatura.matching.logic_v2.identifiers import uei_code_match
22
- from nomenklatura.matching.logic_v2.identifiers import npi_code_match
23
20
  from nomenklatura.matching.util import FNUL
24
21
 
25
22
 
@@ -34,10 +31,6 @@ class LogicV4(HeuristicAlgorithm):
34
31
  NAME = "logic-v4"
35
32
  features = [
36
33
  Feature(func=name_match_levenshtein, weight=1.0),
37
- Feature(func=FtResult.wrap(person_name_phonetic_match), weight=0.9),
38
- # These are there so they can be enabled using custom weights:
39
- Feature(func=FtResult.wrap(name_metaphone_match), weight=FNUL),
40
- Feature(func=FtResult.wrap(name_soundex_match), weight=FNUL),
41
34
  Feature(func=address_entity_match, weight=0.98),
42
35
  Feature(func=crypto_wallet_address, weight=0.98),
43
36
  Feature(func=isin_security_match, weight=0.98),
@@ -46,15 +39,15 @@ class LogicV4(HeuristicAlgorithm):
46
39
  Feature(func=vessel_imo_mmsi_match, weight=0.95),
47
40
  Feature(func=inn_code_match, weight=0.95),
48
41
  Feature(func=bic_code_match, weight=0.95),
49
- Feature(func=uei_code_match, weight=0.95),
50
- Feature(func=npi_code_match, weight=0.95),
51
42
  Feature(func=identifier_match, weight=0.85),
52
43
  Feature(func=weak_alias_match, weight=0.8),
53
- Feature(func=address_prop_match, weight=0.2, qualifier=True),
54
44
  Feature(func=country_mismatch, weight=-0.2, qualifier=True),
45
+ Feature(func=FtResult.wrap(last_name_mismatch), weight=-0.2, qualifier=True),
55
46
  Feature(func=dob_year_disjoint, weight=-0.15, qualifier=True),
56
- Feature(func=dob_day_disjoint, weight=-0.25, qualifier=True),
47
+ Feature(func=dob_day_disjoint, weight=-0.2, qualifier=True),
57
48
  Feature(func=gender_mismatch, weight=-0.2, qualifier=True),
49
+ Feature(func=orgid_disjoint, weight=-0.2, qualifier=True),
50
+ Feature(func=numbers_mismatch, weight=-0.1, qualifier=True),
58
51
  ]
59
52
  CONFIG = {
60
53
  "nm_number_mismatch": ConfigVar(
@@ -4,10 +4,10 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "nomenklatura_mpt"
7
- version = "4.1.12"
7
+ version = "4.1.14"
8
8
  description = "Make record linkages in followthemoney data."
9
9
  readme = "README.md"
10
- license = { file = "LICENSE" }
10
+ license = { text = "MIT" }
11
11
  authors = [{ name = "OpenSanctions", email = "info@opensanctions.org" }]
12
12
  classifiers = [
13
13
  "Intended Audience :: Developers",
@@ -16,7 +16,7 @@ classifiers = [
16
16
  "Programming Language :: Python :: 3.11",
17
17
  "Programming Language :: Python :: 3.12",
18
18
  ]
19
- requires-python = ">= 3.9"
19
+ requires-python = ">=3.11,<3.14"
20
20
  dependencies = [
21
21
  "followthemoney",
22
22
  "rigour",
@@ -66,7 +66,7 @@ leveldb = ["plyvel < 2.0.0"]
66
66
  redis = ["redis > 5.0.0, < 7.0.0"]
67
67
 
68
68
  [tool.hatch.build.targets.sdist]
69
- only-include = ["nomenklatura", "LICENSE", "README.md"]
69
+ only-include = ["nomenklatura", "README.md"]
70
70
 
71
71
  [tool.hatch.build.targets.wheel]
72
72
  packages = ["nomenklatura"]