renard-pipeline 0.4.1__py3-none-any.whl → 0.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of renard-pipeline might be problematic. Click here for more details.

@@ -159,6 +159,7 @@ class GraphRulesCharacterUnifier(PipelineStep):
159
159
  min_appearances: int = 0,
160
160
  additional_hypocorisms: Optional[List[Tuple[str, List[str]]]] = None,
161
161
  link_corefs_mentions: bool = False,
162
+ ignore_lone_titles: Optional[Set[str]] = None,
162
163
  ) -> None:
163
164
  """
164
165
  :param min_appearances: minimum number of appearances of a
@@ -173,10 +174,14 @@ class GraphRulesCharacterUnifier(PipelineStep):
173
174
  extract a lot of spurious links. However, linking by
174
175
  coref is sometimes the only way to resolve a character
175
176
  alias.
177
+ :param ignore_lone_titles: a set of titles to ignore when
178
+ they stand on their own. This avoids extracting false
179
+ positive characters such as 'Mr.' or 'Miss'.
176
180
  """
177
181
  self.min_appearances = min_appearances
178
182
  self.additional_hypocorisms = additional_hypocorisms
179
183
  self.link_corefs_mentions = link_corefs_mentions
184
+ self.ignore_lone_titles = ignore_lone_titles or set()
180
185
 
181
186
  super().__init__()
182
187
 
@@ -197,11 +202,16 @@ class GraphRulesCharacterUnifier(PipelineStep):
197
202
  import networkx as nx
198
203
 
199
204
  mentions = [m for m in entities if m.tag == "PER"]
200
- mentions_str = [" ".join(m.tokens) for m in mentions]
205
+ mentions_str = set(
206
+ filter(
207
+ lambda m: not m in self.ignore_lone_titles,
208
+ map(lambda m: " ".join(m.tokens), mentions),
209
+ )
210
+ )
201
211
 
202
212
  # * create a graph where each node is a mention detected by NER
203
213
  G = nx.Graph()
204
- for mention_str in set(mentions_str):
214
+ for mention_str in mentions_str:
205
215
  G.add_node(mention_str)
206
216
 
207
217
  # * HumanName local configuration - dependent on language
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: renard-pipeline
3
- Version: 0.4.1
3
+ Version: 0.4.2
4
4
  Summary: Relationships Extraction from NARrative Documents
5
5
  Home-page: https://github.com/CompNet/Renard
6
6
  License: GPL-3.0-only
@@ -3,7 +3,7 @@ renard/graph_utils.py,sha256=5jwky9JgJ-WMVHfeaiXkAAQwEfhR2BFSrWhck1Qmpgo,5812
3
3
  renard/ner_utils.py,sha256=jN1AQkaV0Kx-Bc0oc3SYBEmSUuKPBbzXqByOlaqH62k,11263
4
4
  renard/nltk_utils.py,sha256=mUJiwMrEDZV4Fla7WuMR-hA_OC2ZIwSXgW_0Ew18VSo,977
5
5
  renard/pipeline/__init__.py,sha256=8Yim2mmny8YGvM7N5-na5zK-C9UDxUb77K9ml-VirUA,35
6
- renard/pipeline/character_unification.py,sha256=GcnC8UYqn1RBOGVhYS9LVcTNqpxm9YoT-lPsE3vodek,14818
6
+ renard/pipeline/character_unification.py,sha256=GJvPKw2zSMi0RpLLVlKsu7ewpxkrdxytND9PLxolbP4,15252
7
7
  renard/pipeline/characters_extraction.py,sha256=NzF8H9X19diW6rqwS5ERrRku7rFueO3S077H5C6kb7I,363
8
8
  renard/pipeline/core.py,sha256=luKNUTCDtZfwKzxVIaImyIMwFFvIknfT1LdQtongj24,22570
9
9
  renard/pipeline/corefs/__init__.py,sha256=9c9AaXBcRrDBf1jhTtJ7DyjOJhX_Zej3FjlcGak7MK8,44
@@ -29,7 +29,7 @@ renard/resources/pronouns/pronouns.py,sha256=YJ8hM6H8QHrF2Xx6O5blqc-Sqe1D1YFL0sR
29
29
  renard/resources/titles/__init__.py,sha256=Jcg4B7stsWiAaXbFgNl_L3ICtCQmFe9bo3YjdkVL50w,45
30
30
  renard/resources/titles/titles.py,sha256=GsFccVJuTkgDWiAqWZpFd2R9pGvFKQZBOk4RWWuWDkw,968
31
31
  renard/utils.py,sha256=8J3swFqSi4YqhgYNXvttJ0s-DmJbl_yEYri6JpGEWH8,2340
32
- renard_pipeline-0.4.1.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
- renard_pipeline-0.4.1.dist-info/METADATA,sha256=KgpnPAR6BtLS4RNjsxIBWqUygUcoRdJfkqHigzZMSqU,3697
34
- renard_pipeline-0.4.1.dist-info/WHEEL,sha256=7Z8_27uaHI_UZAc4Uox4PpBhQ9Y5_modZXWMxtUi4NU,88
35
- renard_pipeline-0.4.1.dist-info/RECORD,,
32
+ renard_pipeline-0.4.2.dist-info/LICENSE,sha256=OXLcl0T2SZ8Pmy2_dmlvKuetivmyPd5m1q-Gyd-zaYY,35149
33
+ renard_pipeline-0.4.2.dist-info/METADATA,sha256=R1ZbG6Mdk1B5Zk73QSKB-lZu7rDnvWKe3M5JiDqPFxM,3697
34
+ renard_pipeline-0.4.2.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
35
+ renard_pipeline-0.4.2.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.5.2
2
+ Generator: poetry-core 1.9.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any